Improved the matching capability of the Rares-style fuzzy-join rule so that it is more consistent with index-based matching. Added an indexed-NL-join hint to resolve the choice between Rares-style fuzzy joins (where applicable) and index-based fuzzy joins. Enabled a few Rares-style fuzzy-join tests that used to break.
git-svn-id: https://asterixdb.googlecode.com/svn/branches/asterix_fix_issue_154@553 eaa15691-b419-025a-1212-ee371bd00084
diff --git a/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/FuzzyEqRule.java b/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/FuzzyEqRule.java
index c770e9f..1a2cd2a 100644
--- a/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/FuzzyEqRule.java
+++ b/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/FuzzyEqRule.java
@@ -140,6 +140,8 @@
FunctionIdentifier simFunctionIdentifier = FuzzyUtils.getFunctionIdentifier(simFuncName);
ScalarFunctionCallExpression similarityExp = new ScalarFunctionCallExpression(
FunctionUtils.getFunctionInfo(simFunctionIdentifier), similarityArgs);
+ // Add annotations from the original fuzzy-eq function.
+ similarityExp.getAnnotations().putAll(funcExp.getAnnotations());
ArrayList<Mutable<ILogicalExpression>> cmpArgs = new ArrayList<Mutable<ILogicalExpression>>();
cmpArgs.add(new MutableObject<ILogicalExpression>(similarityExp));
IAObject simThreshold = FuzzyUtils.getSimThreshold(aqlMetadata, simFuncName);
diff --git a/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/FuzzyJoinRule.java b/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/FuzzyJoinRule.java
index d5131a9..e700971 100644
--- a/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/FuzzyJoinRule.java
+++ b/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/FuzzyJoinRule.java
@@ -33,6 +33,7 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IndexedNLJoinExpressionAnnotation;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
@@ -46,6 +47,11 @@
public class FuzzyJoinRule implements IAlgebraicRewriteRule {
+ private static HashSet<FunctionIdentifier> simFuncs = new HashSet<FunctionIdentifier>();
+ static {
+ simFuncs.add(AsterixBuiltinFunctions.SIMILARITY_JACCARD_CHECK);
+ }
+
private static final String AQLPLUS = ""
//
// -- - Stage 3 - --
@@ -133,20 +139,30 @@
return false;
}
- // find fuzzy join condition
+ // Find GET_ITEM function.
AbstractBinaryJoinOperator joinOp = (AbstractBinaryJoinOperator) op;
Mutable<ILogicalExpression> expRef = joinOp.getCondition();
- Mutable<ILogicalExpression> fuzzyExpRef = getSimilarityExpression(expRef);
- if (fuzzyExpRef == null) {
+ Mutable<ILogicalExpression> getItemExprRef = getSimilarityExpression(expRef);
+ if (getItemExprRef == null) {
return false;
}
-
- AbstractFunctionCallExpression funcExp = (AbstractFunctionCallExpression) fuzzyExpRef.getValue();
+ // Check if the GET_ITEM function is on one of the supported similarity-check functions.
+ AbstractFunctionCallExpression getItemFuncExpr = (AbstractFunctionCallExpression) getItemExprRef.getValue();
+ Mutable<ILogicalExpression> argRef = getItemFuncExpr.getArguments().get(0);
+ AbstractFunctionCallExpression simFuncExpr = (AbstractFunctionCallExpression) argRef.getValue();
+ if (!simFuncs.contains(simFuncExpr.getFunctionIdentifier())) {
+ return false;
+ }
+ // Skip this rule based on annotations.
+ if (simFuncExpr.getAnnotations().containsKey(IndexedNLJoinExpressionAnnotation.INSTANCE)) {
+ return false;
+ }
+
List<Mutable<ILogicalOperator>> inputOps = joinOp.getInputs();
ILogicalOperator leftInputOp = inputOps.get(0).getValue();
ILogicalOperator rightInputOp = inputOps.get(1).getValue();
- List<Mutable<ILogicalExpression>> inputExps = funcExp.getArguments();
+ List<Mutable<ILogicalExpression>> inputExps = simFuncExpr.getArguments();
ILogicalExpression inputExp0 = inputExps.get(0).getValue();
ILogicalExpression inputExp1 = inputExps.get(1).getValue();
@@ -315,9 +331,9 @@
ILogicalOperator outputOp = plan.getRoots().get(0).getValue();
SelectOperator extraSelect = null;
- if (fuzzyExpRef != expRef) {
+ if (getItemExprRef != expRef) {
// more than one join condition
- fuzzyExpRef.setValue(ConstantExpression.TRUE);
+ getItemExprRef.setValue(ConstantExpression.TRUE);
switch (joinOp.getJoinKind()) {
case INNER: {
extraSelect = new SelectOperator(expRef);
@@ -343,19 +359,19 @@
return true;
}
- /*
- * look for FUZZY_EQ function call
+ /**
+ * Look for GET_ITEM function call.
*/
private Mutable<ILogicalExpression> getSimilarityExpression(Mutable<ILogicalExpression> expRef) {
ILogicalExpression exp = expRef.getValue();
if (exp.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
- AbstractFunctionCallExpression funcExp = (AbstractFunctionCallExpression) exp;
- if (funcExp.getFunctionIdentifier().equals(AsterixBuiltinFunctions.FUZZY_EQ)) {
- return expRef;
- } else if (funcExp.getFunctionIdentifier().equals(AlgebricksBuiltinFunctions.AND)
- || funcExp.getFunctionIdentifier().equals(AlgebricksBuiltinFunctions.OR)) {
+ AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) exp;
+ if (funcExpr.getFunctionIdentifier().equals(AsterixBuiltinFunctions.GET_ITEM)) {
+ return expRef;
+ }
+ if (funcExpr.getFunctionIdentifier().equals(AlgebricksBuiltinFunctions.AND)) {
for (int i = 0; i < 2; i++) {
- Mutable<ILogicalExpression> expRefRet = getSimilarityExpression(funcExp.getArguments().get(i));
+ Mutable<ILogicalExpression> expRefRet = getSimilarityExpression(funcExpr.getArguments().get(i));
if (expRefRet != null) {
return expRefRet;
}
diff --git a/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/SimilarityCheckRule.java b/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/SimilarityCheckRule.java
index 60e5a3e..e979b6c 100644
--- a/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/SimilarityCheckRule.java
+++ b/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/SimilarityCheckRule.java
@@ -127,7 +127,7 @@
Mutable<ILogicalExpression> simFuncExprRef = null;
ScalarFunctionCallExpression simCheckFuncExpr = null;
AssignOperator matchingAssign = null;
- for (int i = 0; i < assigns.size(); i++) {
+ for (int i = 0; i < assigns.size(); i++) {
AssignOperator assign = assigns.get(i);
for (int j = 0; j < assign.getVariables().size(); j++) {
// Check if variables match.
@@ -253,6 +253,10 @@
simCheckFuncExpr = new ScalarFunctionCallExpression(
FunctionUtils.getFunctionInfo(AsterixBuiltinFunctions.EDIT_DISTANCE_CHECK), similarityArgs);
}
+ // Preserve all annotations.
+ if (simCheckFuncExpr != null) {
+ simCheckFuncExpr.getAnnotations().putAll(funcExpr.getAnnotations());
+ }
return simCheckFuncExpr;
}
diff --git a/asterix-algebra/src/main/java/edu/uci/ics/asterix/translator/AqlExpressionToPlanTranslator.java b/asterix-algebra/src/main/java/edu/uci/ics/asterix/translator/AqlExpressionToPlanTranslator.java
index 35fb3ae..e5712df 100644
--- a/asterix-algebra/src/main/java/edu/uci/ics/asterix/translator/AqlExpressionToPlanTranslator.java
+++ b/asterix-algebra/src/main/java/edu/uci/ics/asterix/translator/AqlExpressionToPlanTranslator.java
@@ -115,6 +115,7 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.BroadcastExpressionAnnotation;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.BroadcastExpressionAnnotation.BroadcastSide;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IExpressionAnnotation;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
@@ -577,6 +578,12 @@
f = new ScalarFunctionCallExpression(
FunctionUtils.getFunctionInfo(fi), args);
}
+ // Put hints into function call expr.
+ if (fcall.hasHints()) {
+ for (IExpressionAnnotation hint : fcall.getHints()) {
+ f.getAnnotations().put(hint, hint);
+ }
+ }
AssignOperator op = new AssignOperator(v,
new MutableObject<ILogicalExpression>(f));
if (topOp != null) {
@@ -828,6 +835,14 @@
}
}
+ // Add hints as annotations.
+ if (op.hasHints() && currExpr instanceof AbstractFunctionCallExpression) {
+ AbstractFunctionCallExpression currFuncExpr = (AbstractFunctionCallExpression) currExpr;
+ for (IExpressionAnnotation hint : op.getHints()) {
+ currFuncExpr.getAnnotations().put(hint, hint);
+ }
+ }
+
LogicalVariable assignedVar = context.newVar();
AssignOperator a = new AssignOperator(assignedVar,
new MutableObject<ILogicalExpression>(currExpr));
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_01.aql
index f6f2f84..ffb9b8a 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_01.aql
@@ -44,5 +44,5 @@
for $a in dataset('Customers')
for $b in dataset('Customers2')
-where $a.interests ~= $b.interests and $a.cid < $b.cid
+where $a.interests /*+ indexnl */ ~= $b.interests and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_02.aql
index 1951e6f..f94fb85 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_02.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_02.aql
@@ -44,5 +44,5 @@
for $a in dataset('Customers')
for $b in dataset('Customers2')
-where $a.interests ~= $b.interests and $a.cid < $b.cid
+where $a.interests /*+ indexnl */ ~= $b.interests and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_03.aql
index d791b85..669c336 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_03.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_03.aql
@@ -38,5 +38,5 @@
for $a in dataset('Customers')
for $b in dataset('Customers')
-where $a.interests ~= $b.interests and $a.cid < $b.cid
+where $a.interests /*+ indexnl */ ~= $b.interests and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_01.aql
index 5f6f59b..63ff85c 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_01.aql
@@ -41,5 +41,5 @@
for $a in dataset('Customers')
for $b in dataset('Customers2')
-where similarity-jaccard-check($a.interests, $b.interests, 0.7f)[0] and $a.cid < $b.cid
+where /*+ indexnl */ similarity-jaccard-check($a.interests, $b.interests, 0.7f)[0] and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_02.aql
index 0754282..b708069 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_02.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_02.aql
@@ -41,5 +41,5 @@
for $a in dataset('Customers')
for $b in dataset('Customers2')
-where similarity-jaccard-check($a.interests, $b.interests, 0.7f)[0] and $a.cid < $b.cid
+where /*+ indexnl */ similarity-jaccard-check($a.interests, $b.interests, 0.7f)[0] and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_03.aql
index 4dbc4d5..21932ef 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_03.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_03.aql
@@ -35,5 +35,5 @@
for $a in dataset('Customers')
for $b in dataset('Customers')
-where similarity-jaccard-check($a.interests, $b.interests, 0.7f)[0] and $a.cid < $b.cid
+where /*+ indexnl */ similarity-jaccard-check($a.interests, $b.interests, 0.7f)[0] and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_01.aql
index ddf386e..c519be9 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_01.aql
@@ -41,5 +41,5 @@
for $a in dataset('Customers')
for $b in dataset('Customers2')
-where similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
+where /*+ indexnl */ similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_02.aql
index 50c3db6..7cd595a 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_02.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_02.aql
@@ -41,5 +41,5 @@
for $a in dataset('Customers')
for $b in dataset('Customers2')
-where similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
+where /*+ indexnl */ similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_03.aql
index 50729ba..c7ad4c4 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_03.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_03.aql
@@ -35,5 +35,5 @@
for $a in dataset('Customers')
for $b in dataset('Customers')
-where similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
+where /*+ indexnl */ similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_01.aql
index 1fa479d..74d043e 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_01.aql
@@ -44,5 +44,5 @@
for $a in dataset('Customers')
for $b in dataset('Customers2')
-where $a.interests ~= $b.interests and $a.cid < $b.cid
+where $a.interests /*+ indexnl */ ~= $b.interests and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_02.aql
index e5b532f..da1ebe9 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_02.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_02.aql
@@ -44,5 +44,5 @@
for $a in dataset('Customers')
for $b in dataset('Customers2')
-where $a.interests ~= $b.interests and $a.cid < $b.cid
+where $a.interests /*+ indexnl */ ~= $b.interests and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_03.aql
index a881c89..283d80a 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_03.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_03.aql
@@ -38,5 +38,5 @@
for $a in dataset('Customers')
for $b in dataset('Customers')
-where $a.interests ~= $b.interests and $a.cid < $b.cid
+where $a.interests /*+ indexnl */ ~= $b.interests and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_01.aql
index 5d95894..7f5e92d 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_01.aql
@@ -41,5 +41,5 @@
for $a in dataset('Customers')
for $b in dataset('Customers2')
-where similarity-jaccard-check($a.interests, $b.interests, 0.7f)[0] and $a.cid < $b.cid
+where /*+ indexnl */ similarity-jaccard-check($a.interests, $b.interests, 0.7f)[0] and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_02.aql
index 561e15f..d367bb8 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_02.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_02.aql
@@ -41,5 +41,5 @@
for $a in dataset('Customers')
for $b in dataset('Customers2')
-where similarity-jaccard-check($a.interests, $b.interests, 0.7f)[0] and $a.cid < $b.cid
+where /*+ indexnl */ similarity-jaccard-check($a.interests, $b.interests, 0.7f)[0] and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_03.aql
index 87d78e5..0bdd09e 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_03.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_03.aql
@@ -35,5 +35,5 @@
for $a in dataset('Customers')
for $b in dataset('Customers')
-where similarity-jaccard-check($a.interests, $b.interests, 0.7f)[0] and $a.cid < $b.cid
+where /*+ indexnl */ similarity-jaccard-check($a.interests, $b.interests, 0.7f)[0] and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_01.aql
index 864ede7..8188e07 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_01.aql
@@ -41,5 +41,5 @@
for $a in dataset('Customers')
for $b in dataset('Customers2')
-where similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
+where /*+ indexnl */ similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_02.aql
index 4d9f89e..ba64230 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_02.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_02.aql
@@ -41,5 +41,5 @@
for $a in dataset('Customers')
for $b in dataset('Customers2')
-where similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
+where /*+ indexnl */ similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_03.aql
index 5eae45b..5007579 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_03.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_03.aql
@@ -35,5 +35,5 @@
for $a in dataset('Customers')
for $b in dataset('Customers')
-where similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
+where /*+ indexnl */ similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/runtimets/ignore.txt b/asterix-app/src/test/resources/runtimets/ignore.txt
index 784be19..ac51e7a 100644
--- a/asterix-app/src/test/resources/runtimets/ignore.txt
+++ b/asterix-app/src/test/resources/runtimets/ignore.txt
@@ -2,13 +2,7 @@
scan/spatial_types_02.aql
scan/temp_types_02.aql
fuzzyjoin/dblp-splits-3_1.aql
-fuzzyjoin/dblp-csx-aqlplus_1.aql
-fuzzyjoin/dblp-csx-aqlplus_2.aql
-fuzzyjoin/dblp-csx-aqlplus_3.aql
fuzzyjoin/events-users-aqlplus_1.aql
-fuzzyjoin/dblp-aqlplus_1.aql
-fuzzyjoin/dblp-csx-dblp-aqlplus_1.aql
-fuzzyjoin/user-vis-int-vis-user-lot-aqlplus_1.aql
subset-collection/04.aql
quantifiers/everysat_01.aql
custord/freq-clerk.aql
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-olist-jaccard.aql b/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-olist-jaccard.aql
index 91fcd80..ddccd2e 100644
--- a/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-olist-jaccard.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-olist-jaccard.aql
@@ -41,7 +41,7 @@
for $a in dataset('Customers')
for $b in dataset('Customers2')
-where similarity-jaccard($a.interests, $b.interests) >= 0.9f
+where /*+ indexnl */ similarity-jaccard($a.interests, $b.interests) >= 0.9f
and $a.cid < $b.cid
order by $a.cid, $b.cid
return { "a": $a.interests, "b": $b.interests }
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-ulist-jaccard.aql b/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-ulist-jaccard.aql
index 2b2d52c..e0dba06 100644
--- a/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-ulist-jaccard.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-ulist-jaccard.aql
@@ -41,7 +41,7 @@
for $a in dataset('Customers')
for $b in dataset('Customers2')
-where similarity-jaccard($a.interests, $b.interests) >= 0.9f
+where /*+ indexnl */ similarity-jaccard($a.interests, $b.interests) >= 0.9f
and $a.cid < $b.cid
order by $a.cid, $b.cid
return { "a": $a.interests, "b": $b.interests }
\ No newline at end of file
diff --git a/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/base/AbstractExpression.java b/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/base/AbstractExpression.java
new file mode 100644
index 0000000..e83a1ed
--- /dev/null
+++ b/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/base/AbstractExpression.java
@@ -0,0 +1,25 @@
+package edu.uci.ics.asterix.aql.base;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IExpressionAnnotation;
+
+public abstract class AbstractExpression implements Expression {
+ protected List<IExpressionAnnotation> hints;
+
+ public void addHint(IExpressionAnnotation hint) {
+ if (hints == null) {
+ hints = new ArrayList<IExpressionAnnotation>();
+ }
+ hints.add(hint);
+ }
+
+ public boolean hasHints() {
+ return hints != null;
+ }
+
+ public List<IExpressionAnnotation> getHints() {
+ return hints;
+ }
+}
diff --git a/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/expression/CallExpr.java b/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/expression/CallExpr.java
index cda7e69..d1f83b0 100644
--- a/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/expression/CallExpr.java
+++ b/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/expression/CallExpr.java
@@ -2,17 +2,18 @@
import java.util.List;
+import edu.uci.ics.asterix.aql.base.AbstractExpression;
import edu.uci.ics.asterix.aql.base.Expression;
import edu.uci.ics.asterix.aql.expression.visitor.IAqlExpressionVisitor;
import edu.uci.ics.asterix.aql.expression.visitor.IAqlVisitorWithVoidReturn;
import edu.uci.ics.asterix.common.exceptions.AsterixException;
import edu.uci.ics.asterix.om.functions.AsterixFunction;
-public class CallExpr implements Expression {
+public class CallExpr extends AbstractExpression {
private AsterixFunction ident;
private List<Expression> exprList;
- private boolean isBuiltin;
-
+ private boolean isBuiltin;
+
public CallExpr() {
}
@@ -32,7 +33,7 @@
public List<Expression> getExprList() {
return exprList;
}
-
+
public void setExprList(List<Expression> exprList) {
this.exprList = exprList;
}
diff --git a/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/expression/OperatorExpr.java b/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/expression/OperatorExpr.java
index b6bb55b..23d2179 100644
--- a/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/expression/OperatorExpr.java
+++ b/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/expression/OperatorExpr.java
@@ -2,12 +2,13 @@
import java.util.ArrayList;
+import edu.uci.ics.asterix.aql.base.AbstractExpression;
import edu.uci.ics.asterix.aql.base.Expression;
import edu.uci.ics.asterix.aql.expression.visitor.IAqlExpressionVisitor;
import edu.uci.ics.asterix.aql.expression.visitor.IAqlVisitorWithVoidReturn;
import edu.uci.ics.asterix.common.exceptions.AsterixException;
-public class OperatorExpr implements Expression {
+public class OperatorExpr extends AbstractExpression {
private ArrayList<Expression> exprList;
private ArrayList<OperatorType> opList;
private ArrayList<Integer> exprBroadcastIdx;
diff --git a/asterix-aql/src/main/javacc/AQL.jj b/asterix-aql/src/main/javacc/AQL.jj
index 0767c4e..36a5d3e 100644
--- a/asterix-aql/src/main/javacc/AQL.jj
+++ b/asterix-aql/src/main/javacc/AQL.jj
@@ -39,6 +39,8 @@
import edu.uci.ics.asterix.common.annotations.*;
import edu.uci.ics.asterix.common.exceptions.AsterixException;
import edu.uci.ics.asterix.om.functions.AsterixFunction;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IExpressionAnnotation;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IndexedNLJoinExpressionAnnotation;
public class AQLParser extends ScopeChecker {
@@ -59,6 +61,7 @@
// optimizer hints
private static final String HASH_GROUP_BY_HINT = "hash";
private static final String BROADCAST_JOIN_HINT = "bcast";
+ private static final String INDEXED_NESTED_LOOP_JOIN_HINT = "indexnl";
private static final String INMEMORY_HINT = "inmem";
private static final String VAL_FILE_HINT = "val-files";
private static final String VAL_FILE_SAME_INDEX_HINT = "val-file-same-idx";
@@ -1246,14 +1249,15 @@
OperatorExpr op = null;
Expression operand = null;
boolean broadcast = false;
+ IExpressionAnnotation annotation = null;
}
{
operand = AddExpr()
- {
- if (operand instanceof VariableExpr) {
- String hint = getHint(token);
+ {
+ if (operand instanceof VariableExpr) {
+ String hint = getHint(token);
if (hint != null && hint.equals(BROADCAST_JOIN_HINT)) {
- broadcast = true;
+ broadcast = true;
}
}
}
@@ -1261,6 +1265,10 @@
(
LOOKAHEAD(2)( "<" | ">" | "<=" | ">=" | "=" | "!=" |"~=")
{
+ String mhint = getHint(token);
+ if (mhint != null && mhint.equals(INDEXED_NESTED_LOOP_JOIN_HINT)) {
+ annotation = IndexedNLJoinExpressionAnnotation.INSTANCE;
+ }
if (op == null) {
op = new OperatorExpr();
op.addOperand(operand, broadcast);
@@ -1273,18 +1281,21 @@
operand = AddExpr()
{
- broadcast = false;
- if (operand instanceof VariableExpr) {
- String hint = getHint(token);
+ broadcast = false;
+ if (operand instanceof VariableExpr) {
+ String hint = getHint(token);
if (hint != null && hint.equals(BROADCAST_JOIN_HINT)) {
broadcast = true;
}
- }
+ }
op.addOperand(operand, broadcast);
}
)?
{
+ if (annotation != null) {
+ op.addHint(annotation);
+ }
return op==null? operand: op;
}
}
@@ -1728,14 +1739,18 @@
Expression FunctionCallExpr() throws ParseException:
{
CallExpr pf = new CallExpr();
- List<Expression > argList = new ArrayList<Expression >();
+ List<Expression> argList = new ArrayList<Expression>();
Expression tmp;
int arity = 0;
Token funcName;
}
-{
+{
( <IDENTIFIER> | <DATASET> )
{
+ String hint = getHint(token);
+ if (hint != null && hint.startsWith(INDEXED_NESTED_LOOP_JOIN_HINT)) {
+ pf.addHint(IndexedNLJoinExpressionAnnotation.INSTANCE);
+ }
funcName = getToken(0);
}
<LEFTPAREN> (tmp = Expression()
@@ -1744,7 +1759,7 @@
arity ++;
} ("," tmp = Expression() { argList.add(tmp); arity++; })*)? <RIGHTPAREN>
- {
+ {
AsterixFunction fd = lookupFunctionSignature(funcName.toString(), arity);
if(fd == null)
{