[ASTERIXDB-3571][COMP] Infer Selection predicates from join predicates
Ext-ref: MB-65670
Change-Id: I7766e107bfadd915b6d760fc1ab74a6651c49db6
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19943
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: <preetham02@apache.org>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java
index 0f4cc82..a75a90b 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java
@@ -19,10 +19,12 @@
package org.apache.asterix.optimizer.rules.cbo;
+import java.util.ArrayList;
import java.util.List;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
public class JoinCondition {
@@ -47,6 +49,7 @@
protected comparisonOp comparisonType;
protected JoinOperator joinOp = null;
protected List<LogicalVariable> usedVars = null;
+ protected List<SelectOperator> derivedSelOps = new ArrayList<>(); // only one of them will be regarded as original
protected enum comparisonOp {
OP_EQ,
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
index 73be298..16518c2 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
@@ -1334,6 +1334,7 @@
findJoinConditionsAndDoTC();
addTCSelectionPredicates();
+ keepOnlyOneSelectivityHint();
int lastBaseLevelJnNum = enumerateBaseLevelJoinNodes();
if (lastBaseLevelJnNum == PlanNode.NO_PLAN) {
return PlanNode.NO_PLAN;
@@ -1375,6 +1376,26 @@
return lastJn.cheapestPlanIndex;
}
+ // For every join condition, leave the PredicateCardinalityAnnotation on at most one of its derivedSelOps.
+ private void keepOnlyOneSelectivityHint() {
+ for (JoinCondition jc : joinConditions) {
+ int n = 0; // number of select predicates in jc.derivedSelOps that carry the annotation
+ for (SelectOperator selOp : jc.derivedSelOps) {
+ AbstractFunctionCallExpression afce = (AbstractFunctionCallExpression) selOp.getCondition().getValue();
+ if (afce.hasAnnotation(PredicateCardinalityAnnotation.class)) {
+ n++;
+ }
+ }
+ if (n <= 1) { // R.a = S.a and R.a < 1
+ continue; // perfect. At most one predicate has the annotation; still check the remaining join conditions
+ }
+ // More than one has it (expected n == 2: both of them have it), so remove it from the last one.
+ // R.a = S.a and R.a < 1 and S.a < 1; user typed in both predicates, so each one looks derived.
+ AbstractFunctionCallExpression last = (AbstractFunctionCallExpression) jc.derivedSelOps.get(n - 1).getCondition().getValue();
+ last.removeAnnotation(PredicateCardinalityAnnotation.class);
+ }
+ }
+
// R.a = S.a and R.a op operand ==> S.a op operand
private void addTCSelectionPredicates() throws AlgebricksException {
List<SelectOperator> existingSelOps = new ArrayList<>();
@@ -1396,33 +1417,33 @@
List<JoinCondition> jcs = findVarinJoinPreds(var);
for (JoinCondition jc : jcs) { // join predicate can be R.a = S.a or S.a = R.a. Check for both cases
if (var == jc.usedVars.get(0)) { // R.a
- newSelOp = makeNewSelOper(existingSelOps, jc.usedVars.get(1), // == S.a
+ newSelOp = makeNewSelOper(jc, existingSelOps, jc.usedVars.get(1), // == S.a
((AbstractFunctionCallExpression) selOp.getCondition().getValue()).getFunctionInfo(), // op
exp.getArguments().get(1)); // operand
if (newSelOp != null) { // does not already exist
- addSelOpToLeafInput(jc.usedVars.get(1), newSelOp);
+ addSelOpToLeafInput(jc, jc.usedVars.get(1), newSelOp);
}
} else if (var == jc.usedVars.get(1)) { // R.a
- newSelOp = makeNewSelOper(existingSelOps, jc.usedVars.get(0), // == S.a
+ newSelOp = makeNewSelOper(jc, existingSelOps, jc.usedVars.get(0), // == S.a
((AbstractFunctionCallExpression) selOp.getCondition().getValue()).getFunctionInfo(), // op
exp.getArguments().get(1)); // operand
if (newSelOp != null) {
- addSelOpToLeafInput(jc.usedVars.get(0), newSelOp);
+ addSelOpToLeafInput(jc, jc.usedVars.get(0), newSelOp);
}
}
}
}
}
- private SelectOperator makeNewSelOper(List<SelectOperator> existingSelOps, LogicalVariable var, IFunctionInfo tag,
- Mutable<ILogicalExpression> arg) throws AlgebricksException {
+ private SelectOperator makeNewSelOper(JoinCondition jc, List<SelectOperator> existingSelOps, LogicalVariable var,
+ IFunctionInfo tag, Mutable<ILogicalExpression> arg) throws AlgebricksException {
List<Mutable<ILogicalExpression>> arguments = new ArrayList<>();
VariableReferenceExpression e1 = new VariableReferenceExpression(var);
arguments.add(new MutableObject<>(e1)); // S.a
arguments.add(new MutableObject<>(arg.getValue())); // this will be the operand
ScalarFunctionCallExpression expr = new ScalarFunctionCallExpression(tag, arguments); //S.a op operand
SelectOperator newsel = new SelectOperator(new MutableObject<>(expr), null, null);
- if (newSelNotPresent(newsel, existingSelOps)) {
+ if (newSelNotPresent(jc, newsel, existingSelOps)) {
LOGGER.info("adding newsel " + newsel.getCondition());
return newsel; // add since it does not exist
} else {
@@ -1430,21 +1451,33 @@
}
}
- private boolean newSelNotPresent(SelectOperator newsel, List<SelectOperator> existingSelOps) {
+ private boolean newSelNotPresent(JoinCondition jc, SelectOperator newsel, List<SelectOperator> existingSelOps) {
for (SelectOperator existingSelOp : existingSelOps) {
if (newsel.getCondition().equals(existingSelOp.getCondition())) {
+ PredicateCardinalityAnnotation anno = new PredicateCardinalityAnnotation(0.9999); // cannot be 1.0 as check in setCardsAndSizes will not work
+ AbstractFunctionCallExpression afce =
+ (AbstractFunctionCallExpression) existingSelOp.getCondition().getValue();
+ afce.putAnnotation(anno);
+ jc.derivedSelOps.add(existingSelOp);
return false;
}
}
return true;
}
- private void addSelOpToLeafInput(LogicalVariable var, SelectOperator newSelOp) throws AlgebricksException {
+ private void addSelOpToLeafInput(JoinCondition jc, LogicalVariable var, SelectOperator newSelOp)
+ throws AlgebricksException {
int l = varLeafInputIds.get(var); // get the corresponding leafInput using the map
ILogicalOperator parent = leafInputs.get(l - 1);
ILogicalOperator child = parent.getInputs().get(0).getValue();
parent.getInputs().get(0).setValue(newSelOp);
newSelOp.getInputs().add(new MutableObject<>(child));
+ // Add the selectivity annotation with selectivity 0.9999 (just under 1.0).
+ // Note that the actual cardinality will be different, but all join cardinalities should be ok.
+ PredicateCardinalityAnnotation anno = new PredicateCardinalityAnnotation(0.9999);
+ AbstractFunctionCallExpression afce = (AbstractFunctionCallExpression) newSelOp.getCondition().getValue();
+ afce.putAnnotation(anno);
+ jc.derivedSelOps.add(newSelOp);
optCtx.computeAndSetTypeEnvironmentForOperator(newSelOp);
}
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
index 3451966..5cbb6f8 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
@@ -371,6 +371,7 @@
// There are predicates here. So skip the predicates and get the original dataset card.
// Now apply all the predicates and get the card after all predicates are applied.
+ // We call the sampling query even if a selectivity hint was provided because we have to get the lengths of the variables.
result = joinEnum.getStatsHandle().runSamplingQueryProjection(joinEnum.optCtx, leafInput, jnArrayIndex,
primaryKey);
double predicateCardinalityFromSample = joinEnum.getStatsHandle().findPredicateCardinality(result, true);
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index 18bc53f..ab57d3f 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -153,15 +153,7 @@
return 1.0;
}
double productivity = anno.getJoinProductivity();
- if (productivity <= 0) {
- IWarningCollector warningCollector = joinEnum.optCtx.getWarningCollector();
- if (warningCollector.shouldWarn()) {
- warningCollector.warn(Warning.of(joinExpr.getSourceLocation(), ErrorCode.INAPPLICABLE_HINT,
- "productivity",
- "Productivity specified: " + productivity + ", has to be a decimal value greater than 0"));
- }
- return 1.0;
- }
+
if (leftIndex == idx1) {
return productivity / card2;
} else {