[ASTERIXDB-3163][COMP] CH2 query was not being optimized by CBO
Change-Id: If2aab5785091464be9a529aacdcda3a9bba70310
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17483
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: <murali.krishna@couchbase.com>
Reviewed-by: Vijay Sarathy <vijay.sarathy@couchbase.com>
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java
index 0889856..b9d6050 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java
@@ -46,7 +46,6 @@
import org.apache.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
import org.apache.hyracks.algebricks.core.algebra.expressions.HashJoinExpressionAnnotation;
import org.apache.hyracks.algebricks.core.algebra.expressions.IExpressionAnnotation;
-import org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
import org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
@@ -95,8 +94,7 @@
// If cboMode is true, then all datasets need to have samples, otherwise the check in doAllDataSourcesHaveSamples()
// further below will return false.
ILogicalOperator op = opRef.getValue();
- if (!((op.getOperatorTag() == LogicalOperatorTag.INNERJOIN)
- || ((op.getOperatorTag() == LogicalOperatorTag.DISTRIBUTE_RESULT)))) {
+ if (!(joinClause(op) || ((op.getOperatorTag() == LogicalOperatorTag.DISTRIBUTE_RESULT)))) {
return false;
}
@@ -106,17 +104,16 @@
}
List<ILogicalOperator> joinOps = new ArrayList<>();
- List<ILogicalOperator> internalEdges = new ArrayList<>();
HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap = new HashMap<>();
// The data scan operators. Will be in the order of the from clause.
// Important for position ordering when assigning bits to join expressions.
List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps = new ArrayList<>();
- HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap = new HashMap<>();
+ List<AssignOperator> assignOps = new ArrayList<>();
IPlanPrettyPrinter pp = context.getPrettyPrinter();
printPlan(pp, (AbstractLogicalOperator) op, "Original Whole plan1");
- boolean canTransform = getJoinOpsAndLeafInputs(op, emptyTupleAndDataSourceOps, joinLeafInputsHashMap,
- dataSourceEmptyTupleHashMap, internalEdges, joinOps);
+ boolean canTransform =
+ getJoinOpsAndLeafInputs(op, emptyTupleAndDataSourceOps, joinLeafInputsHashMap, joinOps, assignOps);
if (!canTransform) {
return false;
@@ -133,8 +130,7 @@
int numberOfFromTerms = emptyTupleAndDataSourceOps.size();
joinEnum.initEnum((AbstractLogicalOperator) op, cboMode, cboTestMode, numberOfFromTerms,
- emptyTupleAndDataSourceOps, joinLeafInputsHashMap, dataSourceEmptyTupleHashMap, internalEdges, joinOps,
- context);
+ emptyTupleAndDataSourceOps, joinLeafInputsHashMap, joinOps, assignOps, context);
if (cboMode) {
if (!doAllDataSourcesHaveSamples(emptyTupleAndDataSourceOps, context)) {
@@ -142,8 +138,18 @@
}
}
- if (internalEdges.size() > 0) {
- pushAssignsIntoLeafInputs(joinLeafInputsHashMap, internalEdges);
+ if (LOGGER.isTraceEnabled()) {
+ LOGGER.trace("---------------------------- Printing Leaf Inputs1");
+ printLeafPlans(pp, joinLeafInputsHashMap);
+ }
+
+ if (assignOps.size() > 0) {
+ pushAssignsIntoLeafInputs(pp, joinLeafInputsHashMap, assignOps);
+ }
+
+ if (LOGGER.isTraceEnabled()) {
+ LOGGER.trace("---------------------------- Printing Leaf Inputs2");
+ printLeafPlans(pp, joinLeafInputsHashMap);
}
int cheapestPlan = joinEnum.enumerateJoins(); // MAIN CALL INTO CBO
@@ -156,7 +162,7 @@
if (numberOfFromTerms > 1) {
buildNewTree(cheapestPlanNode, joinLeafInputsHashMap, joinOps, new MutableInt(0));
printPlan(pp, (AbstractLogicalOperator) joinOps.get(0), "New Whole Plan after buildNewTree 1");
- ILogicalOperator root = addConstantInternalEdgesAtTheTop(joinOps.get(0), internalEdges);
+ ILogicalOperator root = addRemainingAssignsAtTheTop(joinOps.get(0), assignOps);
printPlan(pp, (AbstractLogicalOperator) joinOps.get(0), "New Whole Plan after buildNewTree 2");
printPlan(pp, (AbstractLogicalOperator) root, "New Whole Plan after buildNewTree");
// this will be the new root
@@ -184,6 +190,14 @@
return true;
}
+ private boolean joinClause(ILogicalOperator op) {
+ if (op.getOperatorTag() == LogicalOperatorTag.INNERJOIN)
+ return true;
+ //if (op.getOperatorTag() == LogicalOperatorTag.LEFTOUTERJOIN)
+ //return true;
+ return false;
+ }
+
private boolean getCBOMode(IOptimizationContext context) {
PhysicalOptimizationConfig physOptConfig = context.getPhysicalOptimizationConfig();
return physOptConfig.getCBOMode();
@@ -221,12 +235,51 @@
* Check to see if there is only one assign here and nothing below that other than a join.
* have not seen cases where there is more than one assign in a leafinput.
*/
- private boolean onlyOneAssign(ILogicalOperator nextOp) {
- if (nextOp.getOperatorTag() != LogicalOperatorTag.ASSIGN) {
- return false;
+ private boolean onlyOneAssign(ILogicalOperator op, List<AssignOperator> assignOps) {
+ if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
+ AssignOperator aOp = (AssignOperator) op;
+ assignOps.add(aOp);
+ op = op.getInputs().get(0).getValue();
}
- List<Mutable<ILogicalOperator>> nextOpInputs = nextOp.getInputs();
- return nextOpInputs.get(0).getValue().getOperatorTag() == LogicalOperatorTag.INNERJOIN;
+ return (joinClause(op));
+ }
+
+ // An internal edge must contain only assigns followed by an inner join
+ private int numVarRefExprs(AssignOperator aOp) {
+ List<Mutable<ILogicalExpression>> exprs = aOp.getExpressions();
+ int count = 0;
+ for (Mutable<ILogicalExpression> exp : exprs) {
+ if (exp.getValue().getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
+ AbstractFunctionCallExpression afcExpr = (AbstractFunctionCallExpression) exp.getValue();
+ for (Mutable<ILogicalExpression> arg : afcExpr.getArguments()) {
+ if (arg.getValue().getExpressionTag() == LogicalExpressionTag.VARIABLE) {
+ count++;
+ }
+ }
+ }
+ }
+
+ return count;
+ }
+
+ private boolean onlyAssigns(ILogicalOperator op, List<AssignOperator> assignOps) {
+ while (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
+ AssignOperator aOp = (AssignOperator) op;
+ int count = numVarRefExprs(aOp);
+ if (count > 1) {
+ return false;
+ }
+ assignOps.add(aOp);
+ op = op.getInputs().get(0).getValue();
+ }
+ return (joinClause(op));
+ }
+
+ private ILogicalOperator skipPastAssigns(ILogicalOperator nextOp) {
+ while (nextOp.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
+ nextOp = nextOp.getInputs().get(0).getValue();
+ }
+ return nextOp;
}
private ILogicalOperator findSelectOrDataScan(ILogicalOperator op) {
@@ -254,20 +307,19 @@
*/
private boolean getJoinOpsAndLeafInputs(ILogicalOperator op,
List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps,
- HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap,
- HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap,
- List<ILogicalOperator> internalEdges, List<ILogicalOperator> joinOps) {
+ HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap, List<ILogicalOperator> joinOps,
+ List<AssignOperator> assignOps) {
if (op.getOperatorTag() == LogicalOperatorTag.LEFTOUTERJOIN) {
return false;
}
- if (op.getOperatorTag() == LogicalOperatorTag.INNERJOIN) {
+ if (joinClause(op)) {
joinOps.add(op);
for (int i = 0; i < 2; i++) {
ILogicalOperator nextOp = op.getInputs().get(i).getValue();
boolean canTransform = getJoinOpsAndLeafInputs(nextOp, emptyTupleAndDataSourceOps,
- joinLeafInputsHashMap, dataSourceEmptyTupleHashMap, internalEdges, joinOps);
+ joinLeafInputsHashMap, joinOps, assignOps);
if (!canTransform) {
return false;
}
@@ -288,20 +340,18 @@
} else {
joinLeafInputsHashMap.put(etsOp, op);
}
- dataSourceEmptyTupleHashMap.put(dataSourceOp, etsOp);
} else { // This must be an internal edge
- if (onlyOneAssign(op)) {
+ if (onlyAssigns(op, assignOps)) {
+ //if (onlyOneAssign(op, assignOps)) {
// currently, will handle only assign statement and nothing else in an internal Edge.
// we can lift this restriction later if the need arises. This just makes some code easier.
- internalEdges.add(op);
- boolean canTransform =
- getJoinOpsAndLeafInputs(op.getInputs().get(0).getValue(), emptyTupleAndDataSourceOps,
- joinLeafInputsHashMap, dataSourceEmptyTupleHashMap, internalEdges, joinOps);
+
+ ILogicalOperator skipAssisgnsOp = skipPastAssigns(op);
+ boolean canTransform = getJoinOpsAndLeafInputs(skipAssisgnsOp, emptyTupleAndDataSourceOps,
+ joinLeafInputsHashMap, joinOps, assignOps);
if (!canTransform) {
return false;
}
-
- //internalEdges.add(op); // better to store the parent; do this soon.
} else {
return false;
}
@@ -370,16 +420,12 @@
}
}
- //Internal edges are assign statements. The RHS has a variable in it.
- // We need to find the internal edge that has a variable coming from this leaf leafInput.
- private int findInternalEdge(ILogicalOperator leafInput, List<ILogicalOperator> internalEdges)
- throws AlgebricksException {
+ private int findAssignOp(ILogicalOperator leafInput, List<AssignOperator> assignOps) throws AlgebricksException {
int i = -1;
- for (ILogicalOperator ie : internalEdges) {
+ for (AssignOperator aOp : assignOps) {
i++;
// this will be an Assign, so no need to check
- AssignOperator aOp = (AssignOperator) ie;
List<LogicalVariable> vars = new ArrayList<>();
aOp.getExpressions().get(0).getValue().getUsedVariables(vars);
HashSet<LogicalVariable> vars2 = new HashSet<>();
@@ -392,11 +438,9 @@
return -1;
}
- private ILogicalOperator addAssignToLeafInput(ILogicalOperator leafInput, ILogicalOperator internalEdge) {
- ILogicalOperator root = leafInput;
+ private ILogicalOperator addAssignToLeafInput(ILogicalOperator leafInput, AssignOperator aOp) {
// this will be an Assign, so no need to check
- AssignOperator aOp = (AssignOperator) internalEdge;
- aOp.getInputs().get(0).setValue(root);
+ aOp.getInputs().get(0).setValue(leafInput);
return aOp;
}
@@ -530,15 +574,11 @@
// in some very rare cases, there is an internal edge that has an assign statement such as $$var = 20 but this variable
// is not used anywhere in the current join graph but is used outside the current join graph. So we add this assign to the top of
// our plan, so the rest of the code will be happy. Strange that this assign appears in the join graph.
- private ILogicalOperator addConstantInternalEdgesAtTheTop(ILogicalOperator op,
- List<ILogicalOperator> internalEdges) {
- if (internalEdges.size() == 0) {
- return op;
- }
+
+ private ILogicalOperator addRemainingAssignsAtTheTop(ILogicalOperator op, List<AssignOperator> assignOps) {
+
ILogicalOperator root = op;
- for (ILogicalOperator ie : internalEdges) {
- // this will be an Assign, so no need to check
- AssignOperator aOp = (AssignOperator) ie;
+ for (AssignOperator aOp : assignOps) {
aOp.getInputs().get(0).setValue(root);
root = aOp;
}
@@ -569,45 +609,25 @@
// for every internal edge assign (again assuming only 1 for now), find the corresponding leafInput and place the assign
// on top of that LeafInput. Modify the joinLeafInputsHashMap as well.
- private void pushAssignsIntoLeafInputs(HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap,
- List<ILogicalOperator> internalEdges) throws AlgebricksException {
+ private void pushAssignsIntoLeafInputs(IPlanPrettyPrinter pp,
+ HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap, List<AssignOperator> assignOps)
+ throws AlgebricksException {
for (Map.Entry<EmptyTupleSourceOperator, ILogicalOperator> mapElement : joinLeafInputsHashMap.entrySet()) {
ILogicalOperator joinLeafInput = mapElement.getValue();
+ printPlan(pp, (AbstractLogicalOperator) joinLeafInput, "Incoming leaf Input");
EmptyTupleSourceOperator ets = mapElement.getKey();
- int internalEdgeNumber = findInternalEdge(joinLeafInput, internalEdges);
- if (internalEdgeNumber != -1) {
- joinLeafInput = addAssignToLeafInput(joinLeafInput, internalEdges.get(internalEdgeNumber));
+ int assignNumber = findAssignOp(joinLeafInput, assignOps);
+ if (assignNumber != -1) {
+ joinLeafInput = addAssignToLeafInput(joinLeafInput, assignOps.get(assignNumber));
+ printPlan(pp, (AbstractLogicalOperator) joinLeafInput, "Modified leaf Input");
joinLeafInputsHashMap.put(ets, joinLeafInput);
- internalEdges.remove(internalEdgeNumber); // no longer needed
+ assignOps.remove(assignNumber);
}
}
}
- private boolean substituteVarOnce(ILogicalExpression exp, LogicalVariable oldVar, LogicalVariable newVar) {
- switch (exp.getExpressionTag()) {
- case FUNCTION_CALL:
- AbstractFunctionCallExpression fun = (AbstractFunctionCallExpression) exp;
- for (int i = 0; i < fun.getArguments().size(); i++) {
- ILogicalExpression arg = fun.getArguments().get(i).getValue();
- if (substituteVarOnce(arg, oldVar, newVar)) {
- return true;
- }
- }
- return false;
- case VARIABLE:
- VariableReferenceExpression varExpr = (VariableReferenceExpression) exp;
- if (varExpr.getVariableReference().equals(oldVar)) {
- varExpr.setVariable(newVar);
- return true;
- }
- return false;
- default:
- return false;
- }
- }
-
// check to see if every dataset has a sample. If not, CBO code cannot run. A warning message must be issued as well.
private boolean doAllDataSourcesHaveSamples(
List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps,
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
index c8ac57e..9154bd9 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
@@ -81,9 +81,8 @@
protected int jnArraySize;
protected List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps;
protected Map<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap;
- protected Map<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap;
protected List<ILogicalExpression> singleDatasetPreds;
- protected List<ILogicalOperator> internalEdges;
+ protected List<AssignOperator> assignOps;
protected List<ILogicalOperator> joinOps;
protected ILogicalOperator localJoinOp; // used in nestedLoopsApplicable code.
protected IOptimizationContext optCtx;
@@ -104,12 +103,11 @@
public void initEnum(AbstractLogicalOperator op, boolean cboMode, boolean cboTestMode, int numberOfFromTerms,
List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps,
- Map<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap,
- Map<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap,
- List<ILogicalOperator> internalEdges, List<ILogicalOperator> joinOps, IOptimizationContext context) {
+ Map<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap, List<ILogicalOperator> joinOps,
+ List<AssignOperator> assignOps, IOptimizationContext context) {
this.singleDatasetPreds = new ArrayList<>();
this.joinConditions = new ArrayList<>();
- this.internalEdges = new ArrayList<>();
+ this.assignOps = new ArrayList<>();
this.allPlans = new ArrayList<>();
this.numberOfTerms = numberOfFromTerms;
this.cboMode = cboMode;
@@ -119,8 +117,7 @@
this.physOptConfig = context.getPhysicalOptimizationConfig();
this.emptyTupleAndDataSourceOps = emptyTupleAndDataSourceOps;
this.joinLeafInputsHashMap = joinLeafInputsHashMap;
- this.dataSourceEmptyTupleHashMap = dataSourceEmptyTupleHashMap;
- this.internalEdges = internalEdges;
+ this.assignOps = assignOps;
this.joinOps = joinOps;
this.op = op;
this.forceJoinOrderMode = getForceJoinOrderMode(context);
@@ -168,10 +165,6 @@
return joinLeafInputsHashMap;
}
- public Map<DataSourceScanOperator, EmptyTupleSourceOperator> getDataSourceEmptyTupleHashMap() {
- return dataSourceEmptyTupleHashMap;
- }
-
public ILogicalOperator findLeafInput(List<LogicalVariable> logicalVars) throws AlgebricksException {
Set<LogicalVariable> vars = new HashSet<>();
for (Pair<EmptyTupleSourceOperator, DataSourceScanOperator> emptyTupleAndDataSourceOp : emptyTupleAndDataSourceOps) {
@@ -339,16 +332,14 @@
}
// so this variable must be in an internal edge in an assign statement. Find the RHS variables there
- for (ILogicalOperator op : this.internalEdges) {
- if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
- List<LogicalVariable> vars2 = new ArrayList<>();
- VariableUtilities.getUsedVariables(op, vars2);
- int bits = 0;
- for (LogicalVariable lv2 : vars2) {
- bits |= findBits(lv2);
- }
- return bits;
+ for (AssignOperator op : this.assignOps) {
+ List<LogicalVariable> vars2 = new ArrayList<>();
+ VariableUtilities.getUsedVariables(op, vars2);
+ int bits = 0;
+ for (LogicalVariable lv2 : vars2) {
+ bits |= findBits(lv2);
}
+ return bits;
}
// should never reach this because every variable must exist in some leaf input.
return JoinNode.NO_JN;
@@ -387,8 +378,7 @@
usedVars.clear();
ILogicalExpression expr = jc.joinCondition;
expr.getUsedVariables(usedVars);
- for (ILogicalOperator ie : internalEdges) {
- AssignOperator aOp = (AssignOperator) ie;
+ for (AssignOperator aOp : assignOps) {
for (int i = 0; i < aOp.getVariables().size(); i++) {
if (usedVars.contains(aOp.getVariables().get(i))) {
OperatorManipulationUtil.replaceVarWithExpr((AbstractFunctionCallExpression) expr,