[ASTERIXDB-3303][COMP] projection sizes
Change-Id: I0f9df7b7937e0248c6fdae84a527680fac2fa915
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17946
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Vijay Sarathy <vijay.sarathy@couchbase.com>
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
index d33f8ce..244279d 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
@@ -38,8 +38,8 @@
import org.apache.asterix.metadata.declared.MetadataProvider;
import org.apache.asterix.metadata.declared.SampleDataSource;
import org.apache.asterix.metadata.entities.Index;
-import org.apache.asterix.om.base.AInt64;
import org.apache.asterix.om.base.AOrderedList;
+import org.apache.asterix.om.base.ARecord;
import org.apache.asterix.om.base.IAObject;
import org.apache.asterix.om.constants.AsterixConstantValue;
import org.apache.asterix.om.functions.BuiltinFunctions;
@@ -898,7 +898,12 @@
// There are predicates here. So skip the predicates and get the original dataset card.
// Now apply all the predicates and get the card after all predicates are applied.
result = stats.runSamplingQuery(this.optCtx, leafInput);
- double predicateCardinality = ((double) ((AInt64) result.get(0).get(0)).getLongValue());
+ double predicateCardinality = stats.findPredicateCardinality(result);
+
+ double projectedSize = -1.0;
+ if (predicateCardinality > 0.0) { // otherwise, we get nulls for the averages
+ projectedSize = stats.findProjectedSize(result);
+ }
if (predicateCardinality == 0.0) {
predicateCardinality = 0.0001 * idxDetails.getSampleCardinalityTarget();
}
@@ -913,11 +918,17 @@
.warn(Warning.of(scanOp.getSourceLocation(), ErrorCode.SAMPLE_HAS_ZERO_ROWS));
}
}
-
finalDatasetCard *= predicateCardinality / sampleCard;
// now switch the input back.
parent.getInputs().get(0).setValue(scanOp);
jn.setCardinality(finalDatasetCard);
+ if (projectedSize > 0.0) {
+ jn.setAvgDocSize(projectedSize);
+ } else {
+ ARecord record = (ARecord) (((IAObject) ((List<IAObject>) (result.get(0))).get(0)));
+ int fields = record.numberOfFields();
+ jn.setAvgDocSize(fields * 100); // cant think of anything better... cards are more important anyway
+ }
}
}
dataScanPlan = jn.addSingleDatasetPlans();
@@ -1052,7 +1063,8 @@
SelectOperator selOp = new SelectOperator(new MutableObject<>(exp));
selOp.getInputs().add(new MutableObject<>(leafInput));
result = stats.runSamplingQuery(this.optCtx, selOp);
- predicateCardinality = ((double) ((AInt64) result.get(0).get(0)).getLongValue());
+ predicateCardinality = stats.findPredicateCardinality(result);
+
if (predicateCardinality == 0.0) {
predicateCardinality = 0.0001 * idxDetails.getSampleCardinalityTarget();
}
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index d59fd73..364289c 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -24,17 +24,24 @@
import java.util.List;
import org.apache.asterix.compiler.provider.IRuleSetFactory;
+import org.apache.asterix.lang.common.expression.LiteralExpr;
+import org.apache.asterix.lang.common.literal.StringLiteral;
+import org.apache.asterix.lang.common.util.FunctionUtil;
import org.apache.asterix.metadata.declared.DataSource;
import org.apache.asterix.metadata.declared.DataSourceId;
import org.apache.asterix.metadata.declared.MetadataProvider;
import org.apache.asterix.metadata.declared.SampleDataSource;
import org.apache.asterix.metadata.entities.Index;
+import org.apache.asterix.om.base.ADouble;
import org.apache.asterix.om.base.AInt64;
+import org.apache.asterix.om.base.ARecord;
import org.apache.asterix.om.base.IAObject;
+import org.apache.asterix.om.constants.AsterixConstantValue;
import org.apache.asterix.om.functions.BuiltinFunctionInfo;
import org.apache.asterix.om.functions.BuiltinFunctions;
import org.apache.asterix.optimizer.base.AnalysisUtil;
import org.apache.asterix.optimizer.rules.am.array.AbstractOperatorFromSubplanRewrite;
+import org.apache.asterix.translator.ConstantHelper;
import org.apache.commons.lang3.mutable.Mutable;
import org.apache.commons.lang3.mutable.MutableObject;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
@@ -49,11 +56,17 @@
import org.apache.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
import org.apache.hyracks.algebricks.core.algebra.expressions.JoinProductivityAnnotation;
import org.apache.hyracks.algebricks.core.algebra.expressions.PredicateCardinalityAnnotation;
+import org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
+import org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
import org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import org.apache.hyracks.algebricks.core.algebra.plan.ALogicalPlanImpl;
import org.apache.hyracks.algebricks.core.algebra.util.OperatorManipulationUtil;
import org.apache.hyracks.algebricks.core.algebra.util.OperatorPropertiesUtil;
import org.apache.hyracks.api.exceptions.ErrorCode;
@@ -489,7 +502,7 @@
}
}
- double predicateCardinality = (double) ((AInt64) result.get(0).get(0)).getLongValue();
+ double predicateCardinality = findPredicateCardinality(result); // this routine knows how to look into the record inside result
if (predicateCardinality == 0.0) {
predicateCardinality = 0.0001 * idxDetails.getSampleCardinalityTarget();
}
@@ -512,7 +525,7 @@
selOp.getCondition().setValue(ConstantExpression.TRUE);
result = runSamplingQuery(optCtx, selOp);
selOp.getCondition().setValue(saveExprs);
- sampleCard = (double) ((AInt64) result.get(0).get(0)).getLongValue();
+ sampleCard = findPredicateCardinality(result);
}
// switch the scanOp back
parent.getInputs().get(0).setValue(scanOp);
@@ -521,7 +534,26 @@
return sel;
}
- private void transformtoAnyInPlan(SelectOperator newSelOp) {
+ public double findPredicateCardinality(List<List<IAObject>> result) {
+ ARecord record = (ARecord) (((IAObject) ((List<IAObject>) (result.get(0))).get(0)));
+ int fields = record.numberOfFields();
+ IAObject first = record.getValueByPos(0);
+ double predicateCardinality = ((double) ((AInt64) first).getLongValue());
+ return predicateCardinality;
+ }
+
+ // Can have null returned, so this routine should only be called if at least tuple is returned by the sample
+ public double findProjectedSize(List<List<IAObject>> result) {
+ ARecord record = (ARecord) (((IAObject) ((List<IAObject>) (result.get(0))).get(0)));
+ // Now figure out the projected size
+ double projectedSize = 0.0;
+ int fields = record.numberOfFields();
+ for (int j = 1; j < fields; j++) {
+ IAObject field = record.getValueByPos(j);
+ double size = ((double) ((ADouble) field).getDoubleValue());
+ projectedSize += size;
+ }
+ return projectedSize;
}
protected List<List<IAObject>> runSamplingQuery(IOptimizationContext ctx, ILogicalOperator logOp)
@@ -531,28 +563,107 @@
IOptimizationContext newCtx = ctx.getOptimizationContextFactory().cloneOptimizationContext(ctx);
ILogicalOperator newScanOp = OperatorManipulationUtil.bottomUpCopyOperators(logOp);
+ // Now we have to generate plans like this on top of the scanOp (logOp)
+ // project ([$$79])
+ // assign [$$79] <- [{"$1": $$73, "$2": $$74, "$3": $$75, "$4": $$76, "$5": $$77, "$6": $$78}]
+ // aggregate [$$73, $$74, $$75, $$76, $$77, $$78] <- [agg-count(true), sql-avg($$68), sql-avg($$69), sql-avg($$70), sql-avg($$71), sql-avg($$72)]
+ // assign [$$68, $$69, $$70, $$71, $$72] <- [serialized-size($$60), serialized-size($$str), serialized-size($$61), serialized-size($$65), serialized-size($$67)]
+ // add the assign [$$56, ..., ] <- [encoded-size($$67), ..., ] on top of newAggOp
+ List<LogicalVariable> vars = new ArrayList<>();
+ VariableUtilities.getLiveVariables(logOp, vars);
+ LogicalVariable newVar;
+ // array to keep track of the assigns
+ List<LogicalVariable> newVars = new ArrayList<>();
+ List<Mutable<ILogicalExpression>> exprs = new ArrayList<>();
+
+ // create this assignOperator
+ // assign [$$68, $$69, $$70, $$71, $$72] <- [serialized-size($$60), serialized-size($$str), serialized-size($$61), serialized-size($$65), serialized-size($$67)]
+ int count = 0;
+ for (LogicalVariable lv : vars) {
+ count++;
+ VariableReferenceExpression varRefExpr = new VariableReferenceExpression(lv);
+ List<Mutable<ILogicalExpression>> vars2 = new ArrayList<>();
+ vars2.add(new MutableObject<>(varRefExpr));
+ ScalarFunctionCallExpression func = new ScalarFunctionCallExpression(
+ FunctionUtil.getFunctionInfo(BuiltinFunctions.SERIALIZED_SIZE), vars2);
+ exprs.add(new MutableObject<>(func));
+ newVar = newCtx.newVar();
+ newVars.add(newVar);
+ }
+
+ AssignOperator assignOp = new AssignOperator(newVars, exprs);
+ assignOp.getInputs().add(new MutableObject<>(newScanOp));
+ Mutable<ILogicalOperator> tmpRef = new MutableObject<>(assignOp);
+ String viewInPlan = new ALogicalPlanImpl(tmpRef).toString();
+
+ // aggregate [$$73, $$74, $$75, $$76, $$77, $$78] <- [agg-count(true), sql-avg($$68), sql-avg($$69), sql-avg($$70), sql-avg($$71), sql-avg($$72)]
+ // add the count-agg (true) first
+ List<LogicalVariable> newVars2 = new ArrayList<>();
+ List<Mutable<ILogicalExpression>> aggExprList = new ArrayList<>();
List<Mutable<ILogicalExpression>> aggFunArgs = new ArrayList<>(1);
aggFunArgs.add(new MutableObject<>(ConstantExpression.TRUE));
BuiltinFunctionInfo countFn = BuiltinFunctions.getBuiltinFunctionInfo(BuiltinFunctions.COUNT);
AggregateFunctionCallExpression aggExpr = new AggregateFunctionCallExpression(countFn, false, aggFunArgs);
-
- List<Mutable<ILogicalExpression>> aggExprList = new ArrayList<>(1);
aggExprList.add(new MutableObject<>(aggExpr));
-
- List<LogicalVariable> aggVarList = new ArrayList<>(1);
LogicalVariable aggVar = newCtx.newVar();
- aggVarList.add(aggVar);
+ newVars2.add(aggVar);
- AggregateOperator newAggOp = new AggregateOperator(aggVarList, aggExprList);
- newAggOp.getInputs().add(new MutableObject<>(newScanOp));
+ // Now add the other aggs
+ for (int i = 0; i < count; i++) {
+ VariableReferenceExpression varRefExpr;
+ varRefExpr = new VariableReferenceExpression(newVars.get(i));
+ List<Mutable<ILogicalExpression>> vars2 = new ArrayList<>();
+ vars2.add(new MutableObject<>(varRefExpr));
+ BuiltinFunctionInfo avgFn = BuiltinFunctions.getBuiltinFunctionInfo(BuiltinFunctions.SQL_AVG);
+ aggExpr = new AggregateFunctionCallExpression(avgFn, false, vars2);
+ newVar = newCtx.newVar();
+ newVars2.add(newVar);
+ aggExprList.add(new MutableObject<>(aggExpr));
+ }
+
+ // add assign [$$79] <- [{"$1": $$73, "$2": $$74, "$3": $$75, "$4": $$76, "$5": $$77, "$6": $$78}]
+ AggregateOperator newAggOp = new AggregateOperator(newVars2, aggExprList);
+ newAggOp.getInputs().add(new MutableObject<>(assignOp));
Mutable<ILogicalOperator> newAggOpRef = new MutableObject<>(newAggOp);
+ OperatorPropertiesUtil.typeOpRec(newAggOpRef, newCtx); // is this really needed??
+ List<MutableObject> arr = createMutableObjectArray(newAggOp.getVariables());
+ AbstractFunctionCallExpression f = new ScalarFunctionCallExpression(
+ FunctionUtil.getFunctionInfo(BuiltinFunctions.OPEN_RECORD_CONSTRUCTOR));
+ for (int i = 0; i < arr.size(); i++) {
+ f.getArguments().add(arr.get(i));
+ }
- OperatorPropertiesUtil.typeOpRec(newAggOpRef, newCtx);
+ newVar = newCtx.newVar();
+ assignOp = new AssignOperator(newVar, new MutableObject<>(f));
+ assignOp.getInputs().add(new MutableObject<>(newAggOp));
+ ProjectOperator pOp = new ProjectOperator(newVar);
+ pOp.getInputs().add(new MutableObject<>(assignOp));
+
+ Mutable<ILogicalOperator> Ref = new MutableObject<>(pOp);
LOGGER.info("***returning from sample query***");
- return AnalysisUtil.runQuery(newAggOpRef, Arrays.asList(aggVar), newCtx, IRuleSetFactory.RuleSetKind.SAMPLING);
+ OperatorPropertiesUtil.typeOpRec(Ref, newCtx);
+ String viewInPlan3 = new ALogicalPlanImpl(Ref).toString(); //useful when debugging
+ LOGGER.trace("viewInPlan3");
+ LOGGER.trace(viewInPlan3);
+ return AnalysisUtil.runQuery(Ref, Arrays.asList(newVar), newCtx, IRuleSetFactory.RuleSetKind.SAMPLING);
+
+ }
+
+ private List<MutableObject> createMutableObjectArray(List<LogicalVariable> vars) {
+ List<MutableObject> arr = new ArrayList<>();
+ for (int i = 0; i < vars.size(); i++) {
+ LiteralExpr le = new LiteralExpr();
+ StringLiteral value = new StringLiteral("$" + Integer.toString(i + 1)); // these start from 1
+ le.setValue(value);
+ AsterixConstantValue cValue = new AsterixConstantValue(ConstantHelper.objectFromLiteral(le.getValue()));
+ ConstantExpression cExpr = new ConstantExpression(cValue);
+ arr.add(new MutableObject<>(cExpr));
+ arr.add(new MutableObject<>(new VariableReferenceExpression(vars.get(i))));
+ }
+ return arr;
}
public long findDistinctCardinality(ILogicalOperator grpByDistinctOp) throws AlgebricksException {
@@ -603,7 +714,7 @@
ILogicalOperator copyOfSelOp = OperatorManipulationUtil.bottomUpCopyOperators(selOp);
if (setSampleDataSource(copyOfSelOp, sampleDataSource)) {
List<List<IAObject>> result = runSamplingQuery(optCtx, copyOfSelOp);
- sampleSize = ((AInt64) result.get(0).get(0)).getLongValue();
+ sampleSize = (long) findPredicateCardinality(result);
}
return sampleSize;
}
@@ -619,7 +730,7 @@
if (setSampleDataSource(copyOfGrpByDistinctOp, sampleDataSource)) {
// get distinct cardinality from the sampling source
List<List<IAObject>> result = runSamplingQuery(optCtx, copyOfGrpByDistinctOp);
- estDistCardinalityFromSample = ((double) ((AInt64) result.get(0).get(0)).getLongValue());
+ estDistCardinalityFromSample = findPredicateCardinality(result);
}
}
if (estDistCardinalityFromSample != -1.0) { // estimate distinct cardinality for the dataset from the sampled cardinality
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
index 6c621e0..0659d00 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
@@ -2108,7 +2108,8 @@
addFunction(DECODE_DATAVERSE_NAME, OrderedListOfAStringTypeComputer.INSTANCE_NULLABLE, true);
addPrivateFunction(COLLECTION_TO_SEQUENCE, CollectionToSequenceTypeComputer.INSTANCE, true);
- addPrivateFunction(SERIALIZED_SIZE, AInt64TypeComputer.INSTANCE, true);
+ addFunction(SERIALIZED_SIZE, AInt64TypeComputer.INSTANCE, true);
+ // used by CBO's internal sampling queries for determining projection sizes
// external lookup
addPrivateFunction(EXTERNAL_LOOKUP, AnyTypeComputer.INSTANCE, false);