[NO ISSUE][COMP] Improve variable substitution
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
- Rename fields in PropagatingTypeEnvironment to
align with their semantics
- PropagatingTypeEnvironment.substituteProducedVariable()
should also substitute variables in 'nonMissableVariables'
and 'correlatedMissableVariableLists'
- Minor improvements in SubstituteVariableVisitor
Change-Id: I8acafe7fae8fa53dc962fe260e48e8ff84dadb86
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/9603
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Dmitry Lychagin <dmitry.lychagin@couchbase.com>
Reviewed-by: Ali Alsuliman <ali.al.solaiman@gmail.com>
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/AssignOperator.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/AssignOperator.java
index 202c291..ade9552 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/AssignOperator.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/AssignOperator.java
@@ -23,11 +23,9 @@
import org.apache.commons.lang3.mutable.Mutable;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
import org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
import org.apache.hyracks.algebricks.core.algebra.properties.LocalOrderProperty;
import org.apache.hyracks.algebricks.core.algebra.properties.VariablePropagationPolicy;
import org.apache.hyracks.algebricks.core.algebra.typing.ITypingContext;
@@ -80,15 +78,6 @@
for (int i = 0; i < n; i++) {
env.setVarType(variables.get(i), ctx.getExpressionTypeComputer().getType(expressions.get(i).getValue(),
ctx.getMetadataProvider(), env));
- if (expressions.get(i).getValue().getExpressionTag() == LogicalExpressionTag.VARIABLE) {
- LogicalVariable var =
- ((VariableReferenceExpression) expressions.get(i).getValue()).getVariableReference();
- for (List<LogicalVariable> list : env.getCorrelatedMissableVariableLists()) {
- if (list.contains(var)) {
- list.add(variables.get(i));
- }
- }
- }
}
return env;
}
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterJoinOperator.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterJoinOperator.java
index 797c5eb..4e382d2 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterJoinOperator.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterJoinOperator.java
@@ -67,8 +67,8 @@
PropagatingTypeEnvironment env =
new PropagatingTypeEnvironment(ctx.getExpressionTypeComputer(), ctx.getMissableTypeComputer(),
ctx.getMetadataProvider(), TypePropagationPolicy.LEFT_OUTER, envPointers);
- List<LogicalVariable> liveVars = new ArrayList<LogicalVariable>();
- VariableUtilities.getLiveVariables(inputs.get(1).getValue(), liveVars); // live variables from outer branch can be null together
+ List<LogicalVariable> liveVars = new ArrayList<>();
+ VariableUtilities.getLiveVariables(inputs.get(1).getValue(), liveVars); // live variables from right branch can be MISSING together
env.getCorrelatedMissableVariableLists().add(liveVars);
return env;
}
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestMapOperator.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestMapOperator.java
index 6bacdb4..cd009c0 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestMapOperator.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestMapOperator.java
@@ -18,7 +18,6 @@
*/
package org.apache.hyracks.algebricks.core.algebra.operators.logical;
-import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.mutable.Mutable;
@@ -61,10 +60,7 @@
// Propagates all input variables that come from the outer branch.
PropagatingTypeEnvironment env = createPropagatingAllInputsTypeEnvironment(ctx);
- env.getCorrelatedMissableVariableLists().add(new ArrayList<>(variables));
-
- // For the variables from the inner branch, the output type is the union
- // of (original type + null).
+ // The produced variables of this operator are missable because of the left outer semantics.
for (int i = 0; i < variables.size(); i++) {
env.setVarType(variables.get(i), ctx.getMissableTypeComputer().makeMissableType(variableTypes.get(i)));
}
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestOperator.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestOperator.java
index 8c95a3f..14996dd 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestOperator.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestOperator.java
@@ -14,9 +14,6 @@
*/
package org.apache.hyracks.algebricks.core.algebra.operators.logical;
-import java.util.ArrayList;
-import java.util.List;
-
import org.apache.commons.lang3.mutable.Mutable;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
@@ -46,21 +43,14 @@
@Override
public IVariableTypeEnvironment computeOutputTypeEnvironment(ITypingContext ctx) throws AlgebricksException {
PropagatingTypeEnvironment env = createPropagatingAllInputsTypeEnvironment(ctx);
+
+ // The produced variables of this operator are missable because of the left outer semantics.
Object t = env.getType(expression.getValue());
- // For the variables from the inner branch, the output type is the union
- // of (original type + missing).
env.setVarType(variables.get(0), ctx.getMissableTypeComputer().makeMissableType(t));
if (positionalVariable != null) {
env.setVarType(positionalVariable, ctx.getMissableTypeComputer().makeMissableType(positionalVariableType));
}
- // The produced variables of the this operator are missable because of the left outer semantics.
- List<LogicalVariable> missableVars = new ArrayList<>();
- missableVars.add(variables.get(0));
- if (positionalVariable != null) {
- missableVars.add(positionalVariable);
- }
- env.getCorrelatedMissableVariableLists().add(missableVars);
return env;
}
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/SelectOperator.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/SelectOperator.java
index 26ce137..b2e2dfd 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/SelectOperator.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/SelectOperator.java
@@ -113,7 +113,7 @@
ILogicalExpression a2 = f2.getArguments().get(0).getValue();
if (a2.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
LogicalVariable var = ((VariableReferenceExpression) a2).getVariableReference();
- env.getNonNullVariables().add(var);
+ env.getNonMissableVariables().add(var);
}
}
}
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/SubstituteVariableVisitor.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/SubstituteVariableVisitor.java
index 0b1bf5b..d1a1c03 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/SubstituteVariableVisitor.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/SubstituteVariableVisitor.java
@@ -169,8 +169,9 @@
if (!producedVarFound) {
substInNestedPlans(op, pair.first, pair.second);
}
- // always call substProducedVarInTypeEnvironment() because GroupByOperator.computeOutputTypeEnvironment()
- // adds used vars into output type environment in some cases.
+ // GROUP BY operator may add its used variables
+ // to its own output type environment as produced variables
+ // therefore we need to perform variable substitution in its own type environment
// TODO (dmitry): this needs to be revisited
substProducedVarInTypeEnvironment(op, pair);
return null;
@@ -187,6 +188,10 @@
public Void visitLeftOuterJoinOperator(LeftOuterJoinOperator op, Pair<LogicalVariable, LogicalVariable> pair)
throws AlgebricksException {
substUsedVariablesInExpr(op.getCondition(), pair.first, pair.second);
+ // LEFT OUTER JOIN operator adds its right branch variables
+ // to its own output type environment as 'correlatedMissableVariables'
+ // therefore we need to perform variable substitution in its own type environment
+ substProducedVarInTypeEnvironment(op, pair);
return null;
}
@@ -245,8 +250,13 @@
}
@Override
- public Void visitSelectOperator(SelectOperator op, Pair<LogicalVariable, LogicalVariable> pair) {
+ public Void visitSelectOperator(SelectOperator op, Pair<LogicalVariable, LogicalVariable> pair)
+ throws AlgebricksException {
substUsedVariablesInExpr(op.getCondition(), pair.first, pair.second);
+ // SELECT operator may add its used variable
+ // to its own output type environment as 'nonMissableVariable' (not(is-missing($used_var)))
+ // therefore we need to perform variable substitution in its own type environment
+ substProducedVarInTypeEnvironment(op, pair);
return null;
}
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/TypePropagationPolicy.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/TypePropagationPolicy.java
index 9d60370..c37c674 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/TypePropagationPolicy.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/TypePropagationPolicy.java
@@ -31,21 +31,22 @@
@Override
public Object getVarType(LogicalVariable var, IMissableTypeComputer ntc,
- List<LogicalVariable> nonNullVariableList, List<List<LogicalVariable>> correlatedNullableVariableLists,
- ITypeEnvPointer... typeEnvs) throws AlgebricksException {
+ List<LogicalVariable> nonMissableVariableList,
+ List<List<LogicalVariable>> correlatedMissableVariableLists, ITypeEnvPointer... typeEnvs)
+ throws AlgebricksException {
for (ITypeEnvPointer p : typeEnvs) {
IVariableTypeEnvironment env = p.getTypeEnv();
if (env == null) {
throw new AlgebricksException(
"Null environment for pointer " + p + " in getVarType for var=" + var);
}
- Object t = env.getVarType(var, nonNullVariableList, correlatedNullableVariableLists);
+ Object t = env.getVarType(var, nonMissableVariableList, correlatedMissableVariableLists);
if (t != null) {
if (ntc != null && ntc.canBeMissing(t)) {
- for (List<LogicalVariable> list : correlatedNullableVariableLists) {
+ for (List<LogicalVariable> list : correlatedMissableVariableLists) {
if (list.contains(var)) {
for (LogicalVariable v : list) {
- if (nonNullVariableList.contains(v)) {
+ if (nonMissableVariableList.contains(v)) {
return ntc.getNonOptionalType(t);
}
}
@@ -63,16 +64,17 @@
@Override
public Object getVarType(LogicalVariable var, IMissableTypeComputer ntc,
- List<LogicalVariable> nonNullVariableList, List<List<LogicalVariable>> correlatedNullableVariableLists,
- ITypeEnvPointer... typeEnvs) throws AlgebricksException {
+ List<LogicalVariable> nonMissableVariableList,
+ List<List<LogicalVariable>> correlatedMissableVariableLists, ITypeEnvPointer... typeEnvs)
+ throws AlgebricksException {
int n = typeEnvs.length;
// Searches from the inner branch to the outer branch.
// TODO(buyingyi): A split operator could lead to the case that the type for a variable could be
// found in both inner and outer branches. Fix computeOutputTypeEnvironment() in ProjectOperator
// and investigate why many test queries fail if only live variables' types are propagated.
for (int i = n - 1; i >= 0; i--) {
- Object t =
- typeEnvs[i].getTypeEnv().getVarType(var, nonNullVariableList, correlatedNullableVariableLists);
+ Object t = typeEnvs[i].getTypeEnv().getVarType(var, nonMissableVariableList,
+ correlatedMissableVariableLists);
if (t == null) {
continue;
}
@@ -82,7 +84,7 @@
// inner branch
boolean nonMissingVarIsProduced = false;
- for (LogicalVariable v : nonNullVariableList) {
+ for (LogicalVariable v : nonMissableVariableList) {
boolean toBreak = false;
if (v == var) {
nonMissingVarIsProduced = true;
@@ -106,6 +108,6 @@
};
public abstract Object getVarType(LogicalVariable var, IMissableTypeComputer ntc,
- List<LogicalVariable> nonNullVariableList, List<List<LogicalVariable>> correlatedNullableVariableLists,
+ List<LogicalVariable> nonMissableVariableList, List<List<LogicalVariable>> correlatedMissableVariableLists,
ITypeEnvPointer... typeEnvs) throws AlgebricksException;
}
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/typing/PropagatingTypeEnvironment.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/typing/PropagatingTypeEnvironment.java
index 9d2a5da..27aa902 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/typing/PropagatingTypeEnvironment.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/typing/PropagatingTypeEnvironment.java
@@ -32,60 +32,74 @@
private final TypePropagationPolicy policy;
- private final IMissableTypeComputer nullableTypeComputer;
+ private final IMissableTypeComputer missableTypeComputer;
private final ITypeEnvPointer[] envPointers;
- private final List<LogicalVariable> nonNullVariables = new ArrayList<>();
+ private final List<LogicalVariable> nonMissableVariables = new ArrayList<>();
- private final List<List<LogicalVariable>> correlatedNullableVariableLists = new ArrayList<>();
+ private final List<List<LogicalVariable>> correlatedMissableVariableLists = new ArrayList<>();
public PropagatingTypeEnvironment(IExpressionTypeComputer expressionTypeComputer,
- IMissableTypeComputer nullableTypeComputer, IMetadataProvider<?, ?> metadataProvider,
+ IMissableTypeComputer missableTypeComputer, IMetadataProvider<?, ?> metadataProvider,
TypePropagationPolicy policy, ITypeEnvPointer[] envPointers) {
super(expressionTypeComputer, metadataProvider);
- this.nullableTypeComputer = nullableTypeComputer;
+ this.missableTypeComputer = missableTypeComputer;
this.policy = policy;
this.envPointers = envPointers;
}
@Override
public Object getVarType(LogicalVariable var) throws AlgebricksException {
- return getVarTypeFullList(var, nonNullVariables, correlatedNullableVariableLists);
+ return getVarTypeFullList(var, nonMissableVariables, correlatedMissableVariableLists);
}
- public List<LogicalVariable> getNonNullVariables() {
- return nonNullVariables;
+ public List<LogicalVariable> getNonMissableVariables() {
+ return nonMissableVariables;
}
public List<List<LogicalVariable>> getCorrelatedMissableVariableLists() {
- return correlatedNullableVariableLists;
+ return correlatedMissableVariableLists;
}
@Override
- public Object getVarType(LogicalVariable var, List<LogicalVariable> nonNullVariableList,
- List<List<LogicalVariable>> correlatedNullableVariableLists) throws AlgebricksException {
- for (LogicalVariable v : nonNullVariables) {
- if (!nonNullVariableList.contains(v)) {
- nonNullVariableList.add(v);
+ public Object getVarType(LogicalVariable var, List<LogicalVariable> nonMissableVariableList,
+ List<List<LogicalVariable>> correlatedMissableVariableLists) throws AlgebricksException {
+ for (LogicalVariable v : nonMissableVariables) {
+ if (!nonMissableVariableList.contains(v)) {
+ nonMissableVariableList.add(v);
}
}
- Object t = getVarTypeFullList(var, nonNullVariableList, correlatedNullableVariableLists);
- for (List<LogicalVariable> list : this.correlatedNullableVariableLists) {
- if (!correlatedNullableVariableLists.contains(list)) {
- correlatedNullableVariableLists.add(list);
+ Object t = getVarTypeFullList(var, nonMissableVariableList, correlatedMissableVariableLists);
+ for (List<LogicalVariable> list : this.correlatedMissableVariableLists) { // the field, not the parameter
+ if (!correlatedMissableVariableLists.contains(list)) {
+ correlatedMissableVariableLists.add(list);
}
}
return t;
}
- private Object getVarTypeFullList(LogicalVariable var, List<LogicalVariable> nonNullVariableList,
- List<List<LogicalVariable>> correlatedNullableVariableLists) throws AlgebricksException {
+ private Object getVarTypeFullList(LogicalVariable var, List<LogicalVariable> nonMissableVariableList,
+ List<List<LogicalVariable>> correlatedMissableVariableLists) throws AlgebricksException {
Object t = varTypeMap.get(var);
if (t != null) {
return t;
}
- return policy.getVarType(var, nullableTypeComputer, nonNullVariableList, correlatedNullableVariableLists,
+ return policy.getVarType(var, missableTypeComputer, nonMissableVariableList, correlatedMissableVariableLists,
envPointers);
}
+
+ @Override
+ public boolean substituteProducedVariable(LogicalVariable v1, LogicalVariable v2) throws AlgebricksException {
+ boolean result = super.substituteProducedVariable(v1, v2);
+ if (nonMissableVariables.remove(v1)) {
+ nonMissableVariables.add(v2);
+ }
+ for (List<LogicalVariable> missableVarList : correlatedMissableVariableLists) {
+ if (missableVarList.remove(v1)) {
+ missableVarList.add(v2);
+ }
+ }
+ return result;
+ }
}