[NO ISSUE][COMP] Improve variable substitution

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
- Rename fields in PropagatingTypeEnvironment to
  align with their semantics
- PropagatingTypeEnvironment.substituteProducedVariable()
  should also substitute variables in 'nonMissableVariables'
  and 'correlatedMissableVariableLists'
- Minor improvements in SubstituteVariableVisitor

Change-Id: I8acafe7fae8fa53dc962fe260e48e8ff84dadb86
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/9603
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Dmitry Lychagin <dmitry.lychagin@couchbase.com>
Reviewed-by: Ali Alsuliman <ali.al.solaiman@gmail.com>
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/AssignOperator.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/AssignOperator.java
index 202c291..ade9552 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/AssignOperator.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/AssignOperator.java
@@ -23,11 +23,9 @@
 import org.apache.commons.lang3.mutable.Mutable;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
 import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
 import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
 import org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
 import org.apache.hyracks.algebricks.core.algebra.properties.LocalOrderProperty;
 import org.apache.hyracks.algebricks.core.algebra.properties.VariablePropagationPolicy;
 import org.apache.hyracks.algebricks.core.algebra.typing.ITypingContext;
@@ -80,15 +78,6 @@
         for (int i = 0; i < n; i++) {
             env.setVarType(variables.get(i), ctx.getExpressionTypeComputer().getType(expressions.get(i).getValue(),
                     ctx.getMetadataProvider(), env));
-            if (expressions.get(i).getValue().getExpressionTag() == LogicalExpressionTag.VARIABLE) {
-                LogicalVariable var =
-                        ((VariableReferenceExpression) expressions.get(i).getValue()).getVariableReference();
-                for (List<LogicalVariable> list : env.getCorrelatedMissableVariableLists()) {
-                    if (list.contains(var)) {
-                        list.add(variables.get(i));
-                    }
-                }
-            }
         }
         return env;
     }
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterJoinOperator.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterJoinOperator.java
index 797c5eb..4e382d2 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterJoinOperator.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterJoinOperator.java
@@ -67,8 +67,8 @@
         PropagatingTypeEnvironment env =
                 new PropagatingTypeEnvironment(ctx.getExpressionTypeComputer(), ctx.getMissableTypeComputer(),
                         ctx.getMetadataProvider(), TypePropagationPolicy.LEFT_OUTER, envPointers);
-        List<LogicalVariable> liveVars = new ArrayList<LogicalVariable>();
-        VariableUtilities.getLiveVariables(inputs.get(1).getValue(), liveVars); // live variables from outer branch can be null together
+        List<LogicalVariable> liveVars = new ArrayList<>();
+        VariableUtilities.getLiveVariables(inputs.get(1).getValue(), liveVars); // live variables from right branch can be MISSING together
         env.getCorrelatedMissableVariableLists().add(liveVars);
         return env;
     }
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestMapOperator.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestMapOperator.java
index 6bacdb4..cd009c0 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestMapOperator.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestMapOperator.java
@@ -18,7 +18,6 @@
  */
 package org.apache.hyracks.algebricks.core.algebra.operators.logical;
 
-import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.commons.lang3.mutable.Mutable;
@@ -61,10 +60,7 @@
         // Propagates all input variables that come from the outer branch.
         PropagatingTypeEnvironment env = createPropagatingAllInputsTypeEnvironment(ctx);
 
-        env.getCorrelatedMissableVariableLists().add(new ArrayList<>(variables));
-
-        // For the variables from the inner branch, the output type is the union
-        // of (original type + null).
+        // The produced variables of this operator are missable because of the left outer semantics.
         for (int i = 0; i < variables.size(); i++) {
             env.setVarType(variables.get(i), ctx.getMissableTypeComputer().makeMissableType(variableTypes.get(i)));
         }
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestOperator.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestOperator.java
index 8c95a3f..14996dd 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestOperator.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestOperator.java
@@ -14,9 +14,6 @@
  */
 package org.apache.hyracks.algebricks.core.algebra.operators.logical;
 
-import java.util.ArrayList;
-import java.util.List;
-
 import org.apache.commons.lang3.mutable.Mutable;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
@@ -46,21 +43,14 @@
     @Override
     public IVariableTypeEnvironment computeOutputTypeEnvironment(ITypingContext ctx) throws AlgebricksException {
         PropagatingTypeEnvironment env = createPropagatingAllInputsTypeEnvironment(ctx);
+
+        // The produced variables of this operator are missable because of the left outer semantics.
         Object t = env.getType(expression.getValue());
-        // For the variables from the inner branch, the output type is the union
-        // of (original type + missing).
         env.setVarType(variables.get(0), ctx.getMissableTypeComputer().makeMissableType(t));
         if (positionalVariable != null) {
             env.setVarType(positionalVariable, ctx.getMissableTypeComputer().makeMissableType(positionalVariableType));
         }
 
-        // The produced variables of the this operator are missable because of the left outer semantics.
-        List<LogicalVariable> missableVars = new ArrayList<>();
-        missableVars.add(variables.get(0));
-        if (positionalVariable != null) {
-            missableVars.add(positionalVariable);
-        }
-        env.getCorrelatedMissableVariableLists().add(missableVars);
         return env;
     }
 
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/SelectOperator.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/SelectOperator.java
index 26ce137..b2e2dfd 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/SelectOperator.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/SelectOperator.java
@@ -113,7 +113,7 @@
                 ILogicalExpression a2 = f2.getArguments().get(0).getValue();
                 if (a2.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
                     LogicalVariable var = ((VariableReferenceExpression) a2).getVariableReference();
-                    env.getNonNullVariables().add(var);
+                    env.getNonMissableVariables().add(var);
                 }
             }
         }
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/SubstituteVariableVisitor.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/SubstituteVariableVisitor.java
index 0b1bf5b..d1a1c03 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/SubstituteVariableVisitor.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/SubstituteVariableVisitor.java
@@ -169,8 +169,9 @@
         if (!producedVarFound) {
             substInNestedPlans(op, pair.first, pair.second);
         }
-        // always call substProducedVarInTypeEnvironment() because GroupByOperator.computeOutputTypeEnvironment()
-        // adds used vars into output type environment in some cases.
+        // GROUP BY operator may add its used variables
+        // to its own output type environment as produced variables,
+        // therefore we need to perform variable substitution in its own type environment
         // TODO (dmitry): this needs to be revisited
         substProducedVarInTypeEnvironment(op, pair);
         return null;
@@ -187,6 +188,10 @@
     public Void visitLeftOuterJoinOperator(LeftOuterJoinOperator op, Pair<LogicalVariable, LogicalVariable> pair)
             throws AlgebricksException {
         substUsedVariablesInExpr(op.getCondition(), pair.first, pair.second);
+        // LEFT OUTER JOIN operator adds its right branch variables
+        // to its own output type environment as 'correlatedMissableVariables',
+        // therefore we need to perform variable substitution in its own type environment
+        substProducedVarInTypeEnvironment(op, pair);
         return null;
     }
 
@@ -245,8 +250,13 @@
     }
 
     @Override
-    public Void visitSelectOperator(SelectOperator op, Pair<LogicalVariable, LogicalVariable> pair) {
+    public Void visitSelectOperator(SelectOperator op, Pair<LogicalVariable, LogicalVariable> pair)
+            throws AlgebricksException {
         substUsedVariablesInExpr(op.getCondition(), pair.first, pair.second);
+        // SELECT operator may add its used variable
+        // to its own output type environment as 'nonMissableVariable' (not(is-missing($used_var))),
+        // therefore we need to perform variable substitution in its own type environment
+        substProducedVarInTypeEnvironment(op, pair);
         return null;
     }
 
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/TypePropagationPolicy.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/TypePropagationPolicy.java
index 9d60370..c37c674 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/TypePropagationPolicy.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/TypePropagationPolicy.java
@@ -31,21 +31,22 @@
 
         @Override
         public Object getVarType(LogicalVariable var, IMissableTypeComputer ntc,
-                List<LogicalVariable> nonNullVariableList, List<List<LogicalVariable>> correlatedNullableVariableLists,
-                ITypeEnvPointer... typeEnvs) throws AlgebricksException {
+                List<LogicalVariable> nonMissableVariableList,
+                List<List<LogicalVariable>> correlatedMissableVariableLists, ITypeEnvPointer... typeEnvs)
+                throws AlgebricksException {
             for (ITypeEnvPointer p : typeEnvs) {
                 IVariableTypeEnvironment env = p.getTypeEnv();
                 if (env == null) {
                     throw new AlgebricksException(
                             "Null environment for pointer " + p + " in getVarType for var=" + var);
                 }
-                Object t = env.getVarType(var, nonNullVariableList, correlatedNullableVariableLists);
+                Object t = env.getVarType(var, nonMissableVariableList, correlatedMissableVariableLists);
                 if (t != null) {
                     if (ntc != null && ntc.canBeMissing(t)) {
-                        for (List<LogicalVariable> list : correlatedNullableVariableLists) {
+                        for (List<LogicalVariable> list : correlatedMissableVariableLists) {
                             if (list.contains(var)) {
                                 for (LogicalVariable v : list) {
-                                    if (nonNullVariableList.contains(v)) {
+                                    if (nonMissableVariableList.contains(v)) {
                                         return ntc.getNonOptionalType(t);
                                     }
                                 }
@@ -63,16 +64,17 @@
 
         @Override
         public Object getVarType(LogicalVariable var, IMissableTypeComputer ntc,
-                List<LogicalVariable> nonNullVariableList, List<List<LogicalVariable>> correlatedNullableVariableLists,
-                ITypeEnvPointer... typeEnvs) throws AlgebricksException {
+                List<LogicalVariable> nonMissableVariableList,
+                List<List<LogicalVariable>> correlatedMissableVariableLists, ITypeEnvPointer... typeEnvs)
+                throws AlgebricksException {
             int n = typeEnvs.length;
             // Searches from the inner branch to the outer branch.
             // TODO(buyingyi): A split operator could lead to the case that the type for a variable could be
             // found in both inner and outer branches. Fix computeOutputTypeEnvironment() in ProjectOperator
             // and investigate why many test queries fail if only live variables' types are propagated.
             for (int i = n - 1; i >= 0; i--) {
-                Object t =
-                        typeEnvs[i].getTypeEnv().getVarType(var, nonNullVariableList, correlatedNullableVariableLists);
+                Object t = typeEnvs[i].getTypeEnv().getVarType(var, nonMissableVariableList,
+                        correlatedMissableVariableLists);
                 if (t == null) {
                     continue;
                 }
@@ -82,7 +84,7 @@
 
                 // inner branch
                 boolean nonMissingVarIsProduced = false;
-                for (LogicalVariable v : nonNullVariableList) {
+                for (LogicalVariable v : nonMissableVariableList) {
                     boolean toBreak = false;
                     if (v == var) {
                         nonMissingVarIsProduced = true;
@@ -106,6 +108,6 @@
     };
 
     public abstract Object getVarType(LogicalVariable var, IMissableTypeComputer ntc,
-            List<LogicalVariable> nonNullVariableList, List<List<LogicalVariable>> correlatedNullableVariableLists,
+            List<LogicalVariable> nonMissableVariableList, List<List<LogicalVariable>> correlatedMissableVariableLists,
             ITypeEnvPointer... typeEnvs) throws AlgebricksException;
 }
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/typing/PropagatingTypeEnvironment.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/typing/PropagatingTypeEnvironment.java
index 9d2a5da..27aa902 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/typing/PropagatingTypeEnvironment.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/typing/PropagatingTypeEnvironment.java
@@ -32,60 +32,74 @@
 
     private final TypePropagationPolicy policy;
 
-    private final IMissableTypeComputer nullableTypeComputer;
+    private final IMissableTypeComputer missableTypeComputer;
 
     private final ITypeEnvPointer[] envPointers;
 
-    private final List<LogicalVariable> nonNullVariables = new ArrayList<>();
+    private final List<LogicalVariable> nonMissableVariables = new ArrayList<>();
 
-    private final List<List<LogicalVariable>> correlatedNullableVariableLists = new ArrayList<>();
+    private final List<List<LogicalVariable>> correlatedMissableVariableLists = new ArrayList<>();
 
     public PropagatingTypeEnvironment(IExpressionTypeComputer expressionTypeComputer,
-            IMissableTypeComputer nullableTypeComputer, IMetadataProvider<?, ?> metadataProvider,
+            IMissableTypeComputer missableTypeComputer, IMetadataProvider<?, ?> metadataProvider,
             TypePropagationPolicy policy, ITypeEnvPointer[] envPointers) {
         super(expressionTypeComputer, metadataProvider);
-        this.nullableTypeComputer = nullableTypeComputer;
+        this.missableTypeComputer = missableTypeComputer;
         this.policy = policy;
         this.envPointers = envPointers;
     }
 
     @Override
     public Object getVarType(LogicalVariable var) throws AlgebricksException {
-        return getVarTypeFullList(var, nonNullVariables, correlatedNullableVariableLists);
+        return getVarTypeFullList(var, nonMissableVariables, correlatedMissableVariableLists);
     }
 
-    public List<LogicalVariable> getNonNullVariables() {
-        return nonNullVariables;
+    public List<LogicalVariable> getNonMissableVariables() {
+        return nonMissableVariables;
     }
 
     public List<List<LogicalVariable>> getCorrelatedMissableVariableLists() {
-        return correlatedNullableVariableLists;
+        return correlatedMissableVariableLists;
     }
 
     @Override
-    public Object getVarType(LogicalVariable var, List<LogicalVariable> nonNullVariableList,
-            List<List<LogicalVariable>> correlatedNullableVariableLists) throws AlgebricksException {
-        for (LogicalVariable v : nonNullVariables) {
-            if (!nonNullVariableList.contains(v)) {
-                nonNullVariableList.add(v);
+    public Object getVarType(LogicalVariable var, List<LogicalVariable> nonMissableVariableList,
+            List<List<LogicalVariable>> correlatedMissableVariableLists) throws AlgebricksException {
+        for (LogicalVariable v : nonMissableVariables) {
+            if (!nonMissableVariableList.contains(v)) {
+                nonMissableVariableList.add(v);
             }
         }
-        Object t = getVarTypeFullList(var, nonNullVariableList, correlatedNullableVariableLists);
-        for (List<LogicalVariable> list : this.correlatedNullableVariableLists) {
-            if (!correlatedNullableVariableLists.contains(list)) {
-                correlatedNullableVariableLists.add(list);
+        Object t = getVarTypeFullList(var, nonMissableVariableList, correlatedMissableVariableLists);
+        for (List<LogicalVariable> list : this.correlatedMissableVariableLists) { // field, not the parameter
+            if (!correlatedMissableVariableLists.contains(list)) {
+                correlatedMissableVariableLists.add(list);
             }
         }
         return t;
     }
 
-    private Object getVarTypeFullList(LogicalVariable var, List<LogicalVariable> nonNullVariableList,
-            List<List<LogicalVariable>> correlatedNullableVariableLists) throws AlgebricksException {
+    private Object getVarTypeFullList(LogicalVariable var, List<LogicalVariable> nonMissableVariableList,
+            List<List<LogicalVariable>> correlatedMissableVariableLists) throws AlgebricksException {
         Object t = varTypeMap.get(var);
         if (t != null) {
             return t;
         }
-        return policy.getVarType(var, nullableTypeComputer, nonNullVariableList, correlatedNullableVariableLists,
+        return policy.getVarType(var, missableTypeComputer, nonMissableVariableList, correlatedMissableVariableLists,
                 envPointers);
     }
+
+    @Override
+    public boolean substituteProducedVariable(LogicalVariable v1, LogicalVariable v2) throws AlgebricksException {
+        boolean result = super.substituteProducedVariable(v1, v2);
+        if (nonMissableVariables.remove(v1)) {
+            nonMissableVariables.add(v2);
+        }
+        for (List<LogicalVariable> missableVarList : correlatedMissableVariableLists) {
+            if (missableVarList.remove(v1)) {
+                missableVarList.add(v2);
+            }
+        }
+        return result;
+    }
 }