[ASTERIXDB-3568][COMP] Add transitive closure INNER join predicates

Ext-ref: MB-66030

Change-Id: Ifea063cae4a52802a3342126623b7539dc811abd
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19560
Reviewed-by: <murali.krishna@couchbase.com>
Reviewed-by: <preethampoluparthi@gmail.com>
Reviewed-by: Ian Maxon <imaxon@apache.org>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java
index fa80b78..f37f8a3 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java
@@ -38,8 +38,11 @@
     protected int datasetBits;
     // used for triangle detection; we strictly do not mean left and right here.
     // first and second sides would be more appropriate
+    protected int leftSide;
+    protected int rightSide;
     protected int leftSideBits;
     protected int rightSideBits;
+    protected int numLeafInputs;
     protected double selectivity;
     protected comparisonOp comparisonType;
     protected JoinOperator joinOp = null;
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
index e77e066..11a9c95 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
@@ -529,19 +529,26 @@
             }
             jc.numberOfVars = usedVars.size();
 
+            List<Integer> leafInputNumbers = new ArrayList<>(jc.numberOfVars);
             for (int i = 0; i < jc.numberOfVars; i++) {
-                int bits = 1 << (varLeafInputIds.get(usedVars.get(i)) - 1);
-                if (bits != JoinCondition.NO_JC) {
-                    if (i == 0) {
-                        jc.leftSideBits = bits;
-                    } else if (i == 1) {
-                        jc.rightSideBits = bits;
-                    } else {
-                        // have to deal with preds such as r.a + s.a = 5 OR r.a + s.a = t.a
-                    }
-                    jc.datasetBits |= bits;
+                int idx = varLeafInputIds.get(usedVars.get(i));
+                if (!leafInputNumbers.contains(idx)) {
+                    leafInputNumbers.add(idx);
                 }
             }
+            jc.numLeafInputs = leafInputNumbers.size();
+            for (int i = 0; i < jc.numLeafInputs; i++) {
+                int side = leafInputNumbers.get(i);
+                int bits = 1 << (side - 1);
+                if (i == 0) {
+                    jc.leftSide = side;
+                    jc.leftSideBits = bits;
+                } else if (i == 1) {
+                    jc.rightSide = side;
+                    jc.rightSideBits = bits;
+                }
+                jc.datasetBits |= bits;
+            }
         }
     }
 
@@ -1208,7 +1215,7 @@
         if (this.singleDatasetPreds.size() > 0) {
             for (JoinCondition jc : joinConditions) {
                 // we may be repeating some work here, but that is ok. This will rarely happen (happens in q7 tpch)
-                double sel = stats.getSelectivityFromAnnotationMain(jc.getJoinCondition(), false, true, null);
+                double sel = stats.getSelectivityFromAnnotationMain(jc, null, false, true, null);
                 if (sel != -1) {
                     jc.selectivity = sel;
                 }
@@ -1236,7 +1243,7 @@
         }
 
         for (JoinCondition jc : joinConditions) {
-            jc.selectivity = stats.getSelectivityFromAnnotationMain(jc.joinCondition, true, false, jc.joinOp);
+            jc.selectivity = stats.getSelectivityFromAnnotationMain(jc, null, true, false, jc.joinOp);
         }
 
         findSelectionPredsInsideJoins(); // This was added to make TPCH Q7 work.
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
index edd4301..2aa93a7 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
@@ -570,36 +570,35 @@
         double redundantSel = 1.0;
         List<JoinCondition> joinConditions = joinEnum.getJoinConditions();
         JoinCondition jc1, jc2, jc3;
-        int[] vertices = new int[6];
-        int[] verticesCopy = new int[6];
+        String[] vars = new String[6];
+        String[] varsCopy = new String[6];
         for (int i = 0; i <= applicablePredicatesInCurrentJn.size() - 3; i++) {
             jc1 = joinConditions.get(applicablePredicatesInCurrentJn.get(i));
-            if (jc1.partOfComposite || jc1.deleted) {
+            if (jc1.deleted || jc1.usedVars == null) {
                 continue; // must ignore these or the same triangles will be found more than once.
             }
-            vertices[0] = jc1.leftSideBits;
-            vertices[1] = jc1.rightSideBits;
+            vars[0] = jc1.usedVars.get(0).toString();
+            vars[1] = jc1.usedVars.get(1).toString();
             for (int j = i + 1; j <= applicablePredicatesInCurrentJn.size() - 2; j++) {
                 jc2 = joinConditions.get(applicablePredicatesInCurrentJn.get(j));
-                if (jc2.partOfComposite || jc2.deleted) {
+                if (jc2.deleted || jc2.usedVars == null) {
                     continue;
                 }
-                vertices[2] = jc2.leftSideBits;
-                vertices[3] = jc2.rightSideBits;
+                vars[2] = jc2.usedVars.get(0).toString();
+                vars[3] = jc2.usedVars.get(1).toString();
                 for (int k = j + 1; k <= applicablePredicatesInCurrentJn.size() - 1; k++) {
                     jc3 = joinConditions.get(applicablePredicatesInCurrentJn.get(k));
-                    if (jc3.partOfComposite || jc3.deleted) {
+                    if (jc3.deleted || jc3.usedVars == null) {
                         continue;
                     }
-                    vertices[4] = jc3.leftSideBits;
-                    vertices[5] = jc3.rightSideBits;
+                    vars[4] = jc3.usedVars.get(0).toString();
+                    vars[5] = jc3.usedVars.get(1).toString();
 
-                    System.arraycopy(vertices, 0, verticesCopy, 0, 6);
-                    Arrays.sort(verticesCopy);
-                    if (verticesCopy[0] == verticesCopy[1] && verticesCopy[2] == verticesCopy[3]
-                            && verticesCopy[4] == verticesCopy[5]) {
+                    System.arraycopy(vars, 0, varsCopy, 0, 6);
+                    Arrays.sort(varsCopy);
+                    if (varsCopy[0] == varsCopy[1] && varsCopy[2] == varsCopy[3] && varsCopy[4] == varsCopy[5]) {
                         // redundant edge found
-                        if (!(jc1.deleted || jc2.deleted || jc3.deleted)) {
+                        if (!(jc1.deleted || jc2.deleted)) {
                             redundantSel *= adjustSelectivities(jc1, jc2, jc3);
                         }
                     }
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index e1aab84..32c70cb 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -114,24 +114,18 @@
         return mdp.findSampleIndex(dsid.getDatabaseName(), dsid.getDataverseName(), dsid.getDatasourceName());
     }
 
-    private double findJoinSelectivity(JoinProductivityAnnotation anno, AbstractFunctionCallExpression joinExpr,
-            JoinOperator jOp) throws AlgebricksException {
+    private double findJoinSelectivity(JoinCondition jc, JoinProductivityAnnotation anno,
+            AbstractFunctionCallExpression joinExpr, JoinOperator jOp) throws AlgebricksException {
         List<LogicalVariable> exprUsedVars = new ArrayList<>();
         joinExpr.getUsedVariables(exprUsedVars);
-        if (exprUsedVars.size() != 2) {
-            // Since there is a left and right dataset here, expecting only two variables.
+
+        if (jc.numLeafInputs != 2) {
+            // we can only deal with binary joins. More checks should be in place as well such as R.a op S.a
             return 1.0;
         }
 
-        int idx1, idx2;
-        if (joinEnum.varLeafInputIds.containsKey(exprUsedVars.get(0))) {
-            idx1 = joinEnum.varLeafInputIds.get(exprUsedVars.get(0));
-        } else
-            return 1.0;
-        if (joinEnum.varLeafInputIds.containsKey(exprUsedVars.get(1))) {
-            idx2 = joinEnum.varLeafInputIds.get(exprUsedVars.get(1));
-        } else
-            return 1.0;
+        int idx1 = jc.leftSide;
+        int idx2 = jc.rightSide;
 
         if (joinEnum.jnArray[idx1].getFake()) {
             return 1.0;
@@ -187,21 +181,19 @@
                 return 0.5; // this may not be accurate obviously!
             } // we can do all relops here and other joins such as interval joins and spatial joins, the compile time might increase a lot
 
-            //If one of the tables is smaller than the target sample size, we can join the samples directly
-            // to get a good estimate of the join selectivity.
             Index.SampleIndexDetails idxDetails1 = (Index.SampleIndexDetails) index1.getIndexDetails();
             Index.SampleIndexDetails idxDetails2 = (Index.SampleIndexDetails) index2.getIndexDetails();
             if ((idxDetails1.getSourceCardinality() < idxDetails1.getSampleCardinalityTarget())
-                    || (idxDetails2.getSourceCardinality() < idxDetails2.getSampleCardinalityTarget())) {
+                    || (idxDetails2.getSourceCardinality() < idxDetails2.getSampleCardinalityTarget())
+                    || exprUsedVars.size() > 2) { //* if there are more than 2 variables, it is not a simple join like r.a op s.a
                 double sel = findJoinSelFromSamples(joinEnum.leafInputs.get(idx1 - 1),
                         joinEnum.leafInputs.get(idx2 - 1), index1, index2, joinExpr, jOp);
-
                 if (sel == 0.0) {
-                    sel = 1.0 / Math.max(card1, card2); // R.uniq = S.uniq is nicely modelled here. Good heuristic Best we can do so far.
+                    sel = 1.0 / Math.max(card1, card2);
                 }
                 return sel;
             }
-            // Now we can handle only equi joins. We make all the uniform and independence assumptions here. Works well for Pk-FK joins.
+            // Now we can handle only equi joins. We make all the uniform and independence assumptions here.
             double sel = naiveJoinSelectivity(exprUsedVars, card1, card2, idx1, idx2);
             return sel;
         }
@@ -290,7 +282,7 @@
 
     // The expression we get may not be a base condition. It could be comprised of ors and ands and nots. So have to
     //recursively find the overall selectivity.
-    private double getSelectivityFromAnnotation(AbstractFunctionCallExpression afcExpr, boolean join,
+    private double getSelectivityFromAnnotation(JoinCondition jc, AbstractFunctionCallExpression afcExpr, boolean join,
             boolean singleDatasetPreds, JoinOperator jOp) throws AlgebricksException {
         double sel = 1.0;
         if (afcExpr.getFunctionIdentifier().equals(AlgebricksBuiltinFunctions.OR)) {
@@ -298,7 +290,7 @@
             for (int i = 0; i < afcExpr.getArguments().size(); i++) {
                 ILogicalExpression lexpr = afcExpr.getArguments().get(i).getValue();
                 if (lexpr.getExpressionTag().equals(LogicalExpressionTag.FUNCTION_CALL)) {
-                    sel = getSelectivityFromAnnotation(
+                    sel = getSelectivityFromAnnotation(jc,
                             (AbstractFunctionCallExpression) afcExpr.getArguments().get(i).getValue(), join,
                             singleDatasetPreds, jOp);
                     orSel = orSel + sel - orSel * sel;
@@ -310,7 +302,7 @@
             for (int i = 0; i < afcExpr.getArguments().size(); i++) {
                 ILogicalExpression lexpr = afcExpr.getArguments().get(i).getValue();
                 if (lexpr.getExpressionTag().equals(LogicalExpressionTag.FUNCTION_CALL)) {
-                    sel = getSelectivityFromAnnotation(
+                    sel = getSelectivityFromAnnotation(jc,
                             (AbstractFunctionCallExpression) afcExpr.getArguments().get(i).getValue(), join,
                             singleDatasetPreds, jOp);
                     andSel *= sel;
@@ -320,7 +312,7 @@
         } else if (afcExpr.getFunctionIdentifier().equals(AlgebricksBuiltinFunctions.NOT)) {
             ILogicalExpression lexpr = afcExpr.getArguments().get(0).getValue();
             if (lexpr.getExpressionTag().equals(LogicalExpressionTag.FUNCTION_CALL)) {
-                sel = getSelectivityFromAnnotation(
+                sel = getSelectivityFromAnnotation(jc,
                         (AbstractFunctionCallExpression) afcExpr.getArguments().get(0).getValue(), join,
                         singleDatasetPreds, jOp);
                 // We want to return 1.0 and not 0.0 if there was no annotation
@@ -351,7 +343,7 @@
             }
         } else {
             JoinProductivityAnnotation jpa = afcExpr.getAnnotation(JoinProductivityAnnotation.class);
-            s = findJoinSelectivity(jpa, afcExpr, jOp);
+            s = findJoinSelectivity(jc, jpa, afcExpr, jOp);
             sel *= s;
         }
 
@@ -365,13 +357,20 @@
         return sel;
     }
 
-    protected double getSelectivityFromAnnotationMain(ILogicalExpression leExpr, boolean join,
+    protected double getSelectivityFromAnnotationMain(JoinCondition jc, ILogicalExpression expr, boolean join,
             boolean singleDatasetPreds, JoinOperator jOp) throws AlgebricksException {
         double sel = 1.0;
 
+        ILogicalExpression leExpr;
+
+        if (jc != null)
+            leExpr = jc.joinCondition;
+        else
+            leExpr = expr;
+
         if (leExpr.getExpressionTag().equals(LogicalExpressionTag.FUNCTION_CALL)) {
             AbstractFunctionCallExpression afcExpr = (AbstractFunctionCallExpression) leExpr;
-            sel = getSelectivityFromAnnotation(afcExpr, join, singleDatasetPreds, jOp);
+            sel = getSelectivityFromAnnotation(jc, afcExpr, join, singleDatasetPreds, jOp);
         }
 
         return sel;
@@ -389,7 +388,7 @@
         while (op.getOperatorTag() != LogicalOperatorTag.EMPTYTUPLESOURCE) {
             if (op.getOperatorTag() == LogicalOperatorTag.SELECT) {
                 SelectOperator selOper = (SelectOperator) op;
-                sel *= getSelectivityFromAnnotationMain(selOper.getCondition().getValue(), join, false, null);
+                sel *= getSelectivityFromAnnotationMain(null, selOper.getCondition().getValue(), join, false, null);
             }
             if (op.getOperatorTag() == LogicalOperatorTag.SUBPLAN) {
                 sel *= getSelectivity((SubplanOperator) op);
@@ -406,7 +405,7 @@
         while (true) {
             if (op.getOperatorTag() == LogicalOperatorTag.SELECT) {
                 SelectOperator selOper = (SelectOperator) op;
-                sel *= getSelectivityFromAnnotationMain(selOper.getCondition().getValue(), false, false, null);
+                sel *= getSelectivityFromAnnotationMain(null, selOper.getCondition().getValue(), false, false, null);
             }
             if (op.getInputs().size() > 0) {
                 op = op.getInputs().get(0).getValue();
diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results_cbo/ch2/ch2_q5.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results_cbo/ch2/ch2_q5.plan
index 8a2135d..a1f1fce 100644
--- a/asterixdb/asterix-app/src/test/resources/optimizerts/results_cbo/ch2/ch2_q5.plan
+++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results_cbo/ch2/ch2_q5.plan
@@ -36,33 +36,33 @@
                         -- STREAM_PROJECT  |PARTITIONED|
                           exchange [cardinality: 9.223372036854776E16, doc-size: -6.0, op-cost: 0.0, total-cost: 9.223372036854776E16]
                           -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                            join (and(eq($$266, $$274), eq($$290, $$269))) [cardinality: 9.223372036854776E16, doc-size: -6.0, op-cost: 9.223372036854776E16, total-cost: 9.223372036854776E16]
-                            -- HYBRID_HASH_JOIN [$$274, $$290][$$266, $$269]  |PARTITIONED|
-                              exchange [cardinality: 9.223372036854776E16, doc-size: -5.0, op-cost: 0.0, total-cost: 9.223372036854776E16]
+                            join (eq($$256, $$257)) [cardinality: 9.223372036854776E16, doc-size: -6.0, op-cost: 9.223372036854776E16, total-cost: 9.223372036854776E16]
+                            -- HYBRID_HASH_JOIN [$$256][$$257]  |PARTITIONED|
+                              exchange [cardinality: 9.223372036854776E16, doc-size: -5.0, op-cost: 0.0, total-cost: 2.50025E11]
                               -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                assign [$$290] <- [numeric-mod(numeric-multiply($$245, $$246), 10000)] project: [$$280, $$275, $$274, $$290] [cardinality: 9.223372036854776E16, doc-size: -5.0, op-cost: 0.0, total-cost: 9.223372036854776E16]
-                                -- ASSIGN  |PARTITIONED|
-                                  project ([$$280, $$275, $$274, $$245, $$246]) [cardinality: 9.223372036854776E16, doc-size: -5.0, op-cost: 0.0, total-cost: 9.223372036854776E16]
-                                  -- STREAM_PROJECT  |PARTITIONED|
-                                    exchange [cardinality: 9.223372036854776E16, doc-size: -5.0, op-cost: 0.0, total-cost: 9.223372036854776E16]
-                                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                      join (eq($$256, $$257)) [cardinality: 9.223372036854776E16, doc-size: -5.0, op-cost: 9.223372036854776E16, total-cost: 9.223372036854776E16]
-                                      -- HYBRID_HASH_JOIN [$$256][$$257]  |PARTITIONED|
-                                        exchange [cardinality: 9.223372036854776E16, doc-size: -4.0, op-cost: 0.0, total-cost: 5.0002E11]
-                                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                          project ([$$280, $$275, $$274, $$245, $$246, $$256]) [cardinality: 9.223372036854776E16, doc-size: -4.0, op-cost: 0.0, total-cost: 5.0002E11]
-                                          -- STREAM_PROJECT  |PARTITIONED|
-                                            exchange [cardinality: 9.223372036854776E16, doc-size: -4.0, op-cost: 0.0, total-cost: 5.0002E11]
-                                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                              join (eq($$258, $$274)) [cardinality: 9.223372036854776E16, doc-size: -4.0, op-cost: 5.00004E11, total-cost: 5.0002E11]
-                                              -- HYBRID_HASH_JOIN [$$258][$$274]  |PARTITIONED|
-                                                exchange [cardinality: 5.0E11, doc-size: -3.0, op-cost: 0.0, total-cost: 1.1E7]
-                                                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                                  project ([$$280, $$245, $$246, $$258]) [cardinality: 5.0E11, doc-size: -3.0, op-cost: 0.0, total-cost: 1.1E7]
+                                project ([$$280, $$275, $$256]) [cardinality: 9.223372036854776E16, doc-size: -5.0, op-cost: 0.0, total-cost: 2.50025E11]
+                                -- STREAM_PROJECT  |PARTITIONED|
+                                  exchange [cardinality: 9.223372036854776E16, doc-size: -5.0, op-cost: 0.0, total-cost: 2.50025E11]
+                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                    join (eq($$266, $$274)) [cardinality: 9.223372036854776E16, doc-size: -5.0, op-cost: 2.50004E11, total-cost: 2.50025E11]
+                                    -- HYBRID_HASH_JOIN [$$266][$$274]  |PARTITIONED|
+                                      exchange [cardinality: 2.5E11, doc-size: -4.0, op-cost: 0.0, total-cost: 1.6E7]
+                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                        project ([$$280, $$266]) [cardinality: 2.5E11, doc-size: -4.0, op-cost: 0.0, total-cost: 1.6E7]
+                                        -- STREAM_PROJECT  |PARTITIONED|
+                                          exchange [cardinality: 2.5E11, doc-size: -4.0, op-cost: 0.0, total-cost: 1.6E7]
+                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                            join (and(eq($$290, $$269), eq($$266, $$258))) [cardinality: 2.5E11, doc-size: -4.0, op-cost: 2000000.0, total-cost: 1.6E7]
+                                            -- HYBRID_HASH_JOIN [$$290, $$258][$$269, $$266]  |PARTITIONED|
+                                              exchange [cardinality: 1000000.0, doc-size: -3.0, op-cost: 0.0, total-cost: 1.1E7]
+                                              -- HASH_PARTITION_EXCHANGE [$$290, $$258]  |PARTITIONED|
+                                                assign [$$290] <- [numeric-mod(numeric-multiply($$245, $$246), 10000)] project: [$$280, $$290, $$258] [cardinality: 1000000.0, doc-size: -3.0, op-cost: 0.0, total-cost: 1.1E7]
+                                                -- ASSIGN  |PARTITIONED|
+                                                  project ([$$280, $$245, $$246, $$258]) [cardinality: 1000000.0, doc-size: -3.0, op-cost: 0.0, total-cost: 1.1E7]
                                                   -- STREAM_PROJECT  |PARTITIONED|
-                                                    exchange [cardinality: 5.0E11, doc-size: -3.0, op-cost: 0.0, total-cost: 1.1E7]
+                                                    exchange [cardinality: 1000000.0, doc-size: -3.0, op-cost: 0.0, total-cost: 1.1E7]
                                                     -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                                      join (and(eq($$276, $$260), eq($$277, $$254), eq($$278, $$263))) [cardinality: 5.0E11, doc-size: -3.0, op-cost: 2000000.0, total-cost: 1.1E7]
+                                                      join (and(eq($$276, $$260), eq($$277, $$254), eq($$278, $$263))) [cardinality: 1000000.0, doc-size: -3.0, op-cost: 2000000.0, total-cost: 1.1E7]
                                                       -- HYBRID_HASH_JOIN [$$260, $$254, $$263][$$276, $$277, $$278]  |PARTITIONED|
                                                         exchange [cardinality: 1000000.0, doc-size: -2.0, op-cost: 0.0, total-cost: 6000000.0]
                                                         -- HASH_PARTITION_EXCHANGE [$$260, $$254, $$263]  |PARTITIONED|
@@ -120,47 +120,47 @@
                                                                   -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                                                     empty-tuple-source [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
                                                                     -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
-                                                exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
-                                                -- BROADCAST_EXCHANGE  |PARTITIONED|
-                                                  assign [$$275, $$274, $$256] <- [$$n.getField("n_name"), $$n.getField("n_nationkey"), $$n.getField("n_regionkey")] project: [$$275, $$274, $$256] [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
-                                                  -- ASSIGN  |PARTITIONED|
-                                                    project ([$$n]) [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
-                                                    -- STREAM_PROJECT  |PARTITIONED|
-                                                      exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
-                                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                                        data-scan []<-[$$250, $$n] <- test.nation [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
-                                                        -- DATASOURCE_SCAN  |PARTITIONED|
-                                                          exchange [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
-                                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                                            empty-tuple-source [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
-                                                            -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
-                                        exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
-                                        -- BROADCAST_EXCHANGE  |PARTITIONED|
-                                          select (eq($$r.getField("r_name"), "Asia")) project: [$$257] [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
-                                          -- STREAM_SELECT  |PARTITIONED|
-                                            assign [$$257] <- [$$r.getField("r_regionkey")] [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
-                                            -- ASSIGN  |PARTITIONED|
-                                              project ([$$r]) [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
-                                              -- STREAM_PROJECT  |PARTITIONED|
-                                                exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
-                                                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                                  data-scan []<-[$$251, $$r] <- test.region [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
-                                                  -- DATASOURCE_SCAN  |PARTITIONED|
-                                                    exchange [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
+                                              exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                              -- HASH_PARTITION_EXCHANGE [$$269, $$266]  |PARTITIONED|
+                                                assign [$$269, $$266] <- [$$su.getField("su_suppkey"), $$su.getField("su_nationkey")] project: [$$266, $$269] [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                                -- ASSIGN  |PARTITIONED|
+                                                  project ([$$su]) [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                                  -- STREAM_PROJECT  |PARTITIONED|
+                                                    exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                                     -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                                      empty-tuple-source [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
-                                                      -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                                                      data-scan []<-[$$252, $$su] <- test.supplier [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                                      -- DATASOURCE_SCAN  |PARTITIONED|
+                                                        exchange [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
+                                                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                          empty-tuple-source [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
+                                                          -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                                      exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                      -- BROADCAST_EXCHANGE  |PARTITIONED|
+                                        assign [$$275, $$274, $$256] <- [$$n.getField("n_name"), $$n.getField("n_nationkey"), $$n.getField("n_regionkey")] project: [$$275, $$256, $$274] [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                        -- ASSIGN  |PARTITIONED|
+                                          project ([$$n]) [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                          -- STREAM_PROJECT  |PARTITIONED|
+                                            exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                              data-scan []<-[$$250, $$n] <- test.nation [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                              -- DATASOURCE_SCAN  |PARTITIONED|
+                                                exchange [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
+                                                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                  empty-tuple-source [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
+                                                  -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
                               exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                               -- BROADCAST_EXCHANGE  |PARTITIONED|
-                                assign [$$269, $$266] <- [$$su.getField("su_suppkey"), $$su.getField("su_nationkey")] project: [$$266, $$269] [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
-                                -- ASSIGN  |PARTITIONED|
-                                  project ([$$su]) [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
-                                  -- STREAM_PROJECT  |PARTITIONED|
-                                    exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
-                                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                      data-scan []<-[$$252, $$su] <- test.supplier [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
-                                      -- DATASOURCE_SCAN  |PARTITIONED|
-                                        exchange [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
-                                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                          empty-tuple-source [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
-                                          -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                                select (eq($$r.getField("r_name"), "Asia")) project: [$$257] [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                -- STREAM_SELECT  |PARTITIONED|
+                                  assign [$$257] <- [$$r.getField("r_regionkey")] [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                  -- ASSIGN  |PARTITIONED|
+                                    project ([$$r]) [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                    -- STREAM_PROJECT  |PARTITIONED|
+                                      exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                        data-scan []<-[$$251, $$r] <- test.region [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                        -- DATASOURCE_SCAN  |PARTITIONED|
+                                          exchange [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
+                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                            empty-tuple-source [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
+                                            -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results_cbo/ch2/ch2_q7.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results_cbo/ch2/ch2_q7.plan
index dc98276..2eeb76c 100644
--- a/asterixdb/asterix-app/src/test/resources/optimizerts/results_cbo/ch2/ch2_q7.plan
+++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results_cbo/ch2/ch2_q7.plan
@@ -1,172 +1,172 @@
-distribute result [$$271] [cardinality: 9.223372036854776E16, op-cost: 0.0, total-cost: 9.223372036854776E16]
+distribute result [$$271] [cardinality: 9.223372036854776E16, doc-size: -6.0, op-cost: 0.0, total-cost: 9.223372036854776E16]
 -- DISTRIBUTE_RESULT  |PARTITIONED|
-  exchange [cardinality: 9.223372036854776E16, op-cost: 0.0, total-cost: 9.223372036854776E16]
+  exchange [cardinality: 9.223372036854776E16, doc-size: -6.0, op-cost: 0.0, total-cost: 9.223372036854776E16]
   -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-    assign [$$271] <- [{"supp_nation": $$su_nationkey, "cust_nation": $#1, "l_year": $#2, "revenue": round($$301, 2)}] project: [$$271] [cardinality: 9.223372036854776E16, op-cost: 0.0, total-cost: 9.223372036854776E16]
+    assign [$$271] <- [{"supp_nation": $$su_nationkey, "cust_nation": $#1, "l_year": $#2, "revenue": round($$301, 2)}] project: [$$271] [cardinality: 9.223372036854776E16, doc-size: -6.0, op-cost: 0.0, total-cost: 9.223372036854776E16]
     -- ASSIGN  |PARTITIONED|
-      exchange [cardinality: 9.223372036854776E16, op-cost: 0.0, total-cost: 9.223372036854776E16]
+      exchange [cardinality: 9.223372036854776E16, doc-size: -6.0, op-cost: 0.0, total-cost: 9.223372036854776E16]
       -- SORT_MERGE_EXCHANGE [$$su_nationkey(ASC), $#1(ASC), $#2(ASC) ]  |PARTITIONED|
         group by ([$$su_nationkey := $$328; $#1 := $$329; $#2 := $$330]) decor ([]) {
-                  aggregate [$$301] <- [agg-global-sql-sum($$327)]
+                  aggregate [$$301] <- [agg-global-sql-sum($$327)] [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
                   -- AGGREGATE  |LOCAL|
-                    nested tuple source
+                    nested tuple source [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
                     -- NESTED_TUPLE_SOURCE  |LOCAL|
-               } [cardinality: 0.0, op-cost: 0.0, total-cost: 9.223372036854776E16]
+               } [cardinality: 9.223372036854776E16, doc-size: -6.0, op-cost: 0.0, total-cost: 9.223372036854776E16]
         -- SORT_GROUP_BY[$$328, $$329, $$330]  |PARTITIONED|
-          exchange [cardinality: 9.223372036854776E16, op-cost: 0.0, total-cost: 9.223372036854776E16]
+          exchange [cardinality: 9.223372036854776E16, doc-size: -6.0, op-cost: 0.0, total-cost: 9.223372036854776E16]
           -- HASH_PARTITION_EXCHANGE [$$328, $$329, $$330]  |PARTITIONED|
             group by ([$$328 := $$277; $$329 := $$273; $$330 := $$274]) decor ([]) {
-                      aggregate [$$327] <- [agg-local-sql-sum($$313)]
+                      aggregate [$$327] <- [agg-local-sql-sum($$313)] [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
                       -- AGGREGATE  |LOCAL|
-                        nested tuple source
+                        nested tuple source [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
                         -- NESTED_TUPLE_SOURCE  |LOCAL|
-                   } [cardinality: 0.0, op-cost: 0.0, total-cost: 9.223372036854776E16]
+                   } [cardinality: 9.223372036854776E16, doc-size: -6.0, op-cost: 0.0, total-cost: 9.223372036854776E16]
             -- SORT_GROUP_BY[$$277, $$273, $$274]  |PARTITIONED|
-              exchange [cardinality: 9.223372036854776E16, op-cost: 0.0, total-cost: 9.223372036854776E16]
+              exchange [cardinality: 9.223372036854776E16, doc-size: -6.0, op-cost: 0.0, total-cost: 9.223372036854776E16]
               -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                assign [$$274, $$273] <- [get-year(date($$309)), substring1($$285, 1, 1)] project: [$$313, $$277, $$273, $$274] [cardinality: 9.223372036854776E16, op-cost: 0.0, total-cost: 9.223372036854776E16]
+                assign [$$274, $$273] <- [get-year(date($$309)), substring1($$285, 1, 1)] project: [$$313, $$277, $$273, $$274] [cardinality: 9.223372036854776E16, doc-size: -6.0, op-cost: 0.0, total-cost: 9.223372036854776E16]
                 -- ASSIGN  |PARTITIONED|
-                  project ([$$313, $$277, $$309, $$285]) [cardinality: 9.223372036854776E16, op-cost: 0.0, total-cost: 9.223372036854776E16]
+                  project ([$$313, $$277, $$309, $$285]) [cardinality: 9.223372036854776E16, doc-size: -6.0, op-cost: 0.0, total-cost: 9.223372036854776E16]
                   -- STREAM_PROJECT  |PARTITIONED|
-                    exchange [cardinality: 9.223372036854776E16, op-cost: 0.0, total-cost: 9.223372036854776E16]
+                    exchange [cardinality: 9.223372036854776E16, doc-size: -6.0, op-cost: 0.0, total-cost: 9.223372036854776E16]
                     -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                      join (and(eq($$277, $$303), or(and(eq($$286, "Germany"), eq($$287, "Cambodia")), and(eq($$286, "Cambodia"), eq($$287, "Germany"))))) [cardinality: 9.223372036854776E16, op-cost: 9.223372036854776E16, total-cost: 9.223372036854776E16]
-                      -- HYBRID_HASH_JOIN [$$277][$$303]  |PARTITIONED|
-                        exchange [cardinality: 9.223372036854776E16, op-cost: 0.0, total-cost: 5.00025E11]
+                      join (and(eq($$288, $$304), or(and(eq($$286, "Germany"), eq($$287, "Cambodia")), and(eq($$286, "Cambodia"), eq($$287, "Germany"))))) [cardinality: 9.223372036854776E16, doc-size: -6.0, op-cost: 9.223372036854776E16, total-cost: 9.223372036854776E16]
+                      -- HYBRID_HASH_JOIN [$$288][$$304]  |PARTITIONED|
+                        exchange [cardinality: 9.223372036854776E16, doc-size: -5.0, op-cost: 0.0, total-cost: 5.00025E11]
                         -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                          project ([$$313, $$277, $$309, $$285, $$287]) [cardinality: 9.223372036854776E16, op-cost: 0.0, total-cost: 5.00025E11]
+                          project ([$$313, $$277, $$309, $$285, $$288, $$286]) [cardinality: 9.223372036854776E16, doc-size: -5.0, op-cost: 0.0, total-cost: 5.00025E11]
                           -- STREAM_PROJECT  |PARTITIONED|
-                            exchange [cardinality: 9.223372036854776E16, op-cost: 0.0, total-cost: 5.00025E11]
+                            exchange [cardinality: 9.223372036854776E16, doc-size: -5.0, op-cost: 0.0, total-cost: 5.00025E11]
                             -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                              join (eq($$326, $$300)) [cardinality: 9.223372036854776E16, op-cost: 5.00004E11, total-cost: 5.00025E11]
-                              -- HYBRID_HASH_JOIN [$$326][$$300]  |PARTITIONED|
-                                exchange [cardinality: 5.0E11, op-cost: 0.0, total-cost: 1.6E7]
+                              join (eq($$277, $$303)) [cardinality: 9.223372036854776E16, doc-size: -5.0, op-cost: 5.00004E11, total-cost: 5.00025E11]
+                              -- HYBRID_HASH_JOIN [$$277][$$303]  |PARTITIONED|
+                                exchange [cardinality: 5.0E11, doc-size: -4.0, op-cost: 0.0, total-cost: 1.6E7]
                                 -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                  assign [$$326] <- [numeric-mod(numeric-multiply($$275, $$276), 10000)] project: [$$313, $$309, $$285, $$287, $$326] [cardinality: 5.0E11, op-cost: 0.0, total-cost: 1.6E7]
-                                  -- ASSIGN  |PARTITIONED|
-                                    project ([$$313, $$309, $$285, $$275, $$276, $$287]) [cardinality: 5.0E11, op-cost: 0.0, total-cost: 1.6E7]
-                                    -- STREAM_PROJECT  |PARTITIONED|
-                                      exchange [cardinality: 5.0E11, op-cost: 0.0, total-cost: 1.6E7]
-                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                        join (eq($$288, $$304)) [cardinality: 5.0E11, op-cost: 2000000.0, total-cost: 1.6E7]
-                                        -- HYBRID_HASH_JOIN [$$288][$$304]  |PARTITIONED|
-                                          exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 1.2E7]
-                                          -- HASH_PARTITION_EXCHANGE [$$288]  |PARTITIONED|
-                                            project ([$$313, $$309, $$285, $$275, $$276, $$288]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1.1E7]
+                                  project ([$$313, $$277, $$309, $$285, $$288]) [cardinality: 5.0E11, doc-size: -4.0, op-cost: 0.0, total-cost: 1.6E7]
+                                  -- STREAM_PROJECT  |PARTITIONED|
+                                    exchange [cardinality: 5.0E11, doc-size: -4.0, op-cost: 0.0, total-cost: 1.6E7]
+                                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                      join (eq($$326, $$300)) [cardinality: 5.0E11, doc-size: -4.0, op-cost: 2000000.0, total-cost: 1.6E7]
+                                      -- HYBRID_HASH_JOIN [$$326][$$300]  |PARTITIONED|
+                                        exchange [cardinality: 1000000.0, doc-size: -3.0, op-cost: 0.0, total-cost: 1.1E7]
+                                        -- HASH_PARTITION_EXCHANGE [$$326]  |PARTITIONED|
+                                          assign [$$326] <- [numeric-mod(numeric-multiply($$275, $$276), 10000)] project: [$$313, $$309, $$285, $$288, $$326] [cardinality: 1000000.0, doc-size: -3.0, op-cost: 0.0, total-cost: 1.1E7]
+                                          -- ASSIGN  |PARTITIONED|
+                                            project ([$$313, $$309, $$275, $$276, $$285, $$288]) [cardinality: 1000000.0, doc-size: -3.0, op-cost: 0.0, total-cost: 1.1E7]
                                             -- STREAM_PROJECT  |PARTITIONED|
-                                              exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 1.2E7]
+                                              exchange [cardinality: 1000000.0, doc-size: -3.0, op-cost: 0.0, total-cost: 1.1E7]
                                               -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                                join (and(eq($$305, $$291), eq($$306, $$293), eq($$307, $$295))) [cardinality: 1000000.0, op-cost: 2000000.0, total-cost: 1.1E7]
+                                                join (and(eq($$305, $$291), eq($$306, $$293), eq($$307, $$295))) [cardinality: 1000000.0, doc-size: -3.0, op-cost: 2000000.0, total-cost: 1.1E7]
                                                 -- HYBRID_HASH_JOIN [$$291, $$293, $$295][$$305, $$306, $$307]  |PARTITIONED|
-                                                  exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 7000000.0]
+                                                  exchange [cardinality: 1000000.0, doc-size: -2.0, op-cost: 0.0, total-cost: 6000000.0]
                                                   -- HASH_PARTITION_EXCHANGE [$$291, $$293, $$295]  |PARTITIONED|
-                                                    project ([$$313, $$309, $$275, $$276, $$291, $$293, $$295]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0]
+                                                    project ([$$313, $$309, $$275, $$276, $$291, $$293, $$295]) [cardinality: 1000000.0, doc-size: -2.0, op-cost: 0.0, total-cost: 6000000.0]
                                                     -- STREAM_PROJECT  |PARTITIONED|
-                                                      exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 7000000.0]
+                                                      exchange [cardinality: 1000000.0, doc-size: -2.0, op-cost: 0.0, total-cost: 6000000.0]
                                                       -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                                        join (and(eq($$310, $$275), eq($$311, $$276))) [cardinality: 1000000.0, op-cost: 2000000.0, total-cost: 6000000.0]
+                                                        join (and(eq($$310, $$275), eq($$311, $$276))) [cardinality: 1000000.0, doc-size: -2.0, op-cost: 2000000.0, total-cost: 6000000.0]
                                                         -- HYBRID_HASH_JOIN [$$275, $$276][$$310, $$311]  |PARTITIONED|
-                                                          exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0]
+                                                          exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                                           -- HASH_PARTITION_EXCHANGE [$$275, $$276]  |PARTITIONED|
-                                                            assign [$$276, $$275] <- [$$s.getField("s_i_id"), $$s.getField("s_w_id")] project: [$$275, $$276] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+                                                            assign [$$276, $$275] <- [$$s.getField("s_i_id"), $$s.getField("s_w_id")] project: [$$275, $$276] [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                                             -- ASSIGN  |PARTITIONED|
-                                                              project ([$$s]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+                                                              project ([$$s]) [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                                               -- STREAM_PROJECT  |PARTITIONED|
-                                                                exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0]
+                                                                exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                                                 -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                                                  data-scan []<-[$$278, $$s] <- test.stock [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 1000000.0]
+                                                                  data-scan []<-[$$278, $$s] <- test.stock [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                                                   -- DATASOURCE_SCAN  |PARTITIONED|
-                                                                    exchange
+                                                                    exchange [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
                                                                     -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                                                      empty-tuple-source
+                                                                      empty-tuple-source [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
                                                                       -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
-                                                          exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0]
+                                                          exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                                           -- HASH_PARTITION_EXCHANGE [$$310, $$311]  |PARTITIONED|
-                                                            select (and(ge($$284, "2017-01-01 00:00:00.000000"), le($$284, "2018-12-31 00:00:00.000000"))) project: [$$313, $$309, $$291, $$293, $$295, $$310, $$311] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+                                                            select (and(ge($$284, "2017-01-01 00:00:00.000000"), le($$284, "2018-12-31 00:00:00.000000"))) project: [$$313, $$309, $$291, $$293, $$295, $$310, $$311] [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                                             -- STREAM_SELECT  |PARTITIONED|
-                                                              assign [$$313, $$311, $$310, $$284] <- [$$ol.getField("ol_amount"), $$ol.getField("ol_i_id"), $$ol.getField("ol_supply_w_id"), $$ol.getField("ol_delivery_d")] project: [$$309, $$295, $$293, $$291, $$313, $$311, $$310, $$284] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+                                                              assign [$$313, $$311, $$310, $$284] <- [$$ol.getField("ol_amount"), $$ol.getField("ol_i_id"), $$ol.getField("ol_supply_w_id"), $$ol.getField("ol_delivery_d")] project: [$$309, $$295, $$293, $$291, $$313, $$311, $$310, $$284] [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                                               -- ASSIGN  |PARTITIONED|
-                                                                unnest $$ol <- scan-collection($$302) project: [$$309, $$295, $$293, $$291, $$ol] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+                                                                unnest $$ol <- scan-collection($$302) project: [$$309, $$295, $$293, $$291, $$ol] [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                                                 -- UNNEST  |PARTITIONED|
-                                                                  assign [$$309, $$295, $$293, $$291, $$302] <- [$$o.getField("o_entry_d"), $$o.getField("o_d_id"), $$o.getField("o_w_id"), $$o.getField("o_c_id"), $$o.getField("o_orderline")] project: [$$309, $$295, $$293, $$291, $$302] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+                                                                  assign [$$309, $$295, $$293, $$291, $$302] <- [$$o.getField("o_entry_d"), $$o.getField("o_d_id"), $$o.getField("o_w_id"), $$o.getField("o_c_id"), $$o.getField("o_orderline")] project: [$$309, $$295, $$293, $$291, $$302] [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                                                   -- ASSIGN  |PARTITIONED|
-                                                                    project ([$$o]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+                                                                    project ([$$o]) [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                                                     -- STREAM_PROJECT  |PARTITIONED|
-                                                                      exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0]
+                                                                      exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                                                       -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                                                        data-scan []<-[$$279, $$o] <- test.orders [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 1000000.0]
+                                                                        data-scan []<-[$$279, $$o] <- test.orders [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                                                         -- DATASOURCE_SCAN  |PARTITIONED|
-                                                                          exchange
+                                                                          exchange [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
                                                                           -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                                                            empty-tuple-source
+                                                                            empty-tuple-source [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
                                                                             -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
-                                                  exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0]
+                                                  exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                                   -- HASH_PARTITION_EXCHANGE [$$305, $$306, $$307]  |PARTITIONED|
-                                                    assign [$$288] <- [get-item(string-to-codepoint($$285), 0)] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+                                                    assign [$$288] <- [get-item(string-to-codepoint($$285), 0)] [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                                     -- ASSIGN  |PARTITIONED|
-                                                      assign [$$307, $$306, $$305, $$285] <- [$$c.getField("c_d_id"), $$c.getField("c_w_id"), $$c.getField("c_id"), $$c.getField("c_state")] project: [$$307, $$306, $$305, $$285] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+                                                      assign [$$307, $$306, $$305, $$285] <- [$$c.getField("c_d_id"), $$c.getField("c_w_id"), $$c.getField("c_id"), $$c.getField("c_state")] project: [$$307, $$306, $$305, $$285] [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                                       -- ASSIGN  |PARTITIONED|
-                                                        project ([$$c]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+                                                        project ([$$c]) [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                                         -- STREAM_PROJECT  |PARTITIONED|
-                                                          exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0]
+                                                          exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                                           -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                                            data-scan []<-[$$280, $$c] <- test.customer [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 1000000.0]
+                                                            data-scan []<-[$$280, $$c] <- test.customer [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                                             -- DATASOURCE_SCAN  |PARTITIONED|
-                                                              exchange
+                                                              exchange [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
                                                               -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                                                empty-tuple-source
+                                                                empty-tuple-source [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
                                                                 -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
-                                          exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0]
-                                          -- HASH_PARTITION_EXCHANGE [$$304]  |PARTITIONED|
-                                            replicate [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
-                                            -- REPLICATE  |PARTITIONED|
-                                              exchange [cardinality: 1000000.0, op-cost: 4000000.0, total-cost: 5000000.0]
+                                        exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                        -- HASH_PARTITION_EXCHANGE [$$300]  |PARTITIONED|
+                                          assign [$$300, $$277] <- [$$su.getField("su_suppkey"), $$su.getField("su_nationkey")] project: [$$277, $$300] [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                          -- ASSIGN  |PARTITIONED|
+                                            project ([$$su]) [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                            -- STREAM_PROJECT  |PARTITIONED|
+                                              exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                               -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                                assign [$$304, $$287] <- [$$n2.getField("n_nationkey"), $$n2.getField("n_name")] project: [$$287, $$304] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
-                                                -- ASSIGN  |PARTITIONED|
-                                                  project ([$$n2]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
-                                                  -- STREAM_PROJECT  |PARTITIONED|
-                                                    exchange [cardinality: 1000000.0, op-cost: 4000000.0, total-cost: 5000000.0]
-                                                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                                      data-scan []<-[$$282, $$n2] <- test.nation [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 1000000.0]
-                                                      -- DATASOURCE_SCAN  |PARTITIONED|
-                                                        exchange
-                                                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                                          empty-tuple-source
-                                                          -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
-                                exchange [cardinality: 1000000.0, op-cost: 4000000.0, total-cost: 5000000.0]
-                                -- BROADCAST_EXCHANGE  |PARTITIONED|
-                                  assign [$$300, $$277] <- [$$su.getField("su_suppkey"), $$su.getField("su_nationkey")] project: [$$277, $$300] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
-                                  -- ASSIGN  |PARTITIONED|
-                                    project ([$$su]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
-                                    -- STREAM_PROJECT  |PARTITIONED|
-                                      exchange [cardinality: 1000000.0, op-cost: 4000000.0, total-cost: 5000000.0]
-                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                        data-scan []<-[$$283, $$su] <- test.supplier [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 1000000.0]
-                                        -- DATASOURCE_SCAN  |PARTITIONED|
-                                          exchange
-                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                            empty-tuple-source
-                                            -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
-                        exchange [cardinality: 1000000.0, op-cost: 4000000.0, total-cost: 5000000.0]
-                        -- BROADCAST_EXCHANGE  |PARTITIONED|
-                          assign [$$286, $$303] <- [$$287, $$304] project: [$$286, $$303] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
-                          -- ASSIGN  |PARTITIONED|
-                            exchange [cardinality: 1000000.0, op-cost: 4000000.0, total-cost: 5000000.0]
-                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                              replicate [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
-                              -- REPLICATE  |PARTITIONED|
-                                exchange [cardinality: 1000000.0, op-cost: 4000000.0, total-cost: 5000000.0]
+                                                data-scan []<-[$$283, $$su] <- test.supplier [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                                -- DATASOURCE_SCAN  |PARTITIONED|
+                                                  exchange [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
+                                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                    empty-tuple-source [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
+                                                    -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                                exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                 -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                  assign [$$304, $$287] <- [$$n2.getField("n_nationkey"), $$n2.getField("n_name")] project: [$$287, $$304] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
-                                  -- ASSIGN  |PARTITIONED|
-                                    project ([$$n2]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
-                                    -- STREAM_PROJECT  |PARTITIONED|
-                                      exchange [cardinality: 1000000.0, op-cost: 4000000.0, total-cost: 5000000.0]
-                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                        data-scan []<-[$$282, $$n2] <- test.nation [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 1000000.0]
-                                        -- DATASOURCE_SCAN  |PARTITIONED|
-                                          exchange
+                                  replicate [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                  -- REPLICATE  |PARTITIONED|
+                                    exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                    -- BROADCAST_EXCHANGE  |PARTITIONED|
+                                      assign [$$303, $$286] <- [$$n1.getField("n_nationkey"), $$n1.getField("n_name")] project: [$$286, $$303] [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                      -- ASSIGN  |PARTITIONED|
+                                        project ([$$n1]) [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                        -- STREAM_PROJECT  |PARTITIONED|
+                                          exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
                                           -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                            empty-tuple-source
-                                            -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
\ No newline at end of file
+                                            data-scan []<-[$$281, $$n1] <- test.nation [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                            -- DATASOURCE_SCAN  |PARTITIONED|
+                                              exchange [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
+                                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                empty-tuple-source [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
+                                                -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                        exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                          assign [$$287, $$304] <- [$$286, $$303] project: [$$287, $$304] [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                          -- ASSIGN  |PARTITIONED|
+                            exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                              replicate [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                              -- REPLICATE  |PARTITIONED|
+                                exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                -- BROADCAST_EXCHANGE  |PARTITIONED|
+                                  assign [$$303, $$286] <- [$$n1.getField("n_nationkey"), $$n1.getField("n_name")] project: [$$286, $$303] [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                  -- ASSIGN  |PARTITIONED|
+                                    project ([$$n1]) [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                    -- STREAM_PROJECT  |PARTITIONED|
+                                      exchange [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                        data-scan []<-[$$281, $$n1] <- test.nation [cardinality: 1000000.0, doc-size: -1.0, op-cost: 0.0, total-cost: 1000000.0]
+                                        -- DATASOURCE_SCAN  |PARTITIONED|
+                                          exchange [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
+                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                            empty-tuple-source [cardinality: 0.0, doc-size: 0.0, op-cost: 0.0, total-cost: 0.0]
+                                            -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/filter/subplan/subplan.042.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/filter/subplan/subplan.042.plan
index ef307c5..6ba2a43 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/filter/subplan/subplan.042.plan
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/filter/subplan/subplan.042.plan
@@ -1,16 +1,16 @@
-distribute result [$$70] [cardinality: 6.0, doc-size: 11.0, op-cost: 0.0, total-cost: 28.6]
+distribute result [$$70] [cardinality: 3.08, doc-size: 11.0, op-cost: 0.0, total-cost: 28.6]
 -- DISTRIBUTE_RESULT  |UNPARTITIONED|
-  exchange [cardinality: 6.0, doc-size: 11.0, op-cost: 0.0, total-cost: 28.6]
+  exchange [cardinality: 3.08, doc-size: 11.0, op-cost: 0.0, total-cost: 28.6]
   -- ONE_TO_ONE_EXCHANGE  |UNPARTITIONED|
-    aggregate [$$70] <- [agg-sql-sum($$76)] [cardinality: 6.0, doc-size: 11.0, op-cost: 0.0, total-cost: 28.6]
+    aggregate [$$70] <- [agg-sql-sum($$76)] [cardinality: 3.08, doc-size: 11.0, op-cost: 0.0, total-cost: 28.6]
     -- AGGREGATE  |UNPARTITIONED|
-      exchange [cardinality: 6.0, doc-size: 11.0, op-cost: 0.0, total-cost: 28.6]
+      exchange [cardinality: 3.08, doc-size: 11.0, op-cost: 0.0, total-cost: 28.6]
       -- RANDOM_MERGE_EXCHANGE  |PARTITIONED|
-        aggregate [$$76] <- [agg-sql-count(1)] [cardinality: 6.0, doc-size: 11.0, op-cost: 0.0, total-cost: 28.6]
+        aggregate [$$76] <- [agg-sql-count(1)] [cardinality: 3.08, doc-size: 11.0, op-cost: 0.0, total-cost: 28.6]
         -- AGGREGATE  |PARTITIONED|
-          exchange [cardinality: 6.0, doc-size: 11.0, op-cost: 0.0, total-cost: 28.6]
+          exchange [cardinality: 3.08, doc-size: 11.0, op-cost: 0.0, total-cost: 28.6]
           -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-            join (or(eq($$71, "7"), neq($$69, 0))) [cardinality: 6.0, doc-size: 11.0, op-cost: 12.6, total-cost: 28.6]
+            join (or(eq($$71, "7"), neq($$69, 0))) [cardinality: 3.08, doc-size: 11.0, op-cost: 12.6, total-cost: 28.6]
             -- NESTED_LOOP  |PARTITIONED|
               exchange [cardinality: 6.0, doc-size: 9.0, op-cost: 0.0, total-cost: 6.0]
               -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|