[ASTERIXDB-3571][COMP] Infer Selection predicates from join predicates

Ext-ref: MB-65670

Change-Id: I7766e107bfadd915b6d760fc1ab74a6651c49db6
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19943
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: <preetham02@apache.org>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java
index 0f4cc82..a75a90b 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java
@@ -19,10 +19,12 @@
 
 package org.apache.asterix.optimizer.rules.cbo;
 
+import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
 import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
 
 public class JoinCondition {
 
@@ -47,6 +49,7 @@
     protected comparisonOp comparisonType;
     protected JoinOperator joinOp = null;
     protected List<LogicalVariable> usedVars = null;
+    protected List<SelectOperator> derivedSelOps = new ArrayList<>(); // only one of them will be regarded as original
 
     protected enum comparisonOp {
         OP_EQ,
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
index 73be298..16518c2 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
@@ -1334,6 +1334,7 @@
 
         findJoinConditionsAndDoTC();
         addTCSelectionPredicates();
+        keepOnlyOneSelectivityHint();
         int lastBaseLevelJnNum = enumerateBaseLevelJoinNodes();
         if (lastBaseLevelJnNum == PlanNode.NO_PLAN) {
             return PlanNode.NO_PLAN;
@@ -1375,6 +1376,26 @@
         return lastJn.cheapestPlanIndex;
     }
 
+    // Per join condition: when several derived selects carry a cardinality hint, drop it from the last one.
+    private void keepOnlyOneSelectivityHint() {
+        for (JoinCondition jc : joinConditions) {
+            int n = 0;
+            AbstractFunctionCallExpression lastHinted = null; // last derived predicate seen with the hint
+            for (SelectOperator selOp : jc.derivedSelOps) {
+                AbstractFunctionCallExpression afce = (AbstractFunctionCallExpression) selOp.getCondition().getValue();
+                if (afce.hasAnnotation(PredicateCardinalityAnnotation.class)) {
+                    n++;
+                    lastHinted = afce;
+                }
+            }
+            // n <= 1 (R.a = S.a and R.a < 1): nothing to strip; move on so later join conditions are still checked.
+            // n >= 2 (R.a = S.a and R.a < 1 and S.a < 1): user typed both predicates, so each one looks derived.
+            if (n > 1) {
+                lastHinted.removeAnnotation(PredicateCardinalityAnnotation.class);
+            }
+        }
+    }
+
     // R.a = S.a and R.a op operand ==> S.a op operand
     private void addTCSelectionPredicates() throws AlgebricksException {
         List<SelectOperator> existingSelOps = new ArrayList<>();
@@ -1396,33 +1417,33 @@
             List<JoinCondition> jcs = findVarinJoinPreds(var);
             for (JoinCondition jc : jcs) { // join predicate can be R.a = S.a or S.a = R.a. Check for both cases
                 if (var == jc.usedVars.get(0)) { // R.a
-                    newSelOp = makeNewSelOper(existingSelOps, jc.usedVars.get(1), // == S.a
+                    newSelOp = makeNewSelOper(jc, existingSelOps, jc.usedVars.get(1), // == S.a
                             ((AbstractFunctionCallExpression) selOp.getCondition().getValue()).getFunctionInfo(), // op
                             exp.getArguments().get(1)); // operand
                     if (newSelOp != null) { // does not already exist
-                        addSelOpToLeafInput(jc.usedVars.get(1), newSelOp);
+                        addSelOpToLeafInput(jc, jc.usedVars.get(1), newSelOp);
                     }
                 } else if (var == jc.usedVars.get(1)) { // R.a
-                    newSelOp = makeNewSelOper(existingSelOps, jc.usedVars.get(0), // == S.a
+                    newSelOp = makeNewSelOper(jc, existingSelOps, jc.usedVars.get(0), // == S.a
                             ((AbstractFunctionCallExpression) selOp.getCondition().getValue()).getFunctionInfo(), // op
                             exp.getArguments().get(1)); // operand
                     if (newSelOp != null) {
-                        addSelOpToLeafInput(jc.usedVars.get(0), newSelOp);
+                        addSelOpToLeafInput(jc, jc.usedVars.get(0), newSelOp);
                     }
                 }
             }
         }
     }
 
-    private SelectOperator makeNewSelOper(List<SelectOperator> existingSelOps, LogicalVariable var, IFunctionInfo tag,
-            Mutable<ILogicalExpression> arg) throws AlgebricksException {
+    private SelectOperator makeNewSelOper(JoinCondition jc, List<SelectOperator> existingSelOps, LogicalVariable var,
+            IFunctionInfo tag, Mutable<ILogicalExpression> arg) throws AlgebricksException {
         List<Mutable<ILogicalExpression>> arguments = new ArrayList<>();
         VariableReferenceExpression e1 = new VariableReferenceExpression(var);
         arguments.add(new MutableObject<>(e1)); // S.a
         arguments.add(new MutableObject<>(arg.getValue())); // this will be the operand
         ScalarFunctionCallExpression expr = new ScalarFunctionCallExpression(tag, arguments); //S.a op operand
         SelectOperator newsel = new SelectOperator(new MutableObject<>(expr), null, null);
-        if (newSelNotPresent(newsel, existingSelOps)) {
+        if (newSelNotPresent(jc, newsel, existingSelOps)) {
             LOGGER.info("adding newsel " + newsel.getCondition());
             return newsel; // add since it does not exist
         } else {
@@ -1430,21 +1451,33 @@
         }
     }
 
-    private boolean newSelNotPresent(SelectOperator newsel, List<SelectOperator> existingSelOps) {
+    private boolean newSelNotPresent(JoinCondition jc, SelectOperator newsel, List<SelectOperator> existingSelOps) {
         for (SelectOperator existingSelOp : existingSelOps) {
             if (newsel.getCondition().equals(existingSelOp.getCondition())) {
+                PredicateCardinalityAnnotation anno = new PredicateCardinalityAnnotation(0.9999); // must not be exactly 1.0, or the check in setCardsAndSizes will not work
+                AbstractFunctionCallExpression afce =
+                        (AbstractFunctionCallExpression) existingSelOp.getCondition().getValue();
+                afce.putAnnotation(anno); // the already-present predicate gets the hint too
+                jc.derivedSelOps.add(existingSelOp); // recorded on jc; keepOnlyOneSelectivityHint reads this list
                 return false;
             }
         }
         return true;
     }
 
-    private void addSelOpToLeafInput(LogicalVariable var, SelectOperator newSelOp) throws AlgebricksException {
+    private void addSelOpToLeafInput(JoinCondition jc, LogicalVariable var, SelectOperator newSelOp)
+            throws AlgebricksException {
         int l = varLeafInputIds.get(var); // get the corresponding leafInput using the map
         ILogicalOperator parent = leafInputs.get(l - 1);
         ILogicalOperator child = parent.getInputs().get(0).getValue();
         parent.getInputs().get(0).setValue(newSelOp);
         newSelOp.getInputs().add(new MutableObject<>(child));
+        // Add the selectivity annotation with selectivity 0.9999 (the same value newSelNotPresent uses, not 1.0).
+        // Note the actual cardinality will be different; but all join cardinalities should be ok.
+        PredicateCardinalityAnnotation anno = new PredicateCardinalityAnnotation(0.9999);
+        AbstractFunctionCallExpression afce = (AbstractFunctionCallExpression) newSelOp.getCondition().getValue();
+        afce.putAnnotation(anno);
+        jc.derivedSelOps.add(newSelOp); // recorded on jc; keepOnlyOneSelectivityHint reads this list
         optCtx.computeAndSetTypeEnvironmentForOperator(newSelOp);
     }
 
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
index 3451966..5cbb6f8 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
@@ -371,6 +371,7 @@
 
         // There are predicates here. So skip the predicates and get the original dataset card.
         // Now apply all the predicates and get the card after all predicates are applied.
+        // We call the sampling query even if a selectivity hint was provided because we have to get the lengths of the variables.
         result = joinEnum.getStatsHandle().runSamplingQueryProjection(joinEnum.optCtx, leafInput, jnArrayIndex,
                 primaryKey);
         double predicateCardinalityFromSample = joinEnum.getStatsHandle().findPredicateCardinality(result, true);
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index 18bc53f..ab57d3f 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -153,15 +153,7 @@
                 return 1.0;
             }
             double productivity = anno.getJoinProductivity();
-            if (productivity <= 0) {
-                IWarningCollector warningCollector = joinEnum.optCtx.getWarningCollector();
-                if (warningCollector.shouldWarn()) {
-                    warningCollector.warn(Warning.of(joinExpr.getSourceLocation(), ErrorCode.INAPPLICABLE_HINT,
-                            "productivity",
-                            "Productivity specified: " + productivity + ", has to be a decimal value greater than 0"));
-                }
-                return 1.0;
-            }
+
             if (leftIndex == idx1) {
                 return productivity / card2;
             } else {