[ASTERIXDB-3555][COMP] Use Join Samples to get Join Selectivity

Ext-ref: MB-65101

Change-Id: Iacf111e2d6ed5307b747045a57107eccb872d3f0
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19496
Reviewed-by: <murali.krishna@couchbase.com>
Reviewed-by: Peeyush Gupta <peeyush.gupta@couchbase.com>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index 52566ef..e1aab84 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -187,17 +187,21 @@
                 return 0.5; // this may not be accurate obviously!
             } // we can do all relops here and other joins such as interval joins and spatial joins, the compile time might increase a lot
 
+            //If one of the tables is smaller than the target sample size, we can join the samples directly
+            // to get a good estimate of the join selectivity.
             Index.SampleIndexDetails idxDetails1 = (Index.SampleIndexDetails) index1.getIndexDetails();
             Index.SampleIndexDetails idxDetails2 = (Index.SampleIndexDetails) index2.getIndexDetails();
             if ((idxDetails1.getSourceCardinality() < idxDetails1.getSampleCardinalityTarget())
                     || (idxDetails2.getSourceCardinality() < idxDetails2.getSampleCardinalityTarget())) {
                 double sel = findJoinSelFromSamples(joinEnum.leafInputs.get(idx1 - 1),
                         joinEnum.leafInputs.get(idx2 - 1), index1, index2, joinExpr, jOp);
-                if (sel > 0.0) { // if sel is 0.0 we call naiveJoinSelectivity
-                    return sel;
+
+                if (sel == 0.0) {
+                    sel = 1.0 / Math.max(card1, card2); // R.uniq = S.uniq is nicely modelled here. Good heuristic Best we can do so far.
                 }
+                return sel;
             }
-            // Now we can handle only equi joins. We make all the uniform and independence assumptions here.
+            // Now we can handle only equi joins. We make all the uniform and independence assumptions here. Works well for Pk-FK joins.
             double sel = naiveJoinSelectivity(exprUsedVars, card1, card2, idx1, idx2);
             return sel;
         }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan
index 8aae0e3..29e8edc 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan
@@ -1,20 +1,20 @@
-distribute result [$$52] [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+distribute result [$$52] [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
 -- DISTRIBUTE_RESULT  |UNPARTITIONED|
-  exchange [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+  exchange [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
   -- ONE_TO_ONE_EXCHANGE  |UNPARTITIONED|
-    assign [$$52] <- [{"$1": $$57}] project: [$$52] [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+    assign [$$52] <- [{"$1": $$57}] project: [$$52] [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
     -- ASSIGN  |UNPARTITIONED|
-      aggregate [$$57] <- [agg-sql-sum($$60)] [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+      aggregate [$$57] <- [agg-sql-sum($$60)] [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
       -- AGGREGATE  |UNPARTITIONED|
-        exchange [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+        exchange [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
         -- RANDOM_MERGE_EXCHANGE  |PARTITIONED|
-          aggregate [$$60] <- [agg-sql-count($$50)] [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+          aggregate [$$60] <- [agg-sql-count($$50)] [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
           -- AGGREGATE  |PARTITIONED|
-            project ([$$50]) [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+            project ([$$50]) [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
             -- STREAM_PROJECT  |PARTITIONED|
-              exchange [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+              exchange [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
               -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                join (eq($$55, $$56)) [cardinality: 4.0, doc-size: 2.0, op-cost: 4.0, total-cost: 12.0]
+                join (eq($$55, $$56)) [cardinality: 2.1, doc-size: 2.0, op-cost: 4.0, total-cost: 12.0]
                 -- HYBRID_HASH_JOIN [$$55][$$56]  |PARTITIONED|
                   exchange [cardinality: 2.0, doc-size: 1.0, op-cost: 0.0, total-cost: 2.0]
                   -- HASH_PARTITION_EXCHANGE [$$55]  |PARTITIONED|