[ASTERIXDB-3555][COMP] Use Join Samples to get Join Selectivity
Ext-ref: MB-65101
Change-Id: Iacf111e2d6ed5307b747045a57107eccb872d3f0
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19496
Reviewed-by: <murali.krishna@couchbase.com>
Reviewed-by: Peeyush Gupta <peeyush.gupta@couchbase.com>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index 52566ef..e1aab84 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -187,17 +187,21 @@
return 0.5; // this may not be accurate obviously!
} // we can do all relops here and other joins such as interval joins and spatial joins, the compile time might increase a lot
+ //If one of the tables is smaller than the target sample size, we can join the samples directly
+ // to get a good estimate of the join selectivity.
Index.SampleIndexDetails idxDetails1 = (Index.SampleIndexDetails) index1.getIndexDetails();
Index.SampleIndexDetails idxDetails2 = (Index.SampleIndexDetails) index2.getIndexDetails();
if ((idxDetails1.getSourceCardinality() < idxDetails1.getSampleCardinalityTarget())
|| (idxDetails2.getSourceCardinality() < idxDetails2.getSampleCardinalityTarget())) {
double sel = findJoinSelFromSamples(joinEnum.leafInputs.get(idx1 - 1),
joinEnum.leafInputs.get(idx2 - 1), index1, index2, joinExpr, jOp);
- if (sel > 0.0) { // if sel is 0.0 we call naiveJoinSelectivity
- return sel;
+
+ if (sel == 0.0) {
+ sel = 1.0 / Math.max(card1, card2); // R.uniq = S.uniq is nicely modelled here. Good heuristic Best we can do so far.
}
+ return sel;
}
- // Now we can handle only equi joins. We make all the uniform and independence assumptions here.
+ // Now we can handle only equi joins. We make all the uniform and independence assumptions here. Works well for Pk-FK joins.
double sel = naiveJoinSelectivity(exprUsedVars, card1, card2, idx1, idx2);
return sel;
}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan
index 8aae0e3..29e8edc 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan
@@ -1,20 +1,20 @@
-distribute result [$$52] [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+distribute result [$$52] [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
-- DISTRIBUTE_RESULT |UNPARTITIONED|
- exchange [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+ exchange [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
-- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
- assign [$$52] <- [{"$1": $$57}] project: [$$52] [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+ assign [$$52] <- [{"$1": $$57}] project: [$$52] [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
-- ASSIGN |UNPARTITIONED|
- aggregate [$$57] <- [agg-sql-sum($$60)] [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+ aggregate [$$57] <- [agg-sql-sum($$60)] [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
-- AGGREGATE |UNPARTITIONED|
- exchange [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+ exchange [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
-- RANDOM_MERGE_EXCHANGE |PARTITIONED|
- aggregate [$$60] <- [agg-sql-count($$50)] [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+ aggregate [$$60] <- [agg-sql-count($$50)] [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
-- AGGREGATE |PARTITIONED|
- project ([$$50]) [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+ project ([$$50]) [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
-- STREAM_PROJECT |PARTITIONED|
- exchange [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+ exchange [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- join (eq($$55, $$56)) [cardinality: 4.0, doc-size: 2.0, op-cost: 4.0, total-cost: 12.0]
+ join (eq($$55, $$56)) [cardinality: 2.1, doc-size: 2.0, op-cost: 4.0, total-cost: 12.0]
-- HYBRID_HASH_JOIN [$$55][$$56] |PARTITIONED|
exchange [cardinality: 2.0, doc-size: 1.0, op-cost: 0.0, total-cost: 2.0]
-- HASH_PARTITION_EXCHANGE [$$55] |PARTITIONED|