Improved rule matching for indexed NL joins using primary btree indexes (if hint was given). Fixed a few bugs in the access method rewrite rule. Added tests to guard against regressions.
git-svn-id: https://asterixdb.googlecode.com/svn/branches/asterix_stabilization@855 eaa15691-b419-025a-1212-ee371bd00084
diff --git a/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/primary-equi-join-multipred.aql b/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/primary-equi-join-multipred.aql
new file mode 100644
index 0000000..9b83718
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/primary-equi-join-multipred.aql
@@ -0,0 +1,48 @@
+/*
+ * Description : Equi joins two datasets, Customers and Orders, based on the customer id.
+ * Given the 'indexnl' hint we expect the join to be transformed
+ * into an indexed nested-loop join using Customers' primary index.
+ * We expect the additional predicates to be put into a select above the
+ * primary index search.
+ * Success : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+ number: int32,
+ street: string,
+ city: string
+}
+
+create type CustomerType as closed {
+ cid: int32,
+ name: string,
+ age: int32?,
+ address: AddressType?,
+ lastorder: {
+ oid: int32,
+ total: float
+ }
+}
+
+create type OrderType as closed {
+ oid: int32,
+ cid: int32,
+ orderstatus: string,
+ orderpriority: string,
+ clerk: string,
+ total: float
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+create dataset Orders(OrderType) partitioned by key oid;
+
+write output to nc1:"rttest/btree-index-join_primary-equi-join-multipred.adm";
+
+for $c in dataset('Customers')
+for $o in dataset('Orders')
+where $c.cid /*+ indexnl */ = $o.cid and $c.name < $o.orderstatus and $c.age < $o.cid
+return {"customer":$c, "order": $o}
diff --git a/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/secondary-equi-join-multipred.aql b/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/secondary-equi-join-multipred.aql
new file mode 100644
index 0000000..bd21ab0
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/secondary-equi-join-multipred.aql
@@ -0,0 +1,49 @@
+/*
+ * Description : Equi joins two datasets, DBLP and CSX, based on their title.
+ * DBLP has a secondary btree index on title, and given the 'indexnl' hint
+ * we expect the join to be transformed into an indexed nested-loop join.
+ * We expect the additional predicates to be put into a select above the
+ * primary index search.
+ * Success : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create type CSXType as closed {
+ id: int32,
+ csxid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index title_index on DBLP(title);
+
+write output to nc1:"rttest/btree-index-join_title-secondary-equi-join-multipred.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where $a.title /*+ indexnl */ = $b.title and $a.authors < $b.authors and $a.misc > $b.misc
+return {"arec": $a, "brec": $b}
diff --git a/asterix-app/src/test/resources/optimizerts/results/btree-index-join/primary-equi-join-multipred.plan b/asterix-app/src/test/resources/optimizerts/results/btree-index-join/primary-equi-join-multipred.plan
new file mode 100644
index 0000000..8cadace
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/results/btree-index-join/primary-equi-join-multipred.plan
@@ -0,0 +1,16 @@
+-- SINK_WRITE |PARTITIONED|
+ -- RANDOM_MERGE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- STREAM_SELECT |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
+ -- BTREE_SEARCH |UNPARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- STABLE_SORT [$$15(ASC)] |LOCAL|
+ -- HASH_PARTITION_EXCHANGE [$$15] |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- DATASOURCE_SCAN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/optimizerts/results/btree-index-join/secondary-equi-join-multipred.plan b/asterix-app/src/test/resources/optimizerts/results/btree-index-join/secondary-equi-join-multipred.plan
new file mode 100644
index 0000000..0edd774
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/results/btree-index-join/secondary-equi-join-multipred.plan
@@ -0,0 +1,19 @@
+-- SINK_WRITE |PARTITIONED|
+ -- RANDOM_MERGE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- STREAM_SELECT |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- BTREE_SEARCH |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- STABLE_SORT [$$24(ASC)] |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- BTREE_SEARCH |PARTITIONED|
+ -- BROADCAST_EXCHANGE |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- DATASOURCE_SCAN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
diff --git a/asterix-app/src/test/resources/optimizerts/results/scan-delete-rtree-secondary-index.plan b/asterix-app/src/test/resources/optimizerts/results/scan-delete-rtree-secondary-index.plan
index 3961d95..214cf13 100644
--- a/asterix-app/src/test/resources/optimizerts/results/scan-delete-rtree-secondary-index.plan
+++ b/asterix-app/src/test/resources/optimizerts/results/scan-delete-rtree-secondary-index.plan
@@ -2,7 +2,7 @@
-- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
-- INDEX_INSERT_DELETE |UNPARTITIONED|
-- ONE_TO_ONE_EXCHANGE |LOCAL|
- -- STABLE_SORT [$$24(ASC), $$25(ASC), $$26(ASC), $$27(ASC)] |LOCAL|
+ -- STABLE_SORT [$$27(ASC), $$28(ASC), $$29(ASC), $$30(ASC)] |LOCAL|
-- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
-- ASSIGN |UNPARTITIONED|
-- ASSIGN |UNPARTITIONED|
@@ -14,8 +14,8 @@
-- ASSIGN |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ASSIGN |PARTITIONED|
- -- STREAM_SELECT |PARTITIONED|
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- -- DATASOURCE_SCAN |PARTITIONED|
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- BTREE_SEARCH |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-join/btree-primary-equi-join.aql b/asterix-app/src/test/resources/runtimets/queries/index-join/btree-primary-equi-join.aql
new file mode 100644
index 0000000..1015e82
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-join/btree-primary-equi-join.aql
@@ -0,0 +1,57 @@
+/*
+ * Description : Equi joins two datasets, Customers and Orders, based on the customer id.
+ * Given the 'indexnl' hint we expect the join to be transformed
+ * into an indexed nested-loop join using Customers' primary index.
+ * Success : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as open {
+ number: int32,
+ street: string,
+ city: string
+}
+
+create type CustomerType as closed {
+ cid: int32,
+ name: string,
+ cashBack: int32,
+ age: int32?,
+ address: AddressType?,
+ lastorder: {
+ oid: int32,
+ total: float
+ }
+}
+
+create type OrderType as open {
+ oid: int32,
+ cid: int32,
+ orderstatus: string,
+ orderpriority: string,
+ clerk: string,
+ total: float,
+ items: [int32]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+create dataset Orders(OrderType) partitioned by key oid;
+
+load dataset Customers
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/nontagged/customerData.json"),("format"="adm"));
+
+load dataset Orders
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/nontagged/orderData.json"),("format"="adm"));
+
+write output to nc1:"rttest/index-join_btree-primary-equi-join.adm";
+
+for $c in dataset('Customers')
+for $o in dataset('Orders')
+where $c.cid /*+ indexnl */ = $o.cid
+order by $c.cid, $o.oid
+return {"cid":$c.cid, "oid": $o.oid}
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-join/btree-secondary-equi-join.aql b/asterix-app/src/test/resources/runtimets/queries/index-join/btree-secondary-equi-join.aql
new file mode 100644
index 0000000..d1e9824
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-join/btree-secondary-equi-join.aql
@@ -0,0 +1,47 @@
+/*
+ * Description : Equi joins two datasets, DBLP and CSX, based on their title.
+ * DBLP has a secondary btree index on title, and given the 'indexnl' hint
+ * we expect the join to be transformed into an indexed nested-loop join.
+ * Success : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create type CSXType as closed {
+ id: int32,
+ csxid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index title_index on DBLP(authors);
+
+write output to nc1:"rttest/index-join_btree-secondary-equi-join.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where $a.authors /*+ indexnl */ = $b.authors
+order by $a.id, $b.id
+return {"aid": $a.id, "bid": $b.id, "authors": $a.authors}
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/results/index-join/btree-primary-equi-join.adm b/asterix-app/src/test/resources/runtimets/results/index-join/btree-primary-equi-join.adm
new file mode 100644
index 0000000..bc2a454
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/index-join/btree-primary-equi-join.adm
@@ -0,0 +1,3 @@
+{ "cid": 5, "oid": 10 }
+{ "cid": 775, "oid": 10 }
+{ "cid": 775, "oid": 1000 }
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/results/index-join/btree-secondary-equi-join.adm b/asterix-app/src/test/resources/runtimets/results/index-join/btree-secondary-equi-join.adm
new file mode 100644
index 0000000..7dcc1fc
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/index-join/btree-secondary-equi-join.adm
@@ -0,0 +1,5 @@
+{ "aid": 5, "bid": 98, "authors": "Umeshwar Dayal Eric N. Hanson Jennifer Widom" }
+{ "aid": 34, "bid": 57, "authors": "" }
+{ "aid": 54, "bid": 91, "authors": "Lynn Andrea Stein Henry Lieberman David Ungar" }
+{ "aid": 68, "bid": 57, "authors": "" }
+{ "aid": 69, "bid": 57, "authors": "" }
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterix-app/src/test/resources/runtimets/testsuite.xml
index aa4e257..b86e4b3 100644
--- a/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -1696,6 +1696,16 @@
</test-group>
<test-group name="index-join">
<test-case FilePath="index-join">
+ <compilation-unit name="btree-primary-equi-join">
+ <output-file compare="Text">btree-primary-equi-join.adm</output-file>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="index-join">
+ <compilation-unit name="btree-secondary-equi-join">
+ <output-file compare="Text">btree-secondary-equi-join.adm</output-file>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="index-join">
<compilation-unit name="inverted-index-ngram-edit-distance">
<output-file compare="Text">inverted-index-ngram-edit-distance.adm</output-file>
</compilation-unit>
@@ -1725,6 +1735,11 @@
<output-file compare="Text">inverted-index-word-jaccard.adm</output-file>
</compilation-unit>
</test-case>
+ <test-case FilePath="index-join">
+ <compilation-unit name="rtree-spatial-intersect-point">
+ <output-file compare="Text">rtree-spatial-intersect-point.adm</output-file>
+ </compilation-unit>
+ </test-case>
</test-group>
<test-group name="index-selection">
<test-case FilePath="index-selection">