fixed issue 731, 740, and more.
commit 0b46141bea8d503896dc06308f102131df2e4f3d
Author: Young-Seok <kisskys@gmail.com>
Date: Tue May 20 12:52:54 2014 -0700
fixed issues of access method rules that try to use incompatible indexes and similarity functions
commit a0ea4e411503de265f1883aa3837a45be4a8747a
Merge: bb8fe91 b5785a9
Author: Young-Seok <kisskys@gmail.com>
Date: Sun May 18 13:00:33 2014 -0700
merged from master branch to kisskys/left-outer-join-issue branch
commit bb8fe91ffd4fec3d495d32442020447693be8548
Author: Young-Seok <kisskys@gmail.com>
Date: Sun May 18 11:33:54 2014 -0700
another fix for picking available index for leftouterjoin plan
commit 60b057ecec6a157e3e11cb316ef7d38601483741
Merge: a743e44 6cb7fd9
Author: Young-Seok <kisskys@gmail.com>
Date: Sun May 11 22:22:42 2014 -0700
merged master to kisskys/left-outer-join-issue branch
commit a743e4493f0f84f7a71e671478592d487e7510e3
Author: Young-Seok <kisskys@gmail.com>
Date: Sun May 11 20:51:50 2014 -0700
changes for left-outer-join to pick available indexes
Change-Id: I0d89d20c6cc076f40d1fbc5687f0b70e49a91eed
Reviewed-on: http://fulliautomatix.ics.uci.edu:8443/33
Reviewed-by: Inci Cetindil <icetindil@gmail.com>
Tested-by: Ian Maxon <imaxon@uci.edu>
diff --git a/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_01.aql b/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_01.aql
new file mode 100644
index 0000000..25818a7
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_01.aql
@@ -0,0 +1,53 @@
+/*
+ * Description : Test that left-outer-join may use two available indexes, one for primary index in prob subtree and another for secondary btree index in index subtree.
+ * Issue : 730, 741
+ * Expected Res : Success
+ * Date : 8th May 2014
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type TwitterUserType as closed {
+ screen-name: string,
+ lang: string,
+ friends-count: int32,
+ statuses-count: int32,
+ name: string,
+ followers-count: int32
+}
+
+create type TweetMessageType as closed {
+ tweetid: int64,
+ user: TwitterUserType,
+ sender-location: point,
+ send-time: datetime,
+ referred-topics: {{ string }},
+ message-text: string,
+ countA: int32,
+ countB: int32
+}
+
+create dataset TweetMessages(TweetMessageType)
+primary key tweetid;
+
+create index twmSndLocIx on TweetMessages(sender-location) type rtree;
+create index msgCountAIx on TweetMessages(countA) type btree;
+create index msgCountBIx on TweetMessages(countB) type btree;
+create index msgTextIx on TweetMessages(message-text) type keyword;
+
+write output to nc1:"rttest/btree-index-join_leftouterjoin-probe-pidx-with-join-btree-sidx_01.adm";
+
+for $t1 in dataset('TweetMessages')
+where $t1.tweetid < int64("10")
+order by $t1.tweetid
+return {
+"tweetid1": $t1.tweetid,
+"count1":$t1.countA,
+"t2info": for $t2 in dataset('TweetMessages')
+ where $t1.countA /* +indexnl */= $t2.countB
+ order by $t2.tweetid
+ return {"tweetid2": $t2.tweetid,
+ "count2":$t2.countB}
+};
diff --git a/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_02.aql b/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_02.aql
new file mode 100644
index 0000000..70a31e1
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_02.aql
@@ -0,0 +1,55 @@
+/*
+ * Description : Test that left-outer-join may use two available indexes, one for primary index in prob subtree and another for secondary btree index in index subtree.
+ * Issue : 730, 741
+ * Expected Res : Success
+ * Date : 8th May 2014
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type TwitterUserType as closed {
+ screen-name: string,
+ lang: string,
+ friends-count: int32,
+ statuses-count: int32,
+ name: string,
+ followers-count: int32
+}
+
+create type TweetMessageType as closed {
+ tweetid: int64,
+ user: TwitterUserType,
+ sender-location: point,
+ send-time: datetime,
+ referred-topics: {{ string }},
+ message-text: string,
+ countA: int32,
+ countB: int32
+}
+
+create dataset TweetMessages(TweetMessageType)
+primary key tweetid;
+
+create index twmSndLocIx on TweetMessages(sender-location) type rtree;
+create index msgCountAIx on TweetMessages(countA) type btree;
+create index msgCountBIx on TweetMessages(countB) type btree;
+create index msgTextIx on TweetMessages(message-text) type keyword;
+
+write output to nc1:"rttest/btree-index-join_leftouterjoin-probe-pidx-with-join-btree-sidx_02.adm";
+
+for $t1 in dataset('TweetMessages')
+where $t1.tweetid < int64("10")
+order by $t1.tweetid
+return {
+"tweetid1": $t1.tweetid,
+"count1":$t1.countA,
+"t2info": for $t2 in dataset('TweetMessages')
+ where $t1.countA /* +indexnl */= $t2.countB and
+ $t1.tweetid != $t2.tweetid
+ order by $t2.tweetid
+ return {"tweetid2": $t2.tweetid,
+ "count2":$t2.countB}
+};
+
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/issue741.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/issue741.aql
new file mode 100644
index 0000000..7402ec9
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/issue741.aql
@@ -0,0 +1,47 @@
+/*
+ * Description : Test that left-outer-join may use an available inverted index in index subtree.
+ * Issue : 741
+ * Expected Res : Success
+ * Date : 16th May 2014
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type TwitterUserType as {
+screen_name: string,
+lang: string,
+friends_count: int32,
+statuses_count: int32,
+name: string,
+followers_count: int32
+}
+
+create type TweetMessageType as {
+tweetid: int64,
+user: TwitterUserType,
+sender_location: point?,
+send_time: datetime,
+referred_topics: {{ string }},
+message_text: string
+}
+
+create dataset TweetMessages(TweetMessageType) primary key tweetid;
+
+create index topicIIx on TweetMessages(referred_topics) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_issue741.adm";
+
+for $t in dataset('TweetMessages')
+where $t.send_time >= datetime('2011-06-18T14:10:17')
+and
+$t.send_time < datetime('2011-06-18T15:10:17')
+return {
+ "tweet": $t.tweetid,
+ "similar-tweets": for $t2 in dataset('TweetMessages')
+ let $sim := similarity-jaccard-check($t.referred_topics, $t2.referred_topics, 0.6f)
+ where $sim[0] and
+ $t2.tweetid != $t.tweetid
+ return $t2.tweetid
+}
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql
new file mode 100644
index 0000000..7adbf3a
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql
@@ -0,0 +1,55 @@
+/*
+ * Description : Test that left-outer-join may use two available indexes, one for primary index in prob subtree and another for secondary rtree index in index subtree.
+ * Issue : 730, 741
+ * Expected Res : Success
+ * Date : 8th May 2014
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type TwitterUserType as closed {
+ screen-name: string,
+ lang: string,
+ friends-count: int32,
+ statuses-count: int32,
+ name: string,
+ followers-count: int32
+}
+
+create type TweetMessageType as closed {
+ tweetid: int64,
+ user: TwitterUserType,
+ sender-location: point,
+ send-time: datetime,
+ referred-topics: {{ string }},
+ message-text: string,
+ countA: int32,
+ countB: int32
+}
+
+create dataset TweetMessages(TweetMessageType)
+primary key tweetid;
+
+create index twmSndLocIx on TweetMessages(sender-location) type rtree;
+create index msgCountAIx on TweetMessages(countA) type btree;
+create index msgCountBIx on TweetMessages(countB) type btree;
+create index msgKeywordIx on TweetMessages(message-text) type keyword;
+create index msgNgramIx on TweetMessages(message-text) type ngram(3);
+create index topicKeywordIx on TweetMessages(referred-topics) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_leftouterjoin-probe-pidx-with-join-edit-distance-check_idx_01.adm";
+
+for $t1 in dataset('TweetMessages')
+where $t1.tweetid > int64("240")
+order by $t1.tweetid
+return {
+ "tweet": {"id": $t1.tweetid, "topics" : $t1.message-text} ,
+ "similar-tweets": for $t2 in dataset('TweetMessages')
+ let $sim := edit-distance-check($t1.message-text, $t2.message-text, 7)
+ where $sim[0] and
+ $t2.tweetid != $t1.tweetid
+ order by $t2.tweetid
+ return {"id": $t2.tweetid, "topics" : $t2.message-text}
+};
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-jaccard-check-idx_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-jaccard-check-idx_01.aql
new file mode 100644
index 0000000..b7cd179
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-jaccard-check-idx_01.aql
@@ -0,0 +1,55 @@
+/*
+ * Description : Test that left-outer-join may use two available indexes, one for primary index in prob subtree and another for secondary rtree index in index subtree.
+ * Issue : 730, 741
+ * Expected Res : Success
+ * Date : 8th May 2014
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type TwitterUserType as closed {
+ screen-name: string,
+ lang: string,
+ friends-count: int32,
+ statuses-count: int32,
+ name: string,
+ followers-count: int32
+}
+
+create type TweetMessageType as closed {
+ tweetid: int64,
+ user: TwitterUserType,
+ sender-location: point,
+ send-time: datetime,
+ referred-topics: {{ string }},
+ message-text: string,
+ countA: int32,
+ countB: int32
+}
+
+create dataset TweetMessages(TweetMessageType)
+primary key tweetid;
+
+create index twmSndLocIx on TweetMessages(sender-location) type rtree;
+create index msgCountAIx on TweetMessages(countA) type btree;
+create index msgCountBIx on TweetMessages(countB) type btree;
+create index msgKeywordIx on TweetMessages(message-text) type keyword;
+create index msgNgramIx on TweetMessages(message-text) type ngram(3);
+create index topicKeywordIx on TweetMessages(referred-topics) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_leftouterjoin-probe-pidx-with-join-jaccard-check_idx_01.adm";
+
+for $t1 in dataset('TweetMessages')
+where $t1.tweetid > int64("240")
+order by $t1.tweetid
+return {
+ "tweet": {"id": $t1.tweetid, "topics" : $t1.referred-topics} ,
+ "similar-tweets": for $t2 in dataset('TweetMessages')
+ let $sim := similarity-jaccard-check($t1.referred-topics, $t2.referred-topics, 0.5f)
+ where $sim[0] and
+ $t2.tweetid != $t1.tweetid
+ order by $t2.tweetid
+ return {"id": $t2.tweetid, "topics" : $t2.referred-topics}
+};
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-contains.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-contains.aql
new file mode 100644
index 0000000..676ed3b
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-contains.aql
@@ -0,0 +1,47 @@
+/*
+ * Description : Tests whether an ngram_index is applied to optimize a join query using the contains function.
+ * The index should be applied.
+ * Success : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create dataset DBLP(DBLPType) primary key id;
+
+create index ngram_index on DBLP(title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-contains.adm";
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create dataset DBLP(DBLPType) primary key id;
+
+create index ngram_index on DBLP(title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-contains.adm";
+
+for $o1 in dataset('DBLP')
+for $o2 in dataset('DBLP')
+where contains($o1.title, $o2.title) and $o1.id < $o2.id
+order by $o1.id, $o2.id
+return {"title1":$o1.title, "title2":$o2.title}
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check-after-btree-access.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check-after-btree-access.aql
index d656045..f58f6bc 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check-after-btree-access.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check-after-btree-access.aql
@@ -42,7 +42,7 @@
for $t1 in dataset('TweetMessages')
for $t2 in dataset('TweetMessages')
-let $sim := similarity-jaccard-check($t1.message-text, $t2.message-text, 0.6f)
+let $sim := similarity-jaccard-check(word-tokens($t1.message-text), word-tokens($t2.message-text), 0.6f)
where $sim[0] and $t1.tweetid < int64("20") and $t2.tweetid != $t1.tweetid
return {
"t1": $t1.tweetid,
diff --git a/asterix-app/src/test/resources/optimizerts/queries/rtree-index-join/issue730.aql b/asterix-app/src/test/resources/optimizerts/queries/rtree-index-join/issue730.aql
new file mode 100644
index 0000000..e67d702
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/rtree-index-join/issue730.aql
@@ -0,0 +1,44 @@
+/*
+ * Description : Test that left-outer-join may use an available rtree index in index subtree.
+ * Issue : 730
+ * Expected Res : Success
+ * Date : 8th May 2014
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type TwitterUserType as {
+screen_name: string,
+lang: string,
+friends_count: int32,
+statuses_count: int32,
+name: string,
+followers_count: int32
+}
+
+create type TweetMessageType as {
+tweetid: int64,
+user: TwitterUserType,
+sender_location: point?,
+send_time: datetime,
+referred_topics: {{ string }},
+message_text: string
+}
+
+create dataset TweetMessages(TweetMessageType) primary key tweetid;
+
+create index twmSndLocIx on TweetMessages(sender_location) type rtree;
+
+write output to nc1:"rttest/rtree-index-join_issue730.adm";
+
+for $t1 in dataset('TweetMessages')
+where $t1.send_time >= datetime('2011-06-18T14:10:17') and $t1.send_time < datetime('2011-06-18T15:10:17')
+let $n := create-circle($t1.sender_location, 5.0)
+return {
+"message": $t1.tweetid,
+"nearby-message": for $t2 in dataset('TweetMessages')
+ where spatial-intersect($t2.sender_location, $n)
+ return $t2.tweetid
+}
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/optimizerts/queries/rtree-index-join/leftouterjoin-probe-pidx-with-join-rtree-sidx_01.aql b/asterix-app/src/test/resources/optimizerts/queries/rtree-index-join/leftouterjoin-probe-pidx-with-join-rtree-sidx_01.aql
new file mode 100644
index 0000000..9b72850
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/rtree-index-join/leftouterjoin-probe-pidx-with-join-rtree-sidx_01.aql
@@ -0,0 +1,53 @@
+/*
+ * Description : Test that left-outer-join may use two available indexes, one for primary index in prob subtree and another for secondary rtree index in index subtree.
+ * Issue : 730, 741
+ * Expected Res : Success
+ * Date : 8th May 2014
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type TwitterUserType as closed {
+ screen-name: string,
+ lang: string,
+ friends-count: int32,
+ statuses-count: int32,
+ name: string,
+ followers-count: int32
+}
+
+create type TweetMessageType as closed {
+ tweetid: int64,
+ user: TwitterUserType,
+ sender-location: point,
+ send-time: datetime,
+ referred-topics: {{ string }},
+ message-text: string,
+ countA: int32,
+ countB: int32
+}
+
+create dataset TweetMessages(TweetMessageType)
+primary key tweetid;
+
+create index twmSndLocIx on TweetMessages(sender-location) type rtree;
+create index msgCountAIx on TweetMessages(countA) type btree;
+create index msgCountBIx on TweetMessages(countB) type btree;
+create index msgTextIx on TweetMessages(message-text) type keyword;
+
+write output to nc1:"rttest/rtree-index-join_leftouterjoin-probe-pidx-with-join-rtree-sidx_01.adm";
+
+for $t1 in dataset('TweetMessages')
+let $n := create-circle($t1.sender-location, 0.5)
+where $t1.tweetid < int64("10")
+order by $t1.tweetid
+return {
+"tweetid1": $t1.tweetid,
+"loc1":$t1.sender-location,
+"nearby-message": for $t2 in dataset('TweetMessages')
+ where spatial-intersect($t2.sender-location, $n)
+ order by $t2.tweetid
+ return {"tweetid2":$t2.tweetid, "loc2":$t2.sender-location}
+};
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/optimizerts/queries/rtree-index-join/leftouterjoin-probe-pidx-with-join-rtree-sidx_02.aql b/asterix-app/src/test/resources/optimizerts/queries/rtree-index-join/leftouterjoin-probe-pidx-with-join-rtree-sidx_02.aql
new file mode 100644
index 0000000..d71faad
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/rtree-index-join/leftouterjoin-probe-pidx-with-join-rtree-sidx_02.aql
@@ -0,0 +1,53 @@
+/*
+ * Description : Test that left-outer-join may use two available indexes, one for primary index in prob subtree and another for secondary rtree index in index subtree.
+ * Issue : 730, 741
+ * Expected Res : Success
+ * Date : 8th May 2014
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type TwitterUserType as closed {
+ screen-name: string,
+ lang: string,
+ friends-count: int32,
+ statuses-count: int32,
+ name: string,
+ followers-count: int32
+}
+
+create type TweetMessageType as closed {
+ tweetid: int64,
+ user: TwitterUserType,
+ sender-location: point,
+ send-time: datetime,
+ referred-topics: {{ string }},
+ message-text: string,
+ countA: int32,
+ countB: int32
+}
+
+create dataset TweetMessages(TweetMessageType)
+primary key tweetid;
+
+create index twmSndLocIx on TweetMessages(sender-location) type rtree;
+create index msgCountAIx on TweetMessages(countA) type btree;
+create index msgCountBIx on TweetMessages(countB) type btree;
+create index msgTextIx on TweetMessages(message-text) type keyword;
+
+write output to nc1:"rttest/rtree-index-join_leftouterjoin-probe-pidx-with-join-rtree-sidx_02.adm";
+
+for $t1 in dataset('TweetMessages')
+let $n := create-circle($t1.sender-location, 0.5)
+where $t1.tweetid < int64("10")
+order by $t1.tweetid
+return {
+"tweetid1": $t1.tweetid,
+"loc1":$t1.sender-location,
+"nearby-message": for $t2 in dataset('TweetMessages')
+ where spatial-intersect($t2.sender-location, $n) and $t1.tweetid != $t2.tweetid
+ order by $t2.tweetid
+ return {"tweetid2":$t2.tweetid, "loc2":$t2.sender-location}
+};
\ No newline at end of file