fix for issue 731, needs optimization on existential queries
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-edit-distance-check-substring.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-edit-distance-check-substring.aql
new file mode 100644
index 0000000..a506053
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-edit-distance-check-substring.aql
@@ -0,0 +1,31 @@
+/*
+ * Description : Tests whether an ngram_index index is applied to optimize a selection query using the similarity-edit-distance-check function on the substring of the field.
+ * Tests that the optimizer rule correctly drills through the substring function.
+ * The index should be applied.
+ * Success : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create dataset DBLP(DBLPType) primary key id;
+
+create index ngram_index on DBLP(title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-complex_ngram-edit-distance-check-substring.adm";
+
+for $paper in dataset('DBLP')
+where edit-distance-check(substring($paper.title, 0, 8), "datbase", 1)[0]
+return {
+ "id" : $paper.id,
+ "title" : $paper.title
+}
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-edit-distance-check-word-tokens.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-edit-distance-check-word-tokens.aql
new file mode 100644
index 0000000..8f1c1fc
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-edit-distance-check-word-tokens.aql
@@ -0,0 +1,31 @@
+/*
+ * Description : Tests whether an ngram_index index is applied to optimize a selection query using the similarity-edit-distance-check function on individual word tokens.
+ * Tests that the optimizer rule correctly drills through the word-tokens function and existential query.
+ * The index should be applied.
+ * Success : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create dataset DBLP(DBLPType) primary key id;
+
+create index ngram_index on DBLP(title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-complex_ngram-edit-distance-check-word-tokens.adm";
+
+for $paper in dataset('DBLP')
+where (some $word in word-tokens($paper.title) satisfies edit-distance-check($word, "datbase", 1)[0] )
+return {
+ "id" : $paper.id,
+ "title" : $paper.title
+}
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/optimizerts/results/inverted-index-complex/ngram-edit-distance-check-substring.plan b/asterix-app/src/test/resources/optimizerts/results/inverted-index-complex/ngram-edit-distance-check-substring.plan
new file mode 100644
index 0000000..259a003
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/results/inverted-index-complex/ngram-edit-distance-check-substring.plan
@@ -0,0 +1,19 @@
+-- DISTRIBUTE_RESULT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- STREAM_SELECT |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- BTREE_SEARCH |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STABLE_SORT [$$13(ASC)] |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- LENGTH_PARTITIONED_INVERTED_INDEX_SEARCH |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/optimizerts/results/inverted-index-complex/ngram-edit-distance-check-word-tokens.plan b/asterix-app/src/test/resources/optimizerts/results/inverted-index-complex/ngram-edit-distance-check-word-tokens.plan
new file mode 100644
index 0000000..e62a269
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/results/inverted-index-complex/ngram-edit-distance-check-word-tokens.plan
@@ -0,0 +1,24 @@
+-- DISTRIBUTE_RESULT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- PRE_SORTED_DISTINCT_BY |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- STREAM_SELECT |PARTITIONED|
+ -- UNNEST |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- BTREE_SEARCH |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STABLE_SORT [$$16(ASC)] |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- LENGTH_PARTITIONED_INVERTED_INDEX_SEARCH |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/optimizerts/results/q1.plan b/asterix-app/src/test/resources/optimizerts/results/q1.plan
index aa5daa2..3843154 100644
--- a/asterix-app/src/test/resources/optimizerts/results/q1.plan
+++ b/asterix-app/src/test/resources/optimizerts/results/q1.plan
@@ -2,19 +2,16 @@
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ASSIGN |PARTITIONED|
- -- STREAM_PROJECT |PARTITIONED|
- -- STREAM_SELECT |PARTITIONED|
- -- STREAM_PROJECT |PARTITIONED|
- -- SUBPLAN |PARTITIONED|
- {
- -- AGGREGATE |LOCAL|
- -- STREAM_SELECT |LOCAL|
- -- UNNEST |LOCAL|
- -- NESTED_TUPLE_SOURCE |LOCAL|
- }
- -- STREAM_PROJECT |PARTITIONED|
- -- ASSIGN |PARTITIONED|
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- -- DATASOURCE_SCAN |PARTITIONED|
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- PRE_SORTED_DISTINCT_BY |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- STREAM_SELECT |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- UNNEST |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- DATASOURCE_SCAN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-word-tokens/inverted-index-ngram-edit-distance-word-tokens.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-word-tokens/inverted-index-ngram-edit-distance-word-tokens.1.ddl.aql
new file mode 100644
index 0000000..49c6b3b
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-word-tokens/inverted-index-ngram-edit-distance-word-tokens.1.ddl.aql
@@ -0,0 +1,17 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType)
+ primary key id on group1;
+
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-word-tokens/inverted-index-ngram-edit-distance-word-tokens.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-word-tokens/inverted-index-ngram-edit-distance-word-tokens.2.update.aql
new file mode 100644
index 0000000..29632d3
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-word-tokens/inverted-index-ngram-edit-distance-word-tokens.2.update.aql
@@ -0,0 +1,6 @@
+use dataverse test;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-word-tokens/inverted-index-ngram-edit-distance-word-tokens.3.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-word-tokens/inverted-index-ngram-edit-distance-word-tokens.3.ddl.aql
new file mode 100644
index 0000000..4209874
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-word-tokens/inverted-index-ngram-edit-distance-word-tokens.3.ddl.aql
@@ -0,0 +1,3 @@
+use dataverse test;
+
+create index ngram_index on DBLP(title) type ngram(3);
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-word-tokens/inverted-index-ngram-edit-distance-word-tokens.4.query.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-word-tokens/inverted-index-ngram-edit-distance-word-tokens.4.query.aql
new file mode 100644
index 0000000..4584c3e
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-word-tokens/inverted-index-ngram-edit-distance-word-tokens.4.query.aql
@@ -0,0 +1,9 @@
+use dataverse test;
+
+for $paper in dataset('DBLP')
+where (some $word in word-tokens($paper.title) satisfies edit-distance-check($word, "Multmedia", 1)[0] )
+order by $paper.id
+return {
+ "id" : $paper.id,
+ "title" : $paper.title
+}
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/results/index-selection/inverted-index-ngram-edit-distance-word-tokens/inverted-index-ngram-edit-distance-word-tokens.1.adm b/asterix-app/src/test/resources/runtimets/results/index-selection/inverted-index-ngram-edit-distance-word-tokens/inverted-index-ngram-edit-distance-word-tokens.1.adm
new file mode 100644
index 0000000..12593bf
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/index-selection/inverted-index-ngram-edit-distance-word-tokens/inverted-index-ngram-edit-distance-word-tokens.1.adm
@@ -0,0 +1,3 @@
+{ "id": 4, "title": "Multimedia Information Systems Issues and Approaches." }
+{ "id": 89, "title": "VORTEX Video Retrieval and Tracking from Compressed Multimedia Databases." }
+{ "id": 90, "title": "VORTEX Video Retrieval and Tracking from Compressed Multimedia Databases ¾ Visual Search Engine." }
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterix-app/src/test/resources/runtimets/testsuite.xml
index d1297a4..39f01c8 100644
--- a/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -2330,6 +2330,11 @@
</compilation-unit>
</test-case>
<test-case FilePath="index-selection">
+ <compilation-unit name="inverted-index-ngram-edit-distance-word-tokens">
+ <output-dir compare="Text">inverted-index-ngram-edit-distance-word-tokens</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="index-selection">
<compilation-unit name="inverted-index-ngram-jaccard">
<output-dir compare="Text">inverted-index-ngram-jaccard</output-dir>
</compilation-unit>