edit-distance-contains function implementation
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-contains.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-contains.aql
new file mode 100644
index 0000000..5d61ede
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-contains.aql
@@ -0,0 +1,38 @@
+/*
+ * Description : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance-contains function of their authors.
+ * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create type CSXType as closed {
+ id: int32,
+ csxid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create dataset DBLP(DBLPType) primary key id;
+
+create dataset CSX(CSXType) primary key id;
+
+create index ngram_index on DBLP(authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-edit-distance-contains.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where edit-distance-contains($a.authors, $b.authors, 3)[0] and $a.id < $b.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/ngram-edit-distance-contains.plan b/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/ngram-edit-distance-contains.plan
new file mode 100644
index 0000000..56b5df4
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/ngram-edit-distance-contains.plan
@@ -0,0 +1,55 @@
+-- DISTRIBUTE_RESULT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- HYBRID_HASH_JOIN [$$21][$$14] |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- DATASOURCE_SCAN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- HASH_PARTITION_EXCHANGE [$$14] |PARTITIONED|
+ -- UNION_ALL |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- STREAM_SELECT |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- BTREE_SEARCH |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STABLE_SORT [$$26(ASC)] |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- LENGTH_PARTITIONED_INVERTED_INDEX_SEARCH |PARTITIONED|
+ -- BROADCAST_EXCHANGE |PARTITIONED|
+ -- STREAM_SELECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- SPLIT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- DATASOURCE_SCAN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- NESTED_LOOP |PARTITIONED|
+ -- BROADCAST_EXCHANGE |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- DATASOURCE_SCAN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_SELECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- SPLIT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- DATASOURCE_SCAN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.1.ddl.aql
new file mode 100644
index 0000000..36d37d8
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.1.ddl.aql
@@ -0,0 +1,17 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType)
+ primary key id on group1;
+
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.2.update.aql
new file mode 100644
index 0000000..f590a97
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.2.update.aql
@@ -0,0 +1,6 @@
+use dataverse test;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.3.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.3.ddl.aql
new file mode 100644
index 0000000..4209874
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.3.ddl.aql
@@ -0,0 +1,3 @@
+use dataverse test;
+
+create index ngram_index on DBLP(title) type ngram(3);
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.4.query.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.4.query.aql
new file mode 100644
index 0000000..9749da8
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.4.query.aql
@@ -0,0 +1,9 @@
+use dataverse test;
+
+for $paper in dataset('DBLP')
+where edit-distance-contains($paper.title, "Multmedia", 1)[0]
+order by $paper.id
+return {
+ "id" : $paper.id,
+ "title" : $paper.title
+}
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/results/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.1.adm b/asterix-app/src/test/resources/runtimets/results/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.1.adm
new file mode 100644
index 0000000..12593bf
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.1.adm
@@ -0,0 +1,3 @@
+{ "id": 4, "title": "Multimedia Information Systems Issues and Approaches." }
+{ "id": 89, "title": "VORTEX Video Retrieval and Tracking from Compressed Multimedia Databases." }
+{ "id": 90, "title": "VORTEX Video Retrieval and Tracking from Compressed Multimedia Databases ¾ Visual Search Engine." }
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterix-app/src/test/resources/runtimets/testsuite.xml
index 39f01c8..33227b3 100644
--- a/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -2340,6 +2340,11 @@
</compilation-unit>
</test-case>
<test-case FilePath="index-selection">
+ <compilation-unit name="inverted-index-ngram-edit-distance-contains">
+ <output-dir compare="Text">inverted-index-ngram-edit-distance-contains</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="index-selection">
<compilation-unit name="inverted-index-olist-edit-distance-panic">
<output-dir compare="Text">inverted-index-olist-edit-distance-panic</output-dir>
</compilation-unit>