Reintegrated asterix-fuzzy. git-svn-id: https://asterixdb.googlecode.com/svn/branches/asterix_stabilization@437 eaa15691-b419-025a-1212-ee371bd00084

commit: c576c60758cc4af9a2d5dc09651f31d338b0aecf [log] [tgz]
author: alexander.behm <alexander.behm@eaa15691-b419-025a-1212-ee371bd00084> Fri Jul 06 02:41:15 2012 +0000
committer: alexander.behm <alexander.behm@eaa15691-b419-025a-1212-ee371bd00084> Fri Jul 06 02:41:15 2012 +0000
tree: 9107af14fcc729c75e99a19d2ebbfbe00c95f506
parent: 411f1e4d2a0b3a8ad96ad6cc4c3163d4a77142c0 [diff]
diff --git a/asterix-app/src/test/resources/optimizerts/queries/consolidate-selects-complex.aql b/asterix-app/src/test/resources/optimizerts/queries/consolidate-selects-complex.aql
new file mode 100644
index 0000000..313e9a4
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/consolidate-selects-complex.aql

@@ -0,0 +1,31 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType) 
+  partitioned by key id on group1;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/consolidate-complex-selects.aql";
+
+for $paper in dataset('DBLP')
+let $paper_tokens := word-tokens($paper.title)
+let $query_tokens := word-tokens("Transactions for Cooperative Environments")
+let $jacca := similarity-jaccard-check($paper_tokens, $query_tokens, 0.8f)
+let $jaccb := similarity-jaccard-check($paper_tokens, $query_tokens, 0.5f)
+let $jaccc := similarity-jaccard-check($paper_tokens, $query_tokens, 0.3f)
+where $jacca[0] and $jaccb[0] and $paper.authors = "testauth" and $jaccc
+return {"Paper": $paper_tokens, "Query": $query_tokens }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/consolidate-selects-simple.aql b/asterix-app/src/test/resources/optimizerts/queries/consolidate-selects-simple.aql
new file mode 100644
index 0000000..8a237d2
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/consolidate-selects-simple.aql

@@ -0,0 +1,28 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type CustomerType as closed {
+  c_custkey: int32, 
+  c_name: string, 
+  c_address: string, 
+  c_nationkey: int32, 
+  c_phone: string, 
+  c_acctbal: double, 
+  c_mktsegment: string,
+  c_comment: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset Customers(CustomerType) 
+  partitioned by key c_custkey on group1;
+
+write output to nc1:"rttest/consolidate-selects-simple.aql";
+
+for $c in dataset('Customers')
+where $c.c_name = "testname" 
+  and $c.c_address = "testaddr"
+  and $c.c_nationkey = 1
+  and $c.c_phone = "123456789"
+return $c
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-contains-panic.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-contains-panic.aql
new file mode 100644
index 0000000..66ef43b
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-contains-panic.aql

@@ -0,0 +1,33 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using the contains function.
+ *                  The index should *not* be applied (see below).
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-basic_ngram-contains-panic.adm";
+
+// Cannot optimize this query because the string constant is shorter than the gram length.
+for $o in dataset('DBLP')
+where contains($o.title, "Mu")
+order by $o.id
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-contains.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-contains.aql
new file mode 100644
index 0000000..0aa8056
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-contains.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using the contains function.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-basic_ngram-contains.adm";
+
+for $o in dataset('DBLP')
+where contains($o.title, "Multimedia")
+order by $o.id
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-edit-distance-check-panic.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-edit-distance-check-panic.aql
new file mode 100644
index 0000000..21e41f8
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-edit-distance-check-panic.aql

@@ -0,0 +1,33 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using the edit-distance-check function on strings.
+ *                  The index should *not* be applied (see below).
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-basic_ngram-edit-distance-check-panic.adm";
+
+// This query cannot be optimized with an index, based on the high edit distance.
+for $o in dataset('DBLP')
+let $ed := edit-distance-check($o.authors, "Amihay Motro", 5)
+where $ed[0]
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-edit-distance-check.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-edit-distance-check.aql
new file mode 100644
index 0000000..ff3728d
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-edit-distance-check.aql

@@ -0,0 +1,31 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using the edit-distance-check function on strings.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-basic_ngram-edit-distance-check.adm";
+
+for $o in dataset('DBLP')
+where edit-distance-check($o.authors, "Amihay Motro", 1)[0]
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-edit-distance-panic.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-edit-distance-panic.aql
new file mode 100644
index 0000000..c993c57
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-edit-distance-panic.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using the edit-distance function on strings.
+ *                  The index should *not* be applied (see below).
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-basic_ngram-edit-distance-panic.adm";
+
+// This query cannot be optimized with an index, based on the high edit distance.
+for $o in dataset('DBLP')
+where edit-distance($o.authors, "Amihay Motro") <= 5
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-edit-distance.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-edit-distance.aql
new file mode 100644
index 0000000..a143cde
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-edit-distance.aql

@@ -0,0 +1,31 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using the edit-distance function on strings.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-basic_ngram-edit-distance.adm";
+
+for $o in dataset('DBLP')
+where edit-distance($o.authors, "Amihay Motro") <= 1
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-fuzzyeq-edit-distance.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-fuzzyeq-edit-distance.aql
new file mode 100644
index 0000000..87de1fb
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-fuzzyeq-edit-distance.aql

@@ -0,0 +1,34 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query with ~= using edit-distance on strings.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-basic_ngram-fuzzyeq-edit-distance.adm";
+
+set simfunction 'edit-distance';
+set simthreshold '1';
+
+for $o in dataset('DBLP')
+where $o.authors ~= "Amihay Motro"
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-fuzzyeq-jaccard.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-fuzzyeq-jaccard.aql
new file mode 100644
index 0000000..3db4c92
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-fuzzyeq-jaccard.aql

@@ -0,0 +1,34 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query with ~= using Jaccard on 3-gram tokens.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-basic_ngram-fuzzyeq-jaccard.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.8f';
+
+for $o in dataset('DBLP')
+where gram-tokens($o.title, 3, false) ~= gram-tokens("Transactions for Cooperative Environments", 3, false)
+return $o

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-jaccard-check.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-jaccard-check.aql
new file mode 100644
index 0000000..c39d531
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-jaccard-check.aql

@@ -0,0 +1,31 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using the similarity-jaccard-check function on 3-gram tokens.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-basic_ngram-jaccard-check.adm";
+
+for $o in dataset('DBLP')
+where similarity-jaccard-check(gram-tokens($o.title, 3, false), gram-tokens("Transactions for Cooperative Environments", 3, false), 0.5f)[0]
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-jaccard.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-jaccard.aql
new file mode 100644
index 0000000..3855ade
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ngram-jaccard.aql

@@ -0,0 +1,31 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using the similarity-jaccard function on 3-gram tokens.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-basic_ngram-jaccard.adm";
+
+for $o in dataset('DBLP')
+where similarity-jaccard(gram-tokens($o.title, 3, false), gram-tokens("Transactions for Cooperative Environments", 3, false)) >= 0.5f
+return $o

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-edit-distance-check-panic.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-edit-distance-check-panic.aql
new file mode 100644
index 0000000..6a6700a
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-edit-distance-check-panic.aql

@@ -0,0 +1,40 @@
+/*
+ * Description    : Tests whether a keyword index is applied to optimize a selection query using the edit-distance-check function on lists.
+ *                  The index should *not* be applied (see below).
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-basic_olist-edit-distance-check-panic.adm";
+
+// Index should not be applied because all list elements can be modified by 3 edit operations.
+for $c in dataset('Customers')
+where edit-distance-check($c.interests, ["computers", "wine", "walking"], 3)[0]
+order by $c.cid
+return $c

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-edit-distance-check.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-edit-distance-check.aql
new file mode 100644
index 0000000..f705a4a
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-edit-distance-check.aql

@@ -0,0 +1,39 @@
+/*
+ * Description    : Tests whether a keyword index is applied to optimize a selection query using the edit-distance-check function on lists.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-basic_olist-edit-distance-check.adm";
+
+for $c in dataset('Customers')
+where edit-distance-check($c.interests, ["computers", "wine", "walking"], 1)[0]
+order by $c.cid
+return $c

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-edit-distance-panic.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-edit-distance-panic.aql
new file mode 100644
index 0000000..1fc7906
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-edit-distance-panic.aql

@@ -0,0 +1,40 @@
+/*
+ * Description    : Tests whether a keyword index is applied to optimize a selection query using the edit-distance function on lists.
+ *                  The index should *not* be applied (see below).
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-basic_olist-edit-distance-panic.adm";
+
+// Index should not be applied because all list elements can be modified by 3 edit operations.
+for $c in dataset('Customers')
+where edit-distance($c.interests, ["computers", "wine", "walking"]) <= 3
+order by $c.cid
+return $c

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-edit-distance.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-edit-distance.aql
new file mode 100644
index 0000000..70268f5
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-edit-distance.aql

@@ -0,0 +1,39 @@
+/*
+ * Description    : Tests whether a keyword index is applied to optimize a selection query using the edit-distance function on lists.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-basic_olist-edit-distance.adm";
+
+for $c in dataset('Customers')
+where edit-distance($c.interests, ["computers", "wine", "walking"]) <= 1
+order by $c.cid
+return $c

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-fuzzyeq-edit-distance.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-fuzzyeq-edit-distance.aql
new file mode 100644
index 0000000..90ce266
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-fuzzyeq-edit-distance.aql

@@ -0,0 +1,42 @@
+/*
+ * Description    : Tests whether a keyword index is applied to optimize a selection query with ~= using edit-distance on lists.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-basic_olist-fuzzyeq-edit-distance.adm";
+
+set simfunction 'edit-distance';
+set simthreshold '1';
+
+for $c in dataset('Customers')
+where $c.interests ~= ["computers", "wine", "walking"]
+order by $c.cid
+return $c

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-fuzzyeq-jaccard.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-fuzzyeq-jaccard.aql
new file mode 100644
index 0000000..09da02e
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-fuzzyeq-jaccard.aql

@@ -0,0 +1,41 @@
+/*
+ * Description    : Tests whether a keyword is applied to optimize a selection query with ~= using Jaccard on lists.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-basic_olist-fuzzyeq-jaccard.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.8f';
+
+for $c in dataset('Customers')
+where $c.interests ~= ["databases", "computers", "wine"]
+return $c

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-jaccard-check.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-jaccard-check.aql
new file mode 100644
index 0000000..18721c5
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-jaccard-check.aql

@@ -0,0 +1,38 @@
+/*
+ * Description    : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on lists.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-basic_olist-jaccard-check.adm";
+
+for $c in dataset('Customers')
+where similarity-jaccard-check($c.interests, ["databases", "computers", "wine"], 0.7f)[0]
+return $c

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-jaccard.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-jaccard.aql
new file mode 100644
index 0000000..fa2291c
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/olist-jaccard.aql

@@ -0,0 +1,38 @@
+/*
+ * Description    : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard function on lists.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-basic_olist-jaccard.adm";
+
+for $c in dataset('Customers')
+where similarity-jaccard($c.interests, ["databases", "computers", "wine"]) >= 0.7f
+return $c

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ulist-fuzzyeq-jaccard.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ulist-fuzzyeq-jaccard.aql
new file mode 100644
index 0000000..89c2dac
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ulist-fuzzyeq-jaccard.aql

@@ -0,0 +1,41 @@
+/*
+ * Description    : Tests whether a keyword is applied to optimize a selection query with ~= using Jaccard on sets.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: {{string}},
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-basic_ulist-fuzzyeq-jaccard.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.8f';
+
+for $c in dataset('Customers')
+where $c.interests ~= ["databases", "computers", "wine"]
+return $c

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ulist-jaccard-check.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ulist-jaccard-check.aql
new file mode 100644
index 0000000..8089d6d
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ulist-jaccard-check.aql

@@ -0,0 +1,38 @@
+/*
+ * Description    : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on sets.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: {{string}},
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-basic_ulist-jaccard.adm";
+
+for $c in dataset('Customers')
+where similarity-jaccard-check($c.interests, ["databases", "computers", "wine"], 0.7f)[0]
+return $c

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ulist-jaccard.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ulist-jaccard.aql
new file mode 100644
index 0000000..547f7e2
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/ulist-jaccard.aql

@@ -0,0 +1,38 @@
+/*
+ * Description    : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard function on sets.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: {{string}},
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-basic_ulist-jaccard.adm";
+
+for $c in dataset('Customers')
+where similarity-jaccard($c.interests, ["databases", "computers", "wine"]) >= 0.7f
+return $c

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/word-contains.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/word-contains.aql
new file mode 100644
index 0000000..0b5367b
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/word-contains.aql

@@ -0,0 +1,33 @@
+/*
+ * Description    : Tests whether a keyword index is applied to optimize a selection query using the contains function.
+ *                  The index should *not* be applied (see below).
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index keyword_index on DBLP(title) type keyword;
+
+write output to nc1:"rttest/inverted-index-basic_word-contains.adm";
+
+// Contains cannot be answered with a word inverted index.
+for $o in dataset('DBLP')
+where contains($o.title, "Multimedia")
+order by $o.id
+return $o

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/word-fuzzyeq-jaccard.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/word-fuzzyeq-jaccard.aql
new file mode 100644
index 0000000..39b247d
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/word-fuzzyeq-jaccard.aql

@@ -0,0 +1,34 @@
+/*
+ * Description    : Tests whether a keyword is applied to optimize a selection query with ~= using Jaccard on word tokens.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index keyword_index on DBLP(title) type keyword;
+
+write output to nc1:"rttest/inverted-index-basic_word-fuzzyeq-jaccard.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.5f';
+
+for $o in dataset('DBLP')
+where word-tokens($o.title) ~= word-tokens("Transactions for Cooperative Environments")
+return $o

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/word-jaccard-check.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/word-jaccard-check.aql
new file mode 100644
index 0000000..f1d3d4b
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/word-jaccard-check.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on word tokens.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index keyword_index on DBLP(title) type keyword;
+
+write output to nc1:"rttest/inverted-index-basic_word-jaccard-check.adm";
+
+for $o in dataset('DBLP')
+where similarity-jaccard-check(word-tokens($o.title), word-tokens("Transactions for Cooperative Environments"), 0.5f)[0]
+return $o
+

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/word-jaccard.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/word-jaccard.aql
new file mode 100644
index 0000000..16fb8d3
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-basic/word-jaccard.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard function on word tokens.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index keyword_index on DBLP(title) type keyword;
+
+write output to nc1:"rttest/inverted-index-basic_word-jaccard.adm";
+
+for $o in dataset('DBLP')
+where similarity-jaccard(word-tokens($o.title), word-tokens("Transactions for Cooperative Environments")) >= 0.5f
+return $o
+

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_01.aql
new file mode 100644
index 0000000..8c9500f
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_01.aql

@@ -0,0 +1,36 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using 
+ *                  two edit-distance-check function of which only the first can be optimized with an index. 
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-complex_ngram-edit-distance-check-let-panic-nopanic_01.adm";
+
+// Only the first edit-distance-check can be optimized with an index.
+for $o in dataset('DBLP')
+let $eda := edit-distance-check($o.authors, "Amihay Motro", 3)
+let $edb := edit-distance-check($o.authors, "Amihay Motro", 5)
+where $eda[0] and $edb[0] 
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_02.aql
new file mode 100644
index 0000000..55bd024
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_02.aql

@@ -0,0 +1,36 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using 
+ *                  two edit-distance-check function of which only the second can be optimized with an index.
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-complex_ngram-edit-distance-check-let-panic-nopanic_01.adm";
+
+// Only the second edit-distance-check can be optimized with an index.
+for $o in dataset('DBLP')
+let $edb := edit-distance-check($o.authors, "Amihay Motro", 5)
+let $eda := edit-distance-check($o.authors, "Amihay Motro", 3)
+where $edb[0] and $eda[0] 
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-edit-distance-check-let-panic.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-edit-distance-check-let-panic.aql
new file mode 100644
index 0000000..458425f
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-edit-distance-check-let-panic.aql

@@ -0,0 +1,34 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using the edit-distance-check function on strings.
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should *not* be applied (see below).
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-complex_ngram-edit-distance-check-let-panic.adm";
+
+// This query cannot be optimized with an index, based on the high edit distance.
+for $o in dataset('DBLP')
+let $ed := edit-distance-check($o.authors, "Amihay Motro", 5)
+where $ed[0]
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-edit-distance-check-let.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-edit-distance-check-let.aql
new file mode 100644
index 0000000..f5c9a18
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-edit-distance-check-let.aql

@@ -0,0 +1,33 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using the edit-distance-check function on strings.
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-complex_ngram-edit-distance-check-let.adm";
+
+for $o in dataset('DBLP')
+let $ed := edit-distance-check($o.authors, "Amihay Motro", 1)
+where $ed[0]
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-jaccard-check-let.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-jaccard-check-let.aql
new file mode 100644
index 0000000..5d3758e
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-jaccard-check-let.aql

@@ -0,0 +1,33 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using the similarity-jaccard-check function on 3-gram tokens.
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-complex_ngram-jaccard-check-let.adm";
+
+for $o in dataset('DBLP')
+let $jacc := similarity-jaccard-check(gram-tokens($o.title, 3, false), gram-tokens("Transactions for Cooperative Environments", 3, false), 0.5f)
+where $jacc[0]
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-jaccard-check-multi-let.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-jaccard-check-multi-let.aql
new file mode 100644
index 0000000..30d97b8
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ngram-jaccard-check-multi-let.aql

@@ -0,0 +1,36 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using the similarity-jaccard-check function on 3-gram tokens.
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-complex_ngram-jaccard-check-multi-let.adm";
+
+// This test is complex because we have three assigns to drill into.
+for $paper in dataset('DBLP')
+let $paper_tokens := gram-tokens($paper.title, 3, false)
+let $query_tokens := gram-tokens("Transactions for Cooperative Environments", 3, false)
+let $jacc := similarity-jaccard-check($paper_tokens, $query_tokens, 0.5f)
+where $jacc[0]
+return {"Paper": $paper_tokens, "Query": $query_tokens }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/olist-edit-distance-check-let-panic.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/olist-edit-distance-check-let-panic.aql
new file mode 100644
index 0000000..51e66a1
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/olist-edit-distance-check-let-panic.aql

@@ -0,0 +1,41 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using the edit-distance-check function on lists.
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should *not* be applied (see below).
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-complex_olist-edit-distance-check-let-panic.adm";
+
+for $c in dataset('Customers')
+let $ed := edit-distance-check($c.interests, ["computers", "wine", "walking"], 3)
+where $ed[0]
+order by $c.cid
+return $c

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/olist-edit-distance-check-let.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/olist-edit-distance-check-let.aql
new file mode 100644
index 0000000..2d6c4cd
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/olist-edit-distance-check-let.aql

@@ -0,0 +1,41 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using the edit-distance-check function on lists.
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-complex_olist-edit-distance-check-let.adm";
+
+for $c in dataset('Customers')
+let $ed := edit-distance-check($c.interests, ["computers", "wine", "walking"], 1)
+where $ed[0]
+order by $c.cid
+return $c

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/olist-jaccard-check-let.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/olist-jaccard-check-let.aql
new file mode 100644
index 0000000..ff9d8e6
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/olist-jaccard-check-let.aql

@@ -0,0 +1,40 @@
+/*
+ * Description    : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on lists.
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-complex_olist-jaccard-check-let.adm";
+
+for $c in dataset('Customers')
+let $jacc := similarity-jaccard-check($c.interests, ["databases", "computers", "wine"], 0.7f)
+where $jacc[0]
+return $c

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ulist-jaccard-check-let.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ulist-jaccard-check-let.aql
new file mode 100644
index 0000000..57d9244
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ulist-jaccard-check-let.aql

@@ -0,0 +1,40 @@
+/*
+ * Description    : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on lists.
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: {{string}},
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-complex_ulist-jaccard-check-let.adm";
+
+for $c in dataset('Customers')
+let $jacc := similarity-jaccard-check($c.interests, ["databases", "computers", "wine"], 0.7f)
+where $jacc[0]
+return $c

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-let.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-let.aql
new file mode 100644
index 0000000..f79e10f
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-let.aql

@@ -0,0 +1,34 @@
+/*
+ * Description    : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on word tokens.
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index keyword_index on DBLP(title) type keyword;
+
+write output to nc1:"rttest/inverted-index-complex_word-jaccard-check-let.adm";
+
+for $o in dataset('DBLP')
+let $jacc := similarity-jaccard-check(word-tokens($o.title), word-tokens("Transactions for Cooperative Environments"), 0.5f)
+where $jacc[0]
+return $o
+

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-multi-let.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-multi-let.aql
new file mode 100644
index 0000000..465cb51
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-multi-let.aql

@@ -0,0 +1,36 @@
+/*
+ * Description    : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on word tokens.
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index keyword_index on DBLP(title) type keyword;
+
+write output to nc1:"rttest/inverted-index-complex_word-jaccard-check-multi-let.adm";
+
+// This test is complex because we have three assigns to drill into.
+for $paper in dataset('DBLP')
+let $paper_tokens := word-tokens($paper.title)
+let $query_tokens := word-tokens("Transactions for Cooperative Environments")
+let $jacc := similarity-jaccard-check($paper_tokens, $query_tokens, 0.8f)
+where $jacc[0]
+return {"Paper": $paper_tokens, "Query": $query_tokens }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_01.aql
new file mode 100644
index 0000000..d802dc5
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_01.aql

@@ -0,0 +1,46 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance-check function of their authors.
+ *                  DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  id: int32, 
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index ngram_index on DBLP(authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-edit-distance-check_01.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where edit-distance-check($a.authors, $b.authors, 3)[0] and $a.id < $b.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_02.aql
new file mode 100644
index 0000000..01003f9
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_02.aql

@@ -0,0 +1,46 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance-check function of their authors.
+ *                  CSX has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  id: int32, 
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index ngram_index on CSX(authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-edit-distance-check_02.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where edit-distance-check($a.authors, $b.authors, 3)[0] and $a.id < $b.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_03.aql
new file mode 100644
index 0000000..890dfa9
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_03.aql

@@ -0,0 +1,31 @@
+/*
+ * Description    : Fuzzy self joins a dataset, DBLP, based on the edit-distance-check function of its authors.
+ *                  DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-edit-distance-check_03.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('DBLP')
+where edit-distance-check($a.authors, $b.authors, 3)[0] and $a.id < $b.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_01.aql
new file mode 100644
index 0000000..76622d9
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_01.aql

@@ -0,0 +1,46 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance function of their authors.
+ *                  DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  id: int32, 
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index ngram_index on DBLP(authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-edit-distance_01.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where edit-distance($a.authors, $b.authors) < 3 and $a.id < $b.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_02.aql
new file mode 100644
index 0000000..4298db1
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_02.aql

@@ -0,0 +1,46 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance function of their authors.
+ *                  CSX has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  id: int32, 
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index ngram_index on CSX(authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-edit-distance_02.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where edit-distance($a.authors, $b.authors) < 3 and $a.id < $b.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_03.aql
new file mode 100644
index 0000000..72ca9d0
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_03.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Fuzzy self joins a dataset, DBLP, based on the edit-distance function of its authors.
+ *                  DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-edit-distance_03.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('DBLP')
+where edit-distance($a.authors, $b.authors) < 3 and $a.id < $b.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql
new file mode 100644
index 0000000..14a4420
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql

@@ -0,0 +1,49 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on ~= using edit distance of their authors.
+ *                  DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  id: int32, 
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index ngram_index on CSX(authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-fuzzyeq-edit-distance_01.adm";
+
+set simfunction 'edit-distance';
+set simthreshold '3';
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where $a.authors ~= $b.authors and $a.id < $b.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_02.aql
new file mode 100644
index 0000000..2ad1f2b
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_02.aql

@@ -0,0 +1,49 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on ~= using edit distance of their authors.
+ *                  CSX has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  id: int32, 
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index ngram_index on DBLP(authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-fuzzyeq-edit-distance_01.adm";
+
+set simfunction 'edit-distance';
+set simthreshold '3';
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where $a.authors ~= $b.authors and $a.id < $b.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_03.aql
new file mode 100644
index 0000000..640b6e1
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_03.aql

@@ -0,0 +1,34 @@
+/*
+ * Description    : Fuzzy self joins a dataset, DBLP, based on ~= using edit distance of its authors.
+ *                  DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-fuzzyeq-edit-distance_03.adm";
+
+set simfunction 'edit-distance';
+set simthreshold '3';
+
+for $a in dataset('DBLP')
+for $b in dataset('DBLP')
+where $a.authors ~= $b.authors and $a.id < $b.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_01.aql
new file mode 100644
index 0000000..55af0e4
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_01.aql

@@ -0,0 +1,49 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on ~= using Jaccard of their titles' 3-gram tokens.
+ *                  DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  id: int32, 
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index ngram_index on DBLP(title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-fuzzyeq-jaccard_01.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.5f';
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where gram-tokens($a.title, 3, false) ~= gram-tokens($b.title, 3, false) and $a.id < $b.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_02.aql
new file mode 100644
index 0000000..c00726e
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_02.aql

@@ -0,0 +1,49 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on ~= using Jaccard their titles' 3-gram tokens.
+ *                  CSX has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  id: int32, 
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index ngram_index on CSX(title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-fuzzyeq-jaccard_02.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.5f';
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where gram-tokens($a.title, 3, false) ~= gram-tokens($b.title, 3, false) and $a.id < $b.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_03.aql
new file mode 100644
index 0000000..9d2ce15
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_03.aql

@@ -0,0 +1,35 @@
+/*
+ * Description    : Fuzzy self joins a dataset, DBLP, based on ~= using Jaccard of its titles' 3-gram tokens.
+ *                  DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-fuzzyeq-jaccard_03.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.5f';
+
+for $a in dataset('DBLP')
+for $b in dataset('DBLP')
+where gram-tokens($a.title, 3, false) ~= gram-tokens($b.title, 3, false) and $a.id < $b.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_01.aql
new file mode 100644
index 0000000..cb5f92c
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_01.aql

@@ -0,0 +1,47 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard-check function of their titles' 3-gram tokens.
+ *                  DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  id: int32, 
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index ngram_index on DBLP(title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-jaccard-check_01.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where similarity-jaccard-check(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false), 0.5f)[0]
+      and $a.id < $b.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_02.aql
new file mode 100644
index 0000000..8f487ed
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_02.aql

@@ -0,0 +1,47 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based the similarity-jaccard-check function of their titles' 3-gram tokens.
+ *                  CSX has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  id: int32, 
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index ngram_index on CSX(title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-jaccard-check_02.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where similarity-jaccard-check(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false), 0.5f)[0]
+      and $a.id < $b.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_03.aql
new file mode 100644
index 0000000..9c57be6
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_03.aql

@@ -0,0 +1,33 @@
+/*
+ * Description    : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard-check function of its titles' 3-gram tokens.
+ *                  DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-jaccard-check_03.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('DBLP')
+where similarity-jaccard-check(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false), 0.5f)[0]
+      and $a.id < $b.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_01.aql
new file mode 100644
index 0000000..14b52fb
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_01.aql

@@ -0,0 +1,47 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard function of their titles' 3-gram tokens.
+ *                  DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  id: int32, 
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index ngram_index on DBLP(title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-jaccard_01.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where similarity-jaccard(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false)) >= 0.5f
+      and $a.id < $b.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_02.aql
new file mode 100644
index 0000000..4c51967
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_02.aql

@@ -0,0 +1,47 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based the similarity-jaccard function of their titles' 3-gram tokens.
+ *                  CSX has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  id: int32, 
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index ngram_index on CSX(title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-jaccard_02.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where similarity-jaccard(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false)) >= 0.5f
+      and $a.id < $b.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_03.aql
new file mode 100644
index 0000000..cd84d30
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_03.aql

@@ -0,0 +1,33 @@
+/*
+ * Description    : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard function of its titles' 3-gram tokens.
+ *                  DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-jaccard_03.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('DBLP')
+where similarity-jaccard(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false)) >= 0.5f
+      and $a.id < $b.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_01.aql
new file mode 100644
index 0000000..b4f8b56
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_01.aql

@@ -0,0 +1,45 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on the edit-distance-check function of their interest lists.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-edit-distance-check_01.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where edit-distance-check($a.interests, $b.interests, 3)[0] and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_02.aql
new file mode 100644
index 0000000..db47ed6b
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_02.aql

@@ -0,0 +1,45 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on the edit-distance-check function of their interest lists.
+ *                  Customers2 has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers2(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-edit-distance-check_02.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where edit-distance-check($a.interests, $b.interests, 3)[0] and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_03.aql
new file mode 100644
index 0000000..d97c003
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_03.aql

@@ -0,0 +1,39 @@
+/*
+ * Description    : Fuzzy self joins a dataset, Customers, based on the edit-distance-check function of its interest lists.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-edit-distance-check_03.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers')
+where edit-distance-check($a.interests, $b.interests, 3)[0] and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_01.aql
new file mode 100644
index 0000000..4047e55
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_01.aql

@@ -0,0 +1,45 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on the edit-distance function of their interest lists.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-edit-distance_01.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where edit-distance($a.interests, $b.interests) <= 2 and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_02.aql
new file mode 100644
index 0000000..5745565
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_02.aql

@@ -0,0 +1,45 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on the edit-distance function of their interest lists.
+ *                  Customers2 has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers2(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-edit-distance_02.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where edit-distance($a.interests, $b.interests) <= 2 and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_03.aql
new file mode 100644
index 0000000..fc3fc4a
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_03.aql

@@ -0,0 +1,39 @@
+/*
+ * Description    : Fuzzy self joins a dataset, Customers, based on the edit-distance function of its interest lists.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-edit-distance_03.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers')
+where edit-distance($a.interests, $b.interests) <= 2 and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-edit-distance_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-edit-distance_01.aql
new file mode 100644
index 0000000..bf2b8bc
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-edit-distance_01.aql

@@ -0,0 +1,48 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on ~= using edit distance of their interest lists.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-fuzzyeq-jaccard_01.adm";
+
+set simfunction 'edit-distance';
+set simthreshold '3';
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where $a.interests ~= $b.interests and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-edit-distance_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-edit-distance_02.aql
new file mode 100644
index 0000000..5314b95
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-edit-distance_02.aql

@@ -0,0 +1,48 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on ~= using edit distance of their interest lists.
+ *                  Customers2 has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers2(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-fuzzyeq-jaccard_02.adm";
+
+set simfunction 'edit-distance';
+set simthreshold '3';
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where $a.interests ~= $b.interests and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-edit-distance_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-edit-distance_03.aql
new file mode 100644
index 0000000..15a9ab7
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-edit-distance_03.aql

@@ -0,0 +1,42 @@
+/*
+ * Description    : Fuzzy self joins a dataset, Customers, based on ~= using edit distance of its interest lists.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-fuzzyeq-jaccard_03.adm";
+
+set simfunction 'edit-distance';
+set simthreshold '3';
+
+for $a in dataset('Customers')
+for $b in dataset('Customers')
+where $a.interests ~= $b.interests and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_01.aql
new file mode 100644
index 0000000..f6f2f84
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_01.aql

@@ -0,0 +1,48 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on ~= using Jaccard of their interest lists.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-fuzzyeq-jaccard_01.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.7f';
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where $a.interests ~= $b.interests and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_02.aql
new file mode 100644
index 0000000..1951e6f
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_02.aql

@@ -0,0 +1,48 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on ~= using Jaccard of their interest lists.
+ *                  Customers2 has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers2(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-fuzzyeq-jaccard_02.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.7f';
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where $a.interests ~= $b.interests and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_03.aql
new file mode 100644
index 0000000..d791b85
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_03.aql

@@ -0,0 +1,42 @@
+/*
+ * Description    : Fuzzy self joins a dataset, Customers, based on ~= using Jaccard of its interest lists.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-fuzzyeq-jaccard_03.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.7f';
+
+for $a in dataset('Customers')
+for $b in dataset('Customers')
+where $a.interests ~= $b.interests and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_01.aql
new file mode 100644
index 0000000..5f6f59b
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_01.aql

@@ -0,0 +1,45 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on the similarity-jaccard-check function of their interest lists.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-jaccard-check_01.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where similarity-jaccard-check($a.interests, $b.interests, 0.7f)[0] and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_02.aql
new file mode 100644
index 0000000..0754282
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_02.aql

@@ -0,0 +1,45 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on the similarity-jaccard-check function of their interest lists.
+ *                  Customers2 has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers2(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-jaccard-check_02.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where similarity-jaccard-check($a.interests, $b.interests, 0.7f)[0] and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_03.aql
new file mode 100644
index 0000000..4dbc4d5
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_03.aql

@@ -0,0 +1,39 @@
+/*
+ * Description    : Fuzzy self joins a dataset, Customers, based on the similarity-jaccard-check function of its interest lists.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-jaccard-check_03.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers')
+where similarity-jaccard-check($a.interests, $b.interests, 0.7f)[0] and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_01.aql
new file mode 100644
index 0000000..ddf386e
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_01.aql

@@ -0,0 +1,45 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on the similarity-jaccard function of their interest lists.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-jaccard_01.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_02.aql
new file mode 100644
index 0000000..50c3db6
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_02.aql

@@ -0,0 +1,45 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on the similarity-jaccard function of their interest lists.
+ *                  Customers2 has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers2(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-jaccard_02.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_03.aql
new file mode 100644
index 0000000..50729ba
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_03.aql

@@ -0,0 +1,39 @@
+/*
+ * Description    : Fuzzy self joins a dataset, Customers, based on the similarity-jaccard function of its interest lists.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-jaccard_03.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers')
+where similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_01.aql
new file mode 100644
index 0000000..1fa479d
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_01.aql

@@ -0,0 +1,48 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on ~= using Jaccard of their interest sets.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: {{string}},
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_ulist-fuzzyeq-jaccard_01.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.7f';
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where $a.interests ~= $b.interests and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_02.aql
new file mode 100644
index 0000000..e5b532f
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_02.aql

@@ -0,0 +1,48 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on ~= using Jaccard of their interest sets.
+ *                  Customers2 has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: {{string}},
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers2(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_ulist-fuzzyeq-jaccard_02.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.7f';
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where $a.interests ~= $b.interests and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_03.aql
new file mode 100644
index 0000000..a881c89
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_03.aql

@@ -0,0 +1,42 @@
+/*
+ * Description    : Fuzzy self joins a dataset, Customers, based on ~= using Jaccard of its interest sets.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: {{string}},
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_ulist-fuzzyeq-jaccard_03.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.7f';
+
+for $a in dataset('Customers')
+for $b in dataset('Customers')
+where $a.interests ~= $b.interests and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_01.aql
new file mode 100644
index 0000000..5d95894
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_01.aql

@@ -0,0 +1,45 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on the similarity-jaccard-check function of their interest sets.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: {{string}},
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_ulist-jaccard-check_01.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where similarity-jaccard-check($a.interests, $b.interests, 0.7f)[0] and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_02.aql
new file mode 100644
index 0000000..561e15f
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_02.aql

@@ -0,0 +1,45 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on the similarity-jaccard-check function of their interest sets.
+ *                  Customers2 has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: {{string}},
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers2(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_ulist-jaccard-check_02.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where similarity-jaccard-check($a.interests, $b.interests, 0.7f)[0] and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_03.aql
new file mode 100644
index 0000000..87d78e5
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_03.aql

@@ -0,0 +1,39 @@
+/*
+ * Description    : Fuzzy self joins a dataset, Customers, based on the similarity-jaccard-check function of its interest sets.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: {{string}},
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_ulist-jaccard-check_03.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers')
+where similarity-jaccard-check($a.interests, $b.interests, 0.7f)[0] and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_01.aql
new file mode 100644
index 0000000..864ede7
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_01.aql

@@ -0,0 +1,45 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on the similarity-jaccard function of their interest sets.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: {{string}},
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_ulist-jaccard_01.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_02.aql
new file mode 100644
index 0000000..4d9f89e
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_02.aql

@@ -0,0 +1,45 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on the similarity-jaccard function of their interest sets.
+ *                  Customers2 has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: {{string}},
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers2(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_ulist-jaccard_02.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_03.aql
new file mode 100644
index 0000000..5eae45b
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_03.aql

@@ -0,0 +1,39 @@
+/*
+ * Description    : Fuzzy self joins a dataset, Customers, based on the similarity-jaccard function of its interest sets.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32, 
+  street: string,
+  city: string
+}
+
+create type CustomerType as closed {
+  cid: int32, 
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: {{string}},
+  children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+  
+load dataset Customers 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_ulist-jaccard_03.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers')
+where similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_01.aql
new file mode 100644
index 0000000..6ea53e9
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_01.aql

@@ -0,0 +1,49 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on ~= using Jaccard of their titles' word tokens.
+ *                  DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  id: int32, 
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index keyword_index on DBLP(title) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_word-fuzzyeq-jaccard_01.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.5f';
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where word-tokens($a.title) ~= word-tokens($b.title) and $a.id < $b.id
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_02.aql
new file mode 100644
index 0000000..254a825
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_02.aql

@@ -0,0 +1,49 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on ~= using Jaccard of their titles' word tokens.
+ *                  CSX has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  id: int32, 
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index keyword_index on CSX(title) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_word-fuzzyeq-jaccard_02.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.5f';
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where word-tokens($a.title) ~= word-tokens($b.title) and $a.id < $b.id
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_03.aql
new file mode 100644
index 0000000..f553abe
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_03.aql

@@ -0,0 +1,35 @@
+/*
+ * Description    : Fuzzy self joins a dataset, DBLP, based on ~= using Jaccard of its titles' word tokens.
+ *                  DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index keyword_index on DBLP(title) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_word-fuzzyeq-jaccard_03.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.5f';
+
+for $a in dataset('DBLP')
+for $b in dataset('DBLP')
+where word-tokens($a.title) ~= word-tokens($b.title) and $a.id < $b.id
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_01.aql
new file mode 100644
index 0000000..63efb89
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_01.aql

@@ -0,0 +1,47 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard-check function of their titles' word tokens.
+ *                  DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  id: int32, 
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index keyword_index on DBLP(title) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_word-jaccard-check_01.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where similarity-jaccard-check(word-tokens($a.title), word-tokens($b.title), 0.5f)[0]
+      and $a.id < $b.id
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_02.aql
new file mode 100644
index 0000000..1298833
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_02.aql

@@ -0,0 +1,47 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard-check function of their titles' word tokens.
+ *                  CSX has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  id: int32, 
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index keyword_index on CSX(title) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_word-jaccard-check_02.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where similarity-jaccard-check(word-tokens($a.title), word-tokens($b.title), 0.5f)[0]
+      and $a.id < $b.id
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_03.aql
new file mode 100644
index 0000000..c2d6b78
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_03.aql

@@ -0,0 +1,33 @@
+/*
+ * Description    : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard-check function of its titles' word tokens.
+ *                  DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index keyword_index on DBLP(title) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_word-jaccard-check_03.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('DBLP')
+where similarity-jaccard-check(word-tokens($a.title), word-tokens($b.title), 0.5f)[0]
+      and $a.id < $b.id
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_01.aql
new file mode 100644
index 0000000..7c69928
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_01.aql

@@ -0,0 +1,48 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard function of their titles' word tokens.
+ *                  DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  id: int32, 
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index keyword_index on DBLP(title) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_word-jaccard_01.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where similarity-jaccard(word-tokens($a.title), word-tokens($b.title)) >= 0.5f
+      and $a.id < $b.id
+return {"arec": $a, "brec": $b }
+

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_02.aql
new file mode 100644
index 0000000..1f5c082
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_02.aql

@@ -0,0 +1,48 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard function of their titles' word tokens.
+ *                  CSX has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  id: int32, 
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index keyword_index on CSX(title) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_word-jaccard_02.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where similarity-jaccard(word-tokens($a.title), word-tokens($b.title)) >= 0.5f
+      and $a.id < $b.id
+return {"arec": $a, "brec": $b }
+

diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_03.aql
new file mode 100644
index 0000000..94e59c0
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_03.aql

@@ -0,0 +1,33 @@
+/*
+ * Description    : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard function of its titles' word tokens.
+ *                  DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index keyword_index on DBLP(title) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_word-jaccard_03.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('DBLP')
+where similarity-jaccard(word-tokens($a.title), word-tokens($b.title)) >= 0.5f
+      and $a.id < $b.id
+return {"arec": $a, "brec": $b }

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_01.aql
new file mode 100644
index 0000000..a456568
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_01.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the edit-distance function with a threshold 
+ *                  into edit-distance-check if possible.
+ *                  Tests that the optimizer drills through the let clause.
+ *                  Replacement should happen because of a "<=" condition on the edit distance.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_edit-distance-let-to-edit-distance-check_01.adm";
+
+for $o in dataset('DBLP')
+let $ed := edit-distance($o.authors, "Michael J. Carey")
+where $ed <= 2
+return $ed
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_02.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_02.aql
new file mode 100644
index 0000000..be946af
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_02.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the edit-distance function with a threshold 
+ *                  into edit-distance-check if possible.
+ *                  Tests that the optimizer drills through the let clause. 
+ *                  Replacement should happen because of a reverse "<=" condition on the edit distance.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_edit-distance-let-to-edit-distance-check_02.adm";
+
+for $o in dataset('DBLP')
+let $ed := edit-distance($o.authors, "Michael J. Carey")
+where 2 >= $ed
+return $ed
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_03.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_03.aql
new file mode 100644
index 0000000..e709e8c
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_03.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the edit-distance function with a threshold 
+ *                  into edit-distance-check if possible.
+ *                  Tests that the optimizer drills through the let clause.
+ *                  Replacement should happen because of a "<" condition on the edit distance.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_edit-distance-let-to-edit-distance-check_03.adm";
+
+for $o in dataset('DBLP')
+let $ed := edit-distance($o.authors, "Michael J. Carey")
+where $ed < 3
+return $ed
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_04.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_04.aql
new file mode 100644
index 0000000..8bbd3d6
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_04.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the edit-distance function with a threshold 
+ *                  into edit-distance-check if possible.
+ *                  Tests that the optimizer drills through the let clause.
+ *                  Replacement should happen because of a reverse "<" condition on the edit distance.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_edit-distance-let-to-edit-distance-check_04.adm";
+
+for $o in dataset('DBLP')
+let $ed := edit-distance($o.authors, "Michael J. Carey")
+where 3 > $ed
+return $ed
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_05.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_05.aql
new file mode 100644
index 0000000..b929ef2
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_05.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the edit-distance function with a threshold 
+ *                  into edit-distance-check if possible.
+ *                  Tests that the optimizer drills through the let clause.
+ *                  Replacement should *not* happen because of a ">=" condition on the edit distance.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_edit-distance-let-to-edit-distance-check_05.adm";
+
+for $o in dataset('DBLP')
+let $ed := edit-distance($o.authors, "Michael J. Carey")
+where $ed >= 2
+return $ed
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_06.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_06.aql
new file mode 100644
index 0000000..3b86bd0
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_06.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the edit-distance function with a threshold 
+ *                  into edit-distance-check if possible.
+ *                  Tests that the optimizer drills through the let clause.
+ *                   Replacement should *not* happen because of a reverse ">=" condition on the edit distance.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_edit-distance-let-to-edit-distance-check_06.adm";
+
+for $o in dataset('DBLP')
+let $ed := edit-distance($o.authors, "Michael J. Carey")
+where 2 <= $ed
+return $ed
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_07.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_07.aql
new file mode 100644
index 0000000..9960262
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_07.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the edit-distance function with a threshold 
+ *                  into edit-distance-check if possible.
+ *                  Tests that the optimizer drills through the let clause.
+ *                  Replacement should *not* happen because of a ">" condition on the edit distance.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_edit-distance-let-to-edit-distance-check_07.adm";
+
+for $o in dataset('DBLP')
+let $ed := edit-distance($o.authors, "Michael J. Carey")
+where $ed > 2
+return $ed
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_08.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_08.aql
new file mode 100644
index 0000000..ddb5018
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-let-to-edit-distance-check_08.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the edit-distance function with a threshold 
+ *                  into edit-distance-check if possible.
+ *                  Tests that the optimizer drills through the let clause.
+ *                  Replacement should *not* happen because of a reverse ">" condition on the edit distance.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_edit-distance-let-to-edit-distance-check_08.adm";
+
+for $o in dataset('DBLP')
+let $ed := edit-distance($o.authors, "Michael J. Carey")
+where 2 < $ed
+return $ed
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_01.aql
new file mode 100644
index 0000000..4a21a3b
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_01.aql

@@ -0,0 +1,30 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the edit-distance function with a threshold 
+ *                  into edit-distance-check if possible.
+ *                  Replacement should happen because of a "<=" condition on the edit distance.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_edit-distance-to-edit-distance-check_01.adm";
+
+for $o in dataset('DBLP')
+where edit-distance($o.authors, "Michael J. Carey") <= 2
+return $o

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_02.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_02.aql
new file mode 100644
index 0000000..6657bf4
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_02.aql

@@ -0,0 +1,30 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the edit-distance function with a threshold 
+ *                  into edit-distance-check if possible.
+ *                  Replacement should happen because of a reverse "<=" condition on the edit distance.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_edit-distance-to-edit-distance-check_02.adm";
+
+for $o in dataset('DBLP')
+where 2 >= edit-distance($o.authors, "Michael J. Carey")
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_03.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_03.aql
new file mode 100644
index 0000000..f1796a1
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_03.aql

@@ -0,0 +1,30 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the edit-distance function with a threshold 
+ *                  into edit-distance-check if possible.
+ *                  Replacement should happen because of a "<" condition on the edit distance.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_edit-distance-to-edit-distance-check_03.adm";
+
+for $o in dataset('DBLP')
+where edit-distance($o.authors, "Michael J. Carey") < 3
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_04.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_04.aql
new file mode 100644
index 0000000..fb63621
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_04.aql

@@ -0,0 +1,30 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the edit-distance function with a threshold 
+ *                  into edit-distance-check if possible.
+ *                  Replacement should happen because of a reverse "<" condition on the edit distance.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_edit-distance-to-edit-distance-check_04.adm";
+
+for $o in dataset('DBLP')
+where 3 > edit-distance($o.authors, "Michael J. Carey")
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_05.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_05.aql
new file mode 100644
index 0000000..b421d54
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_05.aql

@@ -0,0 +1,31 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the edit-distance function with a threshold 
+ *                  into edit-distance-check if possible.
+ *                  Replacement should *not* happen because of a ">=" condition on the edit distance.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_edit-distance-to-edit-distance-check_05.adm";
+
+// We cannot introduce edit-distance-check because the condition is >=
+for $o in dataset('DBLP')
+where edit-distance($o.authors, "Michael J. Carey") >= 2
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_06.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_06.aql
new file mode 100644
index 0000000..a9372ec
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_06.aql

@@ -0,0 +1,31 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the edit-distance function with a threshold 
+ *                  into edit-distance-check if possible.
+ *                  Replacement should *not* happen because of a reverse ">=" condition on the edit distance.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_edit-distance-to-edit-distance-check_06.adm";
+
+// We cannot introduce edit-distance-check because the condition is <=
+for $o in dataset('DBLP')
+where 2 <= edit-distance($o.authors, "Michael J. Carey")
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_07.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_07.aql
new file mode 100644
index 0000000..74959c7
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_07.aql

@@ -0,0 +1,31 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the edit-distance function with a threshold 
+ *                  into edit-distance-check if possible.
+ *                  Replacement should *not* happen because of a ">" condition on the edit distance.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_edit-distance-to-edit-distance-check_07.adm";
+
+// We cannot introduce edit-distance-check because the condition is >
+for $o in dataset('DBLP')
+where edit-distance($o.authors, "Michael J. Carey") > 2
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_08.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_08.aql
new file mode 100644
index 0000000..4ac54ed
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/edit-distance-to-edit-distance-check_08.aql

@@ -0,0 +1,31 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the edit-distance function with a threshold 
+ *                  into edit-distance-check if possible.
+ *                  Replacement should *not* happen because of a reverse ">" condition on the edit distance.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_edit-distance-to-edit-distance-check_08.adm";
+
+// We cannot introduce edit-distance-check because the condition is <
+for $o in dataset('DBLP')
+where 2 < edit-distance($o.authors, "Michael J. Carey")
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/fuzzyeq-to-edit-distance-check.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/fuzzyeq-to-edit-distance-check.aql
new file mode 100644
index 0000000..f53435b
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/fuzzyeq-to-edit-distance-check.aql

@@ -0,0 +1,33 @@
+/*
+ * Description    : Tests that the FuzzyEqRule rewrites ~= using edit distance 
+ *                  into edit-distance-check.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_fuzzyeq-to-edit-distance-check.adm";
+
+set simfunction 'edit-distance';
+set simthreshold '1';
+
+// Tests two rules: FuzzyEqRule and SimilarityCheckRule
+for $o in dataset('DBLP')
+where $o.authors ~= "Michael J. Carey"
+return $o
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/fuzzyeq-to-jaccard-check.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/fuzzyeq-to-jaccard-check.aql
new file mode 100644
index 0000000..f4da76f
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/fuzzyeq-to-jaccard-check.aql

@@ -0,0 +1,33 @@
+/*
+ * Description    : Tests that the FuzzyEqRule rewrites ~= using Jaccard 
+ *                  into edit-distance-check.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_fuzzyeq-to-jaccard-check.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.8f';
+
+// Tests two rules: FuzzyEqRule and SimilarityCheckRule
+for $paper in dataset('DBLP')
+where word-tokens($paper.title) ~= word-tokens("Transactions for Cooperative Environments")
+return $paper
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_01.aql
new file mode 100644
index 0000000..c76f15b
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_01.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the similarity-jaccard function with a threshold 
+ *                  into similarity-jaccard-check-check if possible.
+ *                  Tests that the optimizer drills through the let clause.
+ *                  Replacement should happen because of a ">=" condition on the similarity.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_jaccard-let-to-jaccard-check_01.adm";
+
+for $paper in dataset('DBLP')
+let $jacc := similarity-jaccard(word-tokens($paper.title), word-tokens("Transactions for Cooperative Environments"))
+where $jacc >= 0.8f
+return $jacc

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_02.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_02.aql
new file mode 100644
index 0000000..52296d5
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_02.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the similarity-jaccard function with a threshold 
+ *                  into similarity-jaccard-check-check if possible.
+ *                  Tests that the optimizer drills through the let clause.
+ *                  Replacement should happen because of a reverse ">=" condition on the similarity.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_jaccard-let-to-jaccard-check_01.adm";
+
+for $paper in dataset('DBLP')
+let $jacc := similarity-jaccard(word-tokens($paper.title), word-tokens("Transactions for Cooperative Environments"))
+where 0.8f <= $jacc 
+return $jacc

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_03.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_03.aql
new file mode 100644
index 0000000..8251c76
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_03.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the similarity-jaccard function with a threshold 
+ *                  into similarity-jaccard-check-check if possible.
+ *                  Tests that the optimizer drills through the let clause.
+ *                  Replacement should happen because of a ">" condition on the similarity.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_jaccard-let-to-jaccard-check_01.adm";
+
+for $paper in dataset('DBLP')
+let $jacc := similarity-jaccard(word-tokens($paper.title), word-tokens("Transactions for Cooperative Environments"))
+where $jacc > 0.8f
+return $jacc

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_04.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_04.aql
new file mode 100644
index 0000000..2c53231
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_04.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the similarity-jaccard function with a threshold 
+ *                  into similarity-jaccard-check-check if possible.
+ *                  Tests that the optimizer drills through the let clause.
+ *                  Replacement should happen because of a reverse ">" condition on the similarity.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_jaccard-let-to-jaccard-check_01.adm";
+
+for $paper in dataset('DBLP')
+let $jacc := similarity-jaccard(word-tokens($paper.title), word-tokens("Transactions for Cooperative Environments"))
+where 0.8f < $jacc 
+return $jacc

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_05.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_05.aql
new file mode 100644
index 0000000..03a0321
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_05.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the similarity-jaccard function with a threshold 
+ *                  into similarity-jaccard-check-check if possible.
+ *                  Tests that the optimizer drills through the let clause.
+ *                  Replacement should *not* happen because of a "<=" condition on the similarity.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_jaccard-let-to-jaccard-check_01.adm";
+
+for $paper in dataset('DBLP')
+let $jacc := similarity-jaccard(word-tokens($paper.title), word-tokens("Transactions for Cooperative Environments"))
+where $jacc <= 0.8f
+return $jacc

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_06.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_06.aql
new file mode 100644
index 0000000..b1d26da
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_06.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the similarity-jaccard function with a threshold 
+ *                  into similarity-jaccard-check-check if possible.
+ *                  Tests that the optimizer drills through the let clause.
+ *                  Replacement should *not* happen because of a reverse "<=" condition on the similarity.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_jaccard-let-to-jaccard-check_01.adm";
+
+for $paper in dataset('DBLP')
+let $jacc := similarity-jaccard(word-tokens($paper.title), word-tokens("Transactions for Cooperative Environments"))
+where 0.8f >= $jacc 
+return $jacc

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_07.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_07.aql
new file mode 100644
index 0000000..e008feb
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_07.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the similarity-jaccard function with a threshold 
+ *                  into similarity-jaccard-check-check if possible.
+ *                  Tests that the optimizer drills through the let clause.
+ *                  Replacement should *not* happen because of a "<" condition on the similarity.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_jaccard-let-to-jaccard-check_01.adm";
+
+for $paper in dataset('DBLP')
+let $jacc := similarity-jaccard(word-tokens($paper.title), word-tokens("Transactions for Cooperative Environments"))
+where $jacc < 0.8f
+return $jacc

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_08.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_08.aql
new file mode 100644
index 0000000..7e93d7f
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-let-to-jaccard-check_08.aql

@@ -0,0 +1,32 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the similarity-jaccard function with a threshold 
+ *                  into similarity-jaccard-check-check if possible.
+ *                  Tests that the optimizer drills through the let clause.
+ *                  Replacement should *not* happen because of a reverse "<" condition on the similarity.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_jaccard-let-to-jaccard-check_01.adm";
+
+for $paper in dataset('DBLP')
+let $jacc := similarity-jaccard(word-tokens($paper.title), word-tokens("Transactions for Cooperative Environments"))
+where 0.8f > $jacc 
+return $jacc

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_01.aql
new file mode 100644
index 0000000..ecc0554
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_01.aql

@@ -0,0 +1,30 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the similarity-jaccard function with a threshold 
+ *                  into similarity-jaccard-check-check if possible.
+ *                  Replacement should happen because of a ">=" condition on the similarity.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_jaccard-to-jaccard-check_01.adm";
+
+for $paper in dataset('DBLP')
+where similarity-jaccard(word-tokens($paper.title), word-tokens("Transactions for Cooperative Environments")) >= 0.8f
+return $paper
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_02.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_02.aql
new file mode 100644
index 0000000..18bf1a3
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_02.aql

@@ -0,0 +1,30 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the similarity-jaccard function with a threshold 
+ *                  into similarity-jaccard-check-check if possible.
+ *                  Replacement should happen because of a reverse ">=" condition on the similarity.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_jaccard-to-jaccard-check_02.adm";
+
+for $paper in dataset('DBLP')
+where 0.8f <= similarity-jaccard(word-tokens($paper.title), word-tokens("Transactions for Cooperative Environments"))
+return $paper
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_03.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_03.aql
new file mode 100644
index 0000000..f2f6e77
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_03.aql

@@ -0,0 +1,30 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the similarity-jaccard function with a threshold 
+ *                  into similarity-jaccard-check-check if possible.
+ *                  Replacement should happen because of a ">" condition on the similarity.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_jaccard-to-jaccard-check_02.adm";
+
+for $paper in dataset('DBLP')
+where similarity-jaccard(word-tokens($paper.title), word-tokens("Transactions for Cooperative Environments")) > 0.8f
+return $paper
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_04.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_04.aql
new file mode 100644
index 0000000..8a945bc
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_04.aql

@@ -0,0 +1,30 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the similarity-jaccard function with a threshold 
+ *                  into similarity-jaccard-check-check if possible.
+ *                  Replacement should happen because of a reverse ">" condition on the similarity.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_jaccard-to-jaccard-check_03.adm";
+
+for $paper in dataset('DBLP')
+where 0.8f < similarity-jaccard(word-tokens($paper.title), word-tokens("Transactions for Cooperative Environments"))
+return $paper
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_05.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_05.aql
new file mode 100644
index 0000000..e9808ff
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_05.aql

@@ -0,0 +1,31 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the similarity-jaccard function with a threshold 
+ *                  into similarity-jaccard-check-check if possible.
+ *                  Replacement should *not* happen because of a "<=" condition on the similarity.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_jaccard-to-jaccard-check_05.adm";
+
+// We cannot introduce jaccard-check because the condition is <=
+for $paper in dataset('DBLP')
+where similarity-jaccard(word-tokens($paper.title), word-tokens("Transactions for Cooperative Environments")) <= 0.8f
+return $paper
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_06.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_06.aql
new file mode 100644
index 0000000..91953fc
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_06.aql

@@ -0,0 +1,31 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the similarity-jaccard function with a threshold 
+ *                  into similarity-jaccard-check-check if possible.
+ *                  Replacement should *not* happen because of a reverse "<=" condition on the similarity.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_jaccard-to-jaccard-check_06.adm";
+
+// We cannot introduce jaccard-check because the condition is >=
+for $paper in dataset('DBLP')
+where 0.8f >= similarity-jaccard(word-tokens($paper.title), word-tokens("Transactions for Cooperative Environments"))
+return $paper
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_07.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_07.aql
new file mode 100644
index 0000000..2f3b080
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_07.aql

@@ -0,0 +1,31 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the similarity-jaccard function with a threshold 
+ *                  into similarity-jaccard-check-check if possible.
+ *                  Replacement should *not* happen because of a "<" condition on the similarity.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_jaccard-to-jaccard-check_07.adm";
+
+// We cannot introduce jaccard-check because the condition is <
+for $paper in dataset('DBLP')
+where similarity-jaccard(word-tokens($paper.title), word-tokens("Transactions for Cooperative Environments")) < 0.8f
+return $paper
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_08.aql b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_08.aql
new file mode 100644
index 0000000..fc61d1f
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/similarity/jaccard-to-jaccard-check_08.aql

@@ -0,0 +1,31 @@
+/*
+ * Description    : Tests that the SimilarityCheckRule rewrites the similarity-jaccard function with a threshold 
+ *                  into similarity-jaccard-check-check if possible.
+ *                  Replacement should *not* happen because of a reverse "<" condition on the similarity.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32, 
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+load dataset DBLP 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_jaccard-to-jaccard-check_08.adm";
+
+// We cannot introduce jaccard-check because the condition is >
+for $paper in dataset('DBLP')
+where 0.8f > similarity-jaccard(word-tokens($paper.title), word-tokens("Transactions for Cooperative Environments"))
+return $paper
\ No newline at end of file
commit	c576c60758cc4af9a2d5dc09651f31d338b0aecf	[log] [tgz]
author	alexander.behm <alexander.behm@eaa15691-b419-025a-1212-ee371bd00084>	Fri Jul 06 02:41:15 2012 +0000
committer	alexander.behm <alexander.behm@eaa15691-b419-025a-1212-ee371bd00084>	Fri Jul 06 02:41:15 2012 +0000
tree	9107af14fcc729c75e99a19d2ebbfbe00c95f506
parent	411f1e4d2a0b3a8ad96ad6cc4c3163d4a77142c0 [diff]