Reintegrated asterix-fuzzy.
git-svn-id: https://asterixdb.googlecode.com/svn/branches/asterix_stabilization@437 eaa15691-b419-025a-1212-ee371bd00084
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-ngram-edit-distance.aql b/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-ngram-edit-distance.aql
new file mode 100644
index 0000000..dfd86e3
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-ngram-edit-distance.aql
@@ -0,0 +1,46 @@
+/*
+ * Description : Fuzzy joins two datasets, Customers and Customers2, based on the edit-distance function of their names.
+ * Customers has a 3-gram index on name, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as open {
+ number: int32,
+ street: string,
+ city: string
+}
+
+create type CustomerType as open {
+ cid: int32,
+ name: string,
+ age: int32?,
+ address: AddressType?,
+ interests: [string],
+ children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index ngram_index on Customers(name) type ngram(3); /* built after the load; 3-gram index on the field the join predicate compares */
+
+write output to nc1:"rttest/index-join_inverted-index-ngram-edit-distance.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where edit-distance($a.name, $b.name) <= 4 and $a.cid < $b.cid /* Customers and Customers2 load the same file, so cid < cid drops self-pairs and keeps each matching pair once */
+order by $a.cid, $b.cid
+return { "arec": $a, "brec": $b }
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-ngram-jaccard.aql b/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-ngram-jaccard.aql
new file mode 100644
index 0000000..6f69866
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-ngram-jaccard.aql
@@ -0,0 +1,48 @@
+/*
+ * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard function of their titles' 3-gram tokens.
+ * DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create type CSXType as closed {
+ id: int32,
+ csxid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index ngram_index on DBLP(title) type ngram(3); /* built after the load; 3-gram index on DBLP.title, the indexed side of the join */
+
+write output to nc1:"rttest/index-join_inverted-index-ngram-jaccard.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where similarity-jaccard(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false)) >= 0.5f /* gram length 3 on both sides, same length as the ngram(3) index */
+ and $a.id < $b.id
+order by $a.id, $b.id
+return { "arec": $a.title, "brec": $b.title }
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-olist-edit-distance.aql b/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-olist-edit-distance.aql
new file mode 100644
index 0000000..601d1b8
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-olist-edit-distance.aql
@@ -0,0 +1,46 @@
+/*
+ * Description : Fuzzy joins two datasets, Customers and Customers2, based on the edit-distance function of their interest lists.
+ * Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as open {
+ number: int32,
+ street: string,
+ city: string
+}
+
+create type CustomerType as open {
+ cid: int32,
+ name: string,
+ age: int32?,
+ address: AddressType?,
+ interests: [string],
+ children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword; /* built after the load; keyword index on the list field the join predicate compares */
+
+write output to nc1:"rttest/index-join_inverted-index-olist-edit-distance.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where len($a.interests) > 2 and len($b.interests) > 2 and edit-distance($a.interests, $b.interests) <= 1 and $a.cid < $b.cid /* NOTE(review): the len() > 2 filters presumably keep very short lists out of the fuzzy join - confirm intent; cid < cid drops self-pairs because both datasets load the same file */
+order by $a.cid, $b.cid
+return { "arec": $a, "brec": $b }
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-olist-jaccard.aql b/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-olist-jaccard.aql
new file mode 100644
index 0000000..91fcd80
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-olist-jaccard.aql
@@ -0,0 +1,47 @@
+/*
+ * Description : Fuzzy joins two datasets, Customers and Customers2, based on the Jaccard similarity of their interest lists.
+ * Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+ number: int32,
+ street: string,
+ city: string
+}
+
+create type CustomerType as closed {
+ cid: int32,
+ name: string,
+ age: int32?,
+ address: AddressType?,
+ interests: [string],
+ children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword; /* built after the load; keyword index on the ordered-list field the join predicate compares */
+
+write output to nc1:"rttest/index-join_inverted-index-olist-jaccard.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where similarity-jaccard($a.interests, $b.interests) >= 0.9f
+ and $a.cid < $b.cid /* Customers and Customers2 load the same file; this drops self-pairs and keeps each matching pair once */
+order by $a.cid, $b.cid
+return { "a": $a.interests, "b": $b.interests }
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-ulist-jaccard.aql b/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-ulist-jaccard.aql
new file mode 100644
index 0000000..2b2d52c
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-ulist-jaccard.aql
@@ -0,0 +1,47 @@
+/*
+ * Description : Fuzzy joins two datasets, Customers and Customers2, based on the Jaccard similarity of their interest sets.
+ * Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+ number: int32,
+ street: string,
+ city: string
+}
+
+create type CustomerType as closed {
+ cid: int32,
+ name: string,
+ age: int32?,
+ address: AddressType?,
+ interests: {{string}},
+ children: [ { name: string, age: int32? } ]
+}
+
+create dataset Customers(CustomerType) partitioned by key cid;
+
+create dataset Customers2(CustomerType) partitioned by key cid;
+
+load dataset Customers
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+load dataset Customers2
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword; /* built after the load; keyword index on the unordered-list ({{string}}) field the join predicate compares */
+
+write output to nc1:"rttest/index-join_inverted-index-ulist-jaccard.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where similarity-jaccard($a.interests, $b.interests) >= 0.9f
+ and $a.cid < $b.cid /* Customers and Customers2 load the same file; this drops self-pairs and keeps each matching pair once */
+order by $a.cid, $b.cid
+return { "a": $a.interests, "b": $b.interests }
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-word-jaccard.aql b/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-word-jaccard.aql
new file mode 100644
index 0000000..228dfd2
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-join/inverted-index-word-jaccard.aql
@@ -0,0 +1,48 @@
+/*
+ * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard function of their titles' word tokens.
+ * DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create type CSXType as closed {
+ id: int32,
+ csxid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create dataset DBLP(DBLPType) partitioned by key id;
+
+create dataset CSX(CSXType) partitioned by key id;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+load dataset CSX
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+
+create index keyword_index on DBLP(title) type keyword; /* built after the load; keyword index on DBLP.title, the indexed side of the join */
+
+write output to nc1:"rttest/index-join_inverted-index-word-jaccard.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where similarity-jaccard(word-tokens($a.title), word-tokens($b.title)) >= 0.5f /* both titles are word-tokenized before the Jaccard comparison */
+ and $a.id < $b.id
+order by $a.id, $b.id
+return { "arec": $a.title, "brec": $b.title }
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/index/cust-index-age-nullable.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/cust-index-age-nullable.aql
similarity index 90%
rename from asterix-app/src/test/resources/runtimets/queries/index/cust-index-age-nullable.aql
rename to asterix-app/src/test/resources/runtimets/queries/index-selection/cust-index-age-nullable.aql
index 19348a6..460a212 100644
--- a/asterix-app/src/test/resources/runtimets/queries/index/cust-index-age-nullable.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/cust-index-age-nullable.aql
@@ -25,7 +25,7 @@
create index age_index on Customers(age);
-write output to nc1:"rttest/index_cust-index-age-nullable.adm";
+write output to nc1:"rttest/index-selection_cust-index-age-nullable.adm";
for $c in dataset('Customers')
where $c.age < 20
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-contains.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-contains.aql
new file mode 100644
index 0000000..ad85be7
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-contains.aql
@@ -0,0 +1,29 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType)
+ partitioned by key id on group1;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(title) type ngram(3); /* built after the load; 3-gram index on the field tested by contains() below */
+
+write output to nc1:"rttest/index-selection_inverted-index-ngram-contains.adm";
+
+for $o in dataset('DBLP')
+where contains($o.title, "Multimedia") /* substring predicate on the indexed field */
+order by $o.id
+return $o
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-panic.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-panic.aql
new file mode 100644
index 0000000..b275423
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-panic.aql
@@ -0,0 +1,35 @@
+/*
+ * Description : Selects the DBLP records whose authors are within edit distance 5 of "Amihay Motro".
+ * DBLP has a 3-gram index on authors and is partitioned across nc1 and nc2, so the result is ordered by id to keep the output file deterministic.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType)
+ partitioned by key id on group1;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(authors) type ngram(3);
+
+write output to nc1:"rttest/index-selection_inverted-index-ngram-edit-distance-panic.adm";
+
+for $o in dataset('DBLP')
+let $ed := edit-distance-check($o.authors, "Amihay Motro", 5)
+where $ed[0]
+order by $o.id
+return $o
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance.aql
new file mode 100644
index 0000000..ddcdd4b
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance.aql
@@ -0,0 +1,35 @@
+/*
+ * Description : Selects the DBLP records whose authors are within edit distance 1 of "Amihay Motro".
+ * DBLP has a 3-gram index on authors and is partitioned across nc1 and nc2, so the result is ordered by id to keep the output file deterministic.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType)
+ partitioned by key id on group1;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(authors) type ngram(3);
+
+write output to nc1:"rttest/index-selection_inverted-index-ngram-edit-distance.adm";
+
+for $o in dataset('DBLP')
+let $ed := edit-distance-check($o.authors, "Amihay Motro", 1)
+where $ed[0]
+order by $o.id
+return $o
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-jaccard.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-jaccard.aql
new file mode 100644
index 0000000..501ebce
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-jaccard.aql
@@ -0,0 +1,35 @@
+/*
+ * Description : Selects the DBLP records whose title's 3-gram tokens have a Jaccard similarity of at least 0.5 with those of "Transactions for Cooperative Environments".
+ * DBLP has a 3-gram index on title and is partitioned across nc1 and nc2, so the result is ordered by id to keep the output file deterministic.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType)
+ partitioned by key id on group1;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index ngram_index on DBLP(title) type ngram(3);
+
+write output to nc1:"rttest/index-selection_inverted-index-ngram-jaccard.adm";
+
+for $o in dataset('DBLP')
+let $jacc := similarity-jaccard-check(gram-tokens($o.title, 3, false), gram-tokens("Transactions for Cooperative Environments", 3, false), 0.5f)
+where $jacc[0]
+order by $o.id
+return $o
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-olist-edit-distance-panic.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-olist-edit-distance-panic.aql
new file mode 100644
index 0000000..deb51a7
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-olist-edit-distance-panic.aql
@@ -0,0 +1,37 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+ number: int32,
+ street: string,
+ city: string
+}
+
+create type CustomerType as closed {
+ cid: int32,
+ name: string,
+ age: int32?,
+ address: AddressType?,
+ interests: [string],
+ children: [ { name: string, age: int32? } ]
+}
+
+create nodegroup group1 if not exists on nc1;
+
+create dataset Customers(CustomerType)
+ partitioned by key cid on group1;
+
+load dataset Customers
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword; /* built after the load; keyword index on the list field checked below */
+
+write output to nc1:"rttest/index-selection_inverted-index-olist-edit-distance-panic.adm";
+
+for $c in dataset('Customers')
+let $ed := edit-distance-check($c.interests, ["computers", "wine", "walking"], 3) /* threshold 3 equals the length of the constant list; NOTE(review): "panic" in the test name presumably refers to a threshold too large for index filtering - confirm */
+where $ed[0] /* item 0 of the check result is used as the selection predicate */
+order by $c.cid
+return $c
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-olist-edit-distance.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-olist-edit-distance.aql
new file mode 100644
index 0000000..bb05fc1
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-olist-edit-distance.aql
@@ -0,0 +1,37 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+ number: int32,
+ street: string,
+ city: string
+}
+
+create type CustomerType as closed {
+ cid: int32,
+ name: string,
+ age: int32?,
+ address: AddressType?,
+ interests: [string],
+ children: [ { name: string, age: int32? } ]
+}
+
+create nodegroup group1 if not exists on nc1;
+
+create dataset Customers(CustomerType)
+ partitioned by key cid on group1;
+
+load dataset Customers
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword; /* built after the load; keyword index on the list field checked below */
+
+write output to nc1:"rttest/index-selection_inverted-index-olist-edit-distance.adm";
+
+for $c in dataset('Customers')
+let $ed := edit-distance-check($c.interests, ["computers", "wine", "walking"], 1) /* list-level edit distance against a 3-element constant, threshold 1 */
+where $ed[0] /* item 0 of the check result is used as the selection predicate */
+order by $c.cid
+return $c
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-olist-jaccard.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-olist-jaccard.aql
new file mode 100644
index 0000000..8e2d1e7
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-olist-jaccard.aql
@@ -0,0 +1,42 @@
+/*
+ * Description : Selects the Customers whose interests list has a Jaccard similarity of at least 0.7 with ["databases", "computers", "wine"].
+ * Customers has a keyword index on interests. The result is ordered by cid so the output file is deterministic.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+ number: int32,
+ street: string,
+ city: string
+}
+
+create type CustomerType as closed {
+ cid: int32,
+ name: string,
+ age: int32?,
+ address: AddressType?,
+ interests: [string],
+ children: [ { name: string, age: int32? } ]
+}
+
+create nodegroup group1 if not exists on nc1;
+
+create dataset Customers(CustomerType)
+ partitioned by key cid on group1;
+
+load dataset Customers
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/index-selection_inverted-index-olist-jaccard.adm";
+
+for $c in dataset('Customers')
+let $jacc := similarity-jaccard-check($c.interests, ["databases", "computers", "wine"], 0.7f)
+where $jacc[0]
+order by $c.cid
+return $c
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ulist-jaccard.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ulist-jaccard.aql
new file mode 100644
index 0000000..6a0e266
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ulist-jaccard.aql
@@ -0,0 +1,42 @@
+/*
+ * Description : Selects the Customers whose interests set has a Jaccard similarity of at least 0.7 with ["databases", "computers", "wine"].
+ * Customers has a keyword index on interests. The result is ordered by cid so the output file is deterministic.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+ number: int32,
+ street: string,
+ city: string
+}
+
+create type CustomerType as closed {
+ cid: int32,
+ name: string,
+ age: int32?,
+ address: AddressType?,
+ interests: {{string}},
+ children: [ { name: string, age: int32? } ]
+}
+
+create nodegroup group1 if not exists on nc1;
+
+create dataset Customers(CustomerType)
+ partitioned by key cid on group1;
+
+load dataset Customers
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/semistructured/co1k/customer.adm"),("format"="adm"));
+
+create index interests_index on Customers(interests) type keyword;
+
+write output to nc1:"rttest/index-selection_inverted-index-ulist-jaccard.adm";
+
+for $c in dataset('Customers')
+let $jacc := similarity-jaccard-check($c.interests, ["databases", "computers", "wine"], 0.7f)
+where $jacc[0]
+order by $c.cid
+return $c
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-word-contains.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-word-contains.aql
new file mode 100644
index 0000000..348f686
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-word-contains.aql
@@ -0,0 +1,29 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType)
+ partitioned by key id on group1;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index keyword_index on DBLP(title) type keyword; /* built after the load; keyword index on the field tested by contains() below */
+
+write output to nc1:"rttest/index-selection_inverted-index-word-contains.adm";
+
+for $o in dataset('DBLP')
+where contains($o.title, "Multimedia") /* substring predicate on the indexed field */
+order by $o.id
+return $o
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-word-jaccard.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-word-jaccard.aql
new file mode 100644
index 0000000..9852e67
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-word-jaccard.aql
@@ -0,0 +1,36 @@
+/*
+ * Description : Selects the DBLP records whose title's word tokens have a Jaccard similarity of at least 0.5 with those of "Transactions for Cooperative Environments".
+ * DBLP has a keyword index on title and is partitioned across nc1 and nc2, so the result is ordered by id to keep the output file deterministic.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType)
+ partitioned by key id on group1;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+create index keyword_index on DBLP(title) type keyword;
+
+write output to nc1:"rttest/index-selection_inverted-index-word-jaccard.adm";
+
+for $o in dataset('DBLP')
+let $jacc := similarity-jaccard-check(word-tokens($o.title), word-tokens("Transactions for Cooperative Environments"), 0.5f)
+where $jacc[0]
+order by $o.id
+return $o
+
diff --git a/asterix-app/src/test/resources/runtimets/queries/index/orders-index-custkey-conjunctive-open.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/orders-index-custkey-conjunctive-open.aql
similarity index 90%
copy from asterix-app/src/test/resources/runtimets/queries/index/orders-index-custkey-conjunctive-open.aql
copy to asterix-app/src/test/resources/runtimets/queries/index-selection/orders-index-custkey-conjunctive-open.aql
index 1a29b28..1a0ecbc 100644
--- a/asterix-app/src/test/resources/runtimets/queries/index/orders-index-custkey-conjunctive-open.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/orders-index-custkey-conjunctive-open.aql
@@ -23,7 +23,7 @@
create index idx_Orders_Custkey on Orders(o_custkey) ;
-write output to nc1:"rttest/index_orders-index-custkey-conjunctive-open.adm";
+write output to nc1:"rttest/index-selection_orders-index-custkey-conjunctive-open.adm";
for $o in dataset('Orders')
where
diff --git a/asterix-app/src/test/resources/runtimets/queries/index/orders-index-custkey-conjunctive-open.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/orders-index-custkey-conjunctive.aql
similarity index 90%
rename from asterix-app/src/test/resources/runtimets/queries/index/orders-index-custkey-conjunctive-open.aql
rename to asterix-app/src/test/resources/runtimets/queries/index-selection/orders-index-custkey-conjunctive.aql
index 1a29b28..ceca42e 100644
--- a/asterix-app/src/test/resources/runtimets/queries/index/orders-index-custkey-conjunctive-open.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/orders-index-custkey-conjunctive.aql
@@ -23,7 +23,7 @@
create index idx_Orders_Custkey on Orders(o_custkey) ;
-write output to nc1:"rttest/index_orders-index-custkey-conjunctive-open.adm";
+write output to nc1:"rttest/index-selection_orders-index-custkey-conjunctive.adm";
for $o in dataset('Orders')
where
diff --git a/asterix-app/src/test/resources/runtimets/queries/index/orders-index-custkey-open.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/orders-index-custkey-open.aql
similarity index 91%
rename from asterix-app/src/test/resources/runtimets/queries/index/orders-index-custkey-open.aql
rename to asterix-app/src/test/resources/runtimets/queries/index-selection/orders-index-custkey-open.aql
index f3a9f1f..281f566 100644
--- a/asterix-app/src/test/resources/runtimets/queries/index/orders-index-custkey-open.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/orders-index-custkey-open.aql
@@ -26,7 +26,7 @@
create index idx_Orders_Custkey on Orders(o_custkey);
-write output to nc1:"rttest/index_orders-index-custkey-open.adm";
+write output to nc1:"rttest/index-selection_orders-index-custkey-open.adm";
for $o in dataset('Orders')
where
diff --git a/asterix-app/src/test/resources/runtimets/queries/index/orders-index-custkey.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/orders-index-custkey.aql
similarity index 91%
rename from asterix-app/src/test/resources/runtimets/queries/index/orders-index-custkey.aql
rename to asterix-app/src/test/resources/runtimets/queries/index-selection/orders-index-custkey.aql
index 38bc76c..365cfcb 100644
--- a/asterix-app/src/test/resources/runtimets/queries/index/orders-index-custkey.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/orders-index-custkey.aql
@@ -23,7 +23,7 @@
create index idx_Orders_Custkey on Orders(o_custkey);
-write output to nc1:"rttest/index_orders-index-custkey.adm";
+write output to nc1:"rttest/index-selection_orders-index-custkey.adm";
for $o in dataset('Orders')
where
diff --git a/asterix-app/src/test/resources/runtimets/queries/index/range-search-open.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/range-search-open.aql
similarity index 92%
rename from asterix-app/src/test/resources/runtimets/queries/index/range-search-open.aql
rename to asterix-app/src/test/resources/runtimets/queries/index-selection/range-search-open.aql
index 1781280..099e2d2 100644
--- a/asterix-app/src/test/resources/runtimets/queries/index/range-search-open.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/range-search-open.aql
@@ -32,7 +32,8 @@
create index idx_LineItem_partkey on LineItem(l_linenumber);
create index idx_LineItem_suppkey on LineItem(l_suppkey);
-write output to nc1:"rttest/index_range-search-open.adm";
+write output to nc1:"rttest/index-selection_range-search-open.adm";
+
for $c in dataset('LineItem')
where $c.l_suppkey < 100 and $c.l_suppkey>5
order by $c.l_orderkey, $c.l_linenumber
diff --git a/asterix-app/src/test/resources/runtimets/queries/index/range-search.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/range-search.aql
similarity index 93%
rename from asterix-app/src/test/resources/runtimets/queries/index/range-search.aql
rename to asterix-app/src/test/resources/runtimets/queries/index-selection/range-search.aql
index 666dc73..62714ed 100644
--- a/asterix-app/src/test/resources/runtimets/queries/index/range-search.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/range-search.aql
@@ -32,7 +32,8 @@
create index idx_LineItem_partkey on LineItem(l_linenumber);
create index idx_LineItem_suppkey on LineItem(l_suppkey);
-write output to nc1:"rttest/index_range-search.adm";
+write output to nc1:"rttest/index-selection_range-search.adm";
+
for $c in dataset('LineItem')
where $c.l_suppkey < 100 and $c.l_suppkey>5
order by $c.l_orderkey, $c.l_linenumber
diff --git a/asterix-app/src/test/resources/runtimets/queries/index/rtree-secondary-index-nullable.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/rtree-secondary-index-nullable.aql
similarity index 89%
rename from asterix-app/src/test/resources/runtimets/queries/index/rtree-secondary-index-nullable.aql
rename to asterix-app/src/test/resources/runtimets/queries/index-selection/rtree-secondary-index-nullable.aql
index a4555f4..6ca6d28 100644
--- a/asterix-app/src/test/resources/runtimets/queries/index/rtree-secondary-index-nullable.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/rtree-secondary-index-nullable.aql
@@ -22,7 +22,7 @@
create index rtree_index_point on MyData(point) type rtree;
-write output to nc1:"rttest/index_rtree-secondary-index-nullable.adm";
+write output to nc1:"rttest/index-selection_rtree-secondary-index-nullable.adm";
for $o in dataset('MyData')
where spatial-intersect($o.point, create-polygon(create-point(4.0,1.0), create-point(4.0,4.0), create-point(12.0,4.0), create-point(12.0,1.0)))
diff --git a/asterix-app/src/test/resources/runtimets/queries/index/rtree-secondary-index-open.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/rtree-secondary-index-open.aql
similarity index 90%
rename from asterix-app/src/test/resources/runtimets/queries/index/rtree-secondary-index-open.aql
rename to asterix-app/src/test/resources/runtimets/queries/index-selection/rtree-secondary-index-open.aql
index c428af2..44425cc 100644
--- a/asterix-app/src/test/resources/runtimets/queries/index/rtree-secondary-index-open.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/rtree-secondary-index-open.aql
@@ -23,7 +23,7 @@
create index rtree_index_point on MyData(point) type rtree;
-write output to nc1:"rttest/index_rtree-secondary-index-open.adm";
+write output to nc1:"rttest/index-selection_rtree-secondary-index-open.adm";
for $o in dataset('MyData')
where spatial-intersect($o.point, create-polygon(create-point(4.0,1.0), create-point(4.0,4.0), create-point(12.0,4.0), create-point(12.0,1.0)))
diff --git a/asterix-app/src/test/resources/runtimets/queries/index/rtree-secondary-index.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/rtree-secondary-index.aql
similarity index 90%
rename from asterix-app/src/test/resources/runtimets/queries/index/rtree-secondary-index.aql
rename to asterix-app/src/test/resources/runtimets/queries/index-selection/rtree-secondary-index.aql
index 01b2981..7ff775c 100644
--- a/asterix-app/src/test/resources/runtimets/queries/index/rtree-secondary-index.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/rtree-secondary-index.aql
@@ -22,7 +22,7 @@
create index rtree_index_point on MyData(point) type rtree;
-write output to nc1:"rttest/index_rtree-secondary-index.adm";
+write output to nc1:"rttest/index-selection_rtree-secondary-index.adm";
for $o in dataset('MyData')
where spatial-intersect($o.point, create-polygon(create-point(4.0,1.0), create-point(4.0,4.0), create-point(12.0,4.0), create-point(12.0,1.0)))
diff --git a/asterix-app/src/test/resources/runtimets/queries/index/orders-index-custkey-conjunctive.aql b/asterix-app/src/test/resources/runtimets/queries/index/orders-index-custkey-conjunctive.aql
deleted file mode 100644
index 10db179..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/index/orders-index-custkey-conjunctive.aql
+++ /dev/null
@@ -1,35 +0,0 @@
-drop dataverse tpch if exists;
-create dataverse tpch;
-use dataverse tpch;
-
-create type OrderType as closed {
- o_orderkey: int32,
- o_custkey: int32,
- o_orderstatus: string,
- o_totalprice: double,
- o_orderdate: string,
- o_orderpriority: string,
- o_clerk: string,
- o_shippriority: int32,
- o_comment: string
-}
-
-create dataset Orders(OrderType)
- partitioned by key o_orderkey;
-
-load dataset Orders
-using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/tpch0.001/orders.tbl"),("format"="delimited-text"),("delimiter"="|")) pre-sorted;
-
-create index idx_Orders_Custkey on Orders(o_custkey) ;
-
-write output to nc1:"rttest/index_orders-index-custkey-conjunctive.adm";
-
-for $o in dataset('Orders')
-where
- $o.o_custkey = 40 and $o.o_totalprice > 150000.0
-order by $o.o_orderkey
-return {
- "o_orderkey": $o.o_orderkey,
- "o_custkey": $o.o_custkey
-}
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_01.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_01.aql
deleted file mode 100644
index 2034c38..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_01.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_edit-distance-check_01.adm";
-
-let $a := "Nalini Venkatasubramanian"
-let $b := "Nalini Wekatasupramanian"
-let $ed := edit-distance-check($a, $b, 3)
-return $ed
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_02.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_02.aql
deleted file mode 100644
index c3d5342..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_02.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_edit-distance-check_02.adm";
-
-let $a := "Nalini Venkatasubramanian"
-let $b := "Nalini Wekatasupramanian"
-let $ed := edit-distance-check($a, $b, 2)
-return $ed
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_03.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_03.aql
deleted file mode 100644
index ecf556c..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_03.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_edit-distance-check_03.adm";
-
-let $a := [1, 2, 3, 4, 5, 6, 7]
-let $b := [1, 3, 4, 5, 7, 8]
-let $ed := edit-distance-check($a, $b, 3)
-return $ed
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_04.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_04.aql
deleted file mode 100644
index 80a2da6..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_04.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_edit-distance-check_04.adm";
-
-let $a := [1, 2, 3, 4, 5, 6, 7]
-let $b := [1, 3, 4, 5, 7, 8]
-let $ed := edit-distance-check($a, $b, 2)
-return $ed
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_ints.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_ints.aql
new file mode 100644
index 0000000..3b5fd8d
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_ints.aql
@@ -0,0 +1,17 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_edit-distance-check_ints.adm";
+
+let $a := [1, 2, 3, 4, 5, 6, 7]
+let $b := [1, 3, 4, 5, 7, 8]
+let $results :=
+[
+ edit-distance-check($a, $b, 3),
+ edit-distance-check($b, $a, 3),
+ edit-distance-check($a, $b, 2),
+ edit-distance-check($b, $a, 2)
+]
+for $i in $results
+return $i
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_strings.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_strings.aql
new file mode 100644
index 0000000..e861679
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_strings.aql
@@ -0,0 +1,17 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_edit-distance-check_strings.adm";
+
+let $a := "Nalini Venkatasubramanian"
+let $b := "Nalini Wekatasupramanian"
+let $results :=
+[
+ edit-distance-check($a, $b, 3),
+ edit-distance-check($b, $a, 3),
+ edit-distance-check($a, $b, 2),
+ edit-distance-check($b, $a, 2)
+]
+for $i in $results
+return $i
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-list-is-filterable.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-list-is-filterable.aql
new file mode 100644
index 0000000..ecfbd52
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-list-is-filterable.aql
@@ -0,0 +1,19 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_edit-distance-list-is-filterable.adm";
+
+let $a := []
+let $b := [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+let $results :=
+[
+ edit-distance-list-is-filterable($a, 0),
+ edit-distance-list-is-filterable($a, 3),
+ edit-distance-list-is-filterable($b, 0),
+ edit-distance-list-is-filterable($b, 3),
+ edit-distance-list-is-filterable($b, 8),
+ edit-distance-list-is-filterable($b, 11)
+]
+for $i in $results
+return $i
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-string-is-filterable.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-string-is-filterable.aql
new file mode 100644
index 0000000..c306e0c
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-string-is-filterable.aql
@@ -0,0 +1,33 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_edit-distance-string-is-filterable.adm";
+
+let $a := ""
+let $b := "abcdefghij"
+let $results :=
+[
+ edit-distance-string-is-filterable($a, 0, 2, false),
+ edit-distance-string-is-filterable($a, 0, 2, true),
+ edit-distance-string-is-filterable($a, 1, 2, false),
+ edit-distance-string-is-filterable($a, 1, 2, true),
+ edit-distance-string-is-filterable($b, 0, 2, false),
+ edit-distance-string-is-filterable($b, 0, 2, true),
+ edit-distance-string-is-filterable($b, 1, 2, false),
+ edit-distance-string-is-filterable($b, 1, 2, true),
+ edit-distance-string-is-filterable($b, 4, 2, false),
+ edit-distance-string-is-filterable($b, 5, 2, true),
+ edit-distance-string-is-filterable($b, 5, 2, false),
+ edit-distance-string-is-filterable($b, 6, 2, true),
+ edit-distance-string-is-filterable($b, 0, 3, false),
+ edit-distance-string-is-filterable($b, 0, 3, true),
+ edit-distance-string-is-filterable($b, 1, 3, false),
+ edit-distance-string-is-filterable($b, 1, 3, true),
+ edit-distance-string-is-filterable($b, 2, 3, false),
+ edit-distance-string-is-filterable($b, 3, 3, true),
+ edit-distance-string-is-filterable($b, 3, 3, false),
+ edit-distance-string-is-filterable($b, 4, 3, true)
+]
+for $i in $results
+return $i
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_01.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_01.aql
deleted file mode 100644
index 3365690..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_01.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_edit-distance_01.adm";
-
-let $a := "Nalini Venkatasubramanian"
-let $b := "Nalini Wekatasupramanian"
-let $ed := edit-distance($a, $b)
-return $ed
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_02.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_02.aql
deleted file mode 100644
index bf0df90..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_02.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_edit-distance_02.adm";
-
-let $a := [1, 2, 3, 4, 5, 6, 7]
-let $b := [1, 3, 4, 5, 7, 8]
-let $ed := edit-distance($a, $b)
-return $ed
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_ints.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_ints.aql
new file mode 100644
index 0000000..9cd7dc6
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_ints.aql
@@ -0,0 +1,15 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_edit-distance_ints.adm";
+
+let $a := [1, 2, 3, 4, 5, 6, 7]
+let $b := [1, 3, 4, 5, 7, 8]
+let $results :=
+[
+ edit-distance($a, $b),
+ edit-distance($b, $a)
+]
+for $i in $results
+return $i
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_strings.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_strings.aql
new file mode 100644
index 0000000..a721c40
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_strings.aql
@@ -0,0 +1,15 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_edit-distance_strings.adm";
+
+let $a := "Nalini Venkatasubramanian"
+let $b := "Nalini Wekatasupramanian"
+let $results :=
+[
+ edit-distance($a, $b),
+ edit-distance($b, $a)
+]
+for $i in $results
+return $i
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/fuzzyeq-edit-distance.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/fuzzyeq-edit-distance.aql
new file mode 100644
index 0000000..d3eed71
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/fuzzyeq-edit-distance.aql
@@ -0,0 +1,29 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType)
+ partitioned by key id on group1;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_fuzzyeq-edit-distance.adm";
+
+set simfunction 'edit-distance';
+set simthreshold '2';
+
+for $paper in dataset('DBLP')
+where $paper.authors ~= "Amihay Motro"
+return $paper
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/fuzzyeq-similarity-jaccard.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/fuzzyeq-similarity-jaccard.aql
new file mode 100644
index 0000000..0c11edb
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/fuzzyeq-similarity-jaccard.aql
@@ -0,0 +1,29 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType)
+ partitioned by key id on group1;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_fuzzyeq-similarity-jaccard.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.5f';
+
+for $paper in dataset('DBLP')
+where word-tokens($paper.title) ~= word-tokens("Transactions for Cooperative Environments")
+return $paper
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/prefix-len-jaccard_01.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/prefix-len-jaccard.aql
similarity index 80%
rename from asterix-app/src/test/resources/runtimets/queries/similarity/prefix-len-jaccard_01.aql
rename to asterix-app/src/test/resources/runtimets/queries/similarity/prefix-len-jaccard.aql
index 772e64e..63424f4 100644
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/prefix-len-jaccard_01.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/prefix-len-jaccard.aql
@@ -2,7 +2,7 @@
create dataverse test;
use dataverse test;
-write output to nc1:"rttest/similarity_prefix-len-jaccard_01.adm";
+write output to nc1:"rttest/similarity_prefix-len-jaccard.adm";
for $l in [1]
return [
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_01.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_01.aql
deleted file mode 100644
index cca0f99..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_01.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_similarity-jaccard-check_01.adm";
-
-let $a := [1, 2, 3, 4, 5, 8, 9]
-let $b := [1, 2, 3, 4, 5, 7, 8, 9, 10, 11]
-let $jacc := similarity-jaccard-check($a, $b, 0.7f)
-return $jacc
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_02.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_02.aql
deleted file mode 100644
index 38fe1f4..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_02.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_similarity-jaccard-check_02.adm";
-
-let $a := [1, 2, 3, 4, 5, 8, 9]
-let $b := [1, 2, 3, 4, 5, 7, 8, 9, 10, 11]
-let $jacc := similarity-jaccard-check($a, $b, 0.8f)
-return $jacc
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_ints.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_ints.aql
new file mode 100644
index 0000000..0791f0c
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_ints.aql
@@ -0,0 +1,30 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_similarity-jaccard-check_ints.adm";
+
+let $a := [ ]
+let $b := [1, 2, 3, 4, 5]
+let $c := [4, 3, 5, 8, 9, 2, 1]
+let $d := [7, 5, 8, 9, 3, 10, 1, 2, 11, 4]
+let $e := [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+let $f := [1, 2, 3, 4, 5, 7, 8, 9, 10, 11]
+let $results :=
+[
+ similarity-jaccard-check($a, $b, 0.0f),
+ similarity-jaccard-check($b, $a, 0.0f),
+ similarity-jaccard-check($a, $b, 0.1f),
+ similarity-jaccard-check($b, $a, 0.1f),
+ similarity-jaccard-check($c, $d, 0.6f),
+ similarity-jaccard-check($d, $c, 0.6f),
+ similarity-jaccard-check($c, $d, 0.8f),
+ similarity-jaccard-check($d, $c, 0.8f),
+ similarity-jaccard-check($e, $f, 0.05f),
+ similarity-jaccard-check($f, $e, 0.05f),
+ similarity-jaccard-check($e, $f, 0.8f),
+ similarity-jaccard-check($f, $e, 0.8f)
+
+]
+for $i in $results
+return $i
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_query.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_query.aql
new file mode 100644
index 0000000..01bea0b
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_query.aql
@@ -0,0 +1,29 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType)
+ partitioned by key id on group1;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_similarity-jaccard-check_query.adm";
+
+for $paper in dataset('DBLP')
+let $paper_tokens := word-tokens($paper.title)
+let $query_tokens := word-tokens("Environments for Cooperative Transactions")
+let $jacc := similarity-jaccard-check($paper_tokens, $query_tokens, 0.5f)
+where $jacc[0]
+return $paper
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_strings.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_strings.aql
new file mode 100644
index 0000000..dadca7b
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_strings.aql
@@ -0,0 +1,35 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_similarity-jaccard-check_strings.adm";
+
+let $a := [ ]
+let $b := ["abc", "bcd", "cde", "def", "efg"]
+let $c := ["efg", "abc", "cde", "def", "hij", "ijk", "bcd"]
+let $d := ["abc", "ijk", "bcd", "efg", "fgh", "ghi", "def", "hij", "jkl", "cde"]
+let $e := ["Efg", "aBc", "cdE", "DEf", "hIJ", "IjK", "BCD"]
+let $f := ["abc", "ijk", "bcd", "efg", "fgh", "ghi", "def", "hij", "jkl", "cde"]
+let $g := ["cde", "zza", "zzb", "zzc", "zwz", "za", "zbe", "zer", "zba", "zfe", "wab"]
+let $h := ["abc", "ijk", "bcd", "efg", "fgh", "ghi", "def", "hij", "jkl", "cde"]
+let $results :=
+[
+ similarity-jaccard-check($a, $b, 0.0f),
+ similarity-jaccard-check($b, $a, 0.0f),
+ similarity-jaccard-check($a, $b, 0.1f),
+ similarity-jaccard-check($b, $a, 0.1f),
+ similarity-jaccard-check($c, $d, 0.6f),
+ similarity-jaccard-check($d, $c, 0.6f),
+ similarity-jaccard-check($c, $d, 0.8f),
+ similarity-jaccard-check($d, $c, 0.8f),
+ similarity-jaccard-check($e, $f, 0.6f),
+ similarity-jaccard-check($f, $e, 0.6f),
+ similarity-jaccard-check($e, $f, 0.8f),
+ similarity-jaccard-check($f, $e, 0.8f),
+ similarity-jaccard-check($g, $h, 0.05f),
+ similarity-jaccard-check($h, $g, 0.05f),
+ similarity-jaccard-check($g, $h, 0.8f),
+ similarity-jaccard-check($h, $g, 0.8f)
+]
+for $i in $results
+return $i
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix-check_01.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix-check.aql
similarity index 96%
rename from asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix-check_01.aql
rename to asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix-check.aql
index 73c7ebc..7a48854 100644
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix-check_01.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix-check.aql
@@ -2,18 +2,14 @@
create dataverse test;
use dataverse test;
-write output to nc1:"rttest/similarity_similarity-jaccard-prefix-check_01.adm";
+write output to nc1:"rttest/similarity_similarity-jaccard-prefix-check.adm";
for $l in [1]
return [
similarity-jaccard-prefix-check(3, [1, 2, 3], 3, [1, 2, 3], 1, 1f),
-
similarity-jaccard-prefix-check(3, [1, 2, 3], 3, [1, 2, 4], 1, .5f),
similarity-jaccard-prefix-check(3, [1, 2, 3], 3, [1, 2, 4], 1, .6f),
-
-
similarity-jaccard-prefix-check(3, [1, 2, 3], 9, [1, 2, 3], 1, .5f),
-
similarity-jaccard-prefix-check(4, [1, 2, 3, 4], 2, [1, 2], 1, .5f),
similarity-jaccard-prefix-check(4, [1, 2, 3, 4], 4, [1, 2], 1, .33f)
]
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix_01.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix.aql
similarity index 97%
rename from asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix_01.aql
rename to asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix.aql
index 35b4719..50c857b 100644
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix_01.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix.aql
@@ -2,18 +2,14 @@
create dataverse test;
use dataverse test;
-write output to nc1:"rttest/similarity_similarity-jaccard-prefix_01.adm";
+write output to nc1:"rttest/similarity_similarity-jaccard-prefix.adm";
for $l in [1]
return [
similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 3], 1, 1f),
-
similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 4], 1, .5f),
similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 4], 1, .6f),
-
-
similarity-jaccard-prefix(3, [1, 2, 3], 9, [1, 2, 3], 1, .5f),
-
similarity-jaccard-prefix(4, [1, 2, 3, 4], 2, [1, 2], 1, .5f),
similarity-jaccard-prefix(4, [1, 2, 3, 4], 4, [1, 2], 1, .33f)
]
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted-check_ints.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted-check_ints.aql
new file mode 100644
index 0000000..b48bdd3
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted-check_ints.aql
@@ -0,0 +1,23 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_similarity-jaccard-sorted-check_ints.adm";
+
+let $a := [ ]
+let $b := [1, 2, 3, 4, 5]
+let $c := [1, 2, 3, 4, 5, 8, 9]
+let $d := [1, 2, 3, 4, 5, 7, 8, 9, 10, 11]
+let $results :=
+[
+ similarity-jaccard-sorted-check($a, $b, 0.0f),
+ similarity-jaccard-sorted-check($b, $a, 0.0f),
+ similarity-jaccard-sorted-check($a, $b, 0.1f),
+ similarity-jaccard-sorted-check($b, $a, 0.1f),
+ similarity-jaccard-sorted-check($c, $d, 0.6f),
+ similarity-jaccard-sorted-check($d, $c, 0.6f),
+ similarity-jaccard-sorted-check($c, $d, 0.8f),
+ similarity-jaccard-sorted-check($d, $c, 0.8f)
+]
+for $i in $results
+return $i
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted-check_query.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted-check_query.aql
new file mode 100644
index 0000000..aa5b067
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted-check_query.aql
@@ -0,0 +1,29 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType)
+ partitioned by key id on group1;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_similarity-jaccard-sorted-check_query.adm";
+
+for $paper in dataset('DBLP')
+let $paper_tokens := word-tokens($paper.title)
+let $query_tokens := word-tokens("Cooperative Transactions for Environments")
+let $jacc := similarity-jaccard-sorted-check($paper_tokens, $query_tokens, 0.5f)
+where $jacc[0]
+return $paper
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted-check_strings.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted-check_strings.aql
new file mode 100644
index 0000000..3fdf844
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted-check_strings.aql
@@ -0,0 +1,29 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_similarity-jaccard-sorted-check_strings.adm";
+
+let $a := [ ]
+let $b := ["abc", "bcd", "cde", "def", "efg"]
+let $c := ["abc", "bcd", "cde", "def", "efg", "hij", "ijk"]
+let $d := ["abc", "bcd", "cde", "def", "efg", "fgh", "ghi", "hij", "ijk", "jkl"]
+let $e := ["Abc", "bCd", "cdE", "DEf", "eFG", "HiJ", "IJK"]
+let $f := ["abc", "bcd", "cde", "def", "efg", "fgh", "ghi", "hij", "ijk", "jkl"]
+let $results :=
+[
+ similarity-jaccard-sorted-check($a, $b, 0.0f),
+ similarity-jaccard-sorted-check($b, $a, 0.0f),
+ similarity-jaccard-sorted-check($a, $b, 0.1f),
+ similarity-jaccard-sorted-check($b, $a, 0.1f),
+ similarity-jaccard-sorted-check($c, $d, 0.6f),
+ similarity-jaccard-sorted-check($d, $c, 0.6f),
+ similarity-jaccard-sorted-check($c, $d, 0.8f),
+ similarity-jaccard-sorted-check($d, $c, 0.8f),
+ similarity-jaccard-sorted-check($e, $f, 0.6f),
+ similarity-jaccard-sorted-check($f, $e, 0.6f),
+ similarity-jaccard-sorted-check($e, $f, 0.8f),
+ similarity-jaccard-sorted-check($f, $e, 0.8f)
+]
+for $i in $results
+return $i
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted_ints.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted_ints.aql
new file mode 100644
index 0000000..5fefbf5
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted_ints.aql
@@ -0,0 +1,19 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_similarity-jaccard-sorted_ints.adm";
+
+let $a := [ ]
+let $b := [1, 2, 3, 4, 5]
+let $c := [1, 2, 3, 4, 5, 8, 9]
+let $d := [1, 2, 3, 4, 5, 7, 8, 9, 10, 11]
+let $results :=
+[
+ similarity-jaccard-sorted($a, $b),
+ similarity-jaccard-sorted($b, $a),
+ similarity-jaccard-sorted($c, $d),
+ similarity-jaccard-sorted($d, $c)
+]
+for $i in $results
+return $i
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted_query.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted_query.aql
new file mode 100644
index 0000000..a2373af
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted_query.aql
@@ -0,0 +1,28 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType)
+ partitioned by key id on group1;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_similarity-jaccard-sorted_query.adm";
+
+for $paper in dataset('DBLP')
+let $paper_tokens := word-tokens($paper.title)
+let $query_tokens := word-tokens("Cooperative Transactions for Environments")
+where similarity-jaccard-sorted($paper_tokens, $query_tokens) >= 0.5
+return $paper
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted_strings.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted_strings.aql
new file mode 100644
index 0000000..67a87d1
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted_strings.aql
@@ -0,0 +1,23 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_similarity-jaccard-sorted_strings.adm";
+
+let $a := [ ]
+let $b := ["abc", "bcd", "cde", "def", "efg"]
+let $c := ["abc", "bcd", "cde", "def", "efg", "hij", "ijk"]
+let $d := ["abc", "bcd", "cde", "def", "efg", "fgh", "ghi", "hij", "ijk", "jkl"]
+let $e := ["Abc", "bCd", "cdE", "DEf", "eFG", "HiJ", "IJK"]
+let $f := ["abc", "bcd", "cde", "def", "efg", "fgh", "ghi", "hij", "ijk", "jkl"]
+let $results :=
+[
+ similarity-jaccard-sorted($a, $b),
+ similarity-jaccard-sorted($b, $a),
+ similarity-jaccard-sorted($c, $d),
+ similarity-jaccard-sorted($d, $c),
+ similarity-jaccard-sorted($e, $f),
+ similarity-jaccard-sorted($f, $e)
+]
+for $i in $results
+return $i
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_01.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_01.aql
deleted file mode 100644
index ae0747e..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_01.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_similarity-jaccard_01.adm";
-
-let $a := [1, 2, 3, 4, 5, 8, 9]
-let $b := [1, 2, 3, 4, 5, 7, 8, 9, 10, 11]
-let $jacc := similarity-jaccard($a, $b)
-return $jacc
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_02.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_02.aql
deleted file mode 100644
index d0a7e1f..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_02.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_similarity-jaccard_02.adm";
-
-let $a := ["a"]
-let $b := ["b"]
-let $jacc := similarity-jaccard($a, $b)
-return $jacc
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_03.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_03.aql
deleted file mode 100644
index fd5dd21..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_03.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_similarity-jaccard_03.adm";
-
-let $a := ["a"]
-let $b := [ ]
-let $jacc := similarity-jaccard($a, $b)
-return $jacc
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_ints.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_ints.aql
new file mode 100644
index 0000000..ee20d00
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_ints.aql
@@ -0,0 +1,23 @@
+drop dataverse test if exists; /* Test: similarity-jaccard over lists of integers; begins by recreating the 'test' dataverse. */
+create dataverse test;
+use dataverse test;
+/* Results are written to the file named below on nc1. */
+write output to nc1:"rttest/similarity_similarity-jaccard_ints.adm";
+/* Inputs: $e and $f hold the same values as $c and $d, listed in a different order. */
+let $a := [ ] /* empty list */
+let $b := [1, 2, 3, 4, 5] /* 5 values */
+let $c := [1, 2, 3, 4, 5, 8, 9] /* 7 values, ascending */
+let $d := [1, 2, 3, 4, 5, 7, 8, 9, 10, 11] /* 10 values, ascending; contains every element of $c */
+let $e := [4, 3, 5, 8, 9, 2, 1] /* the values of $c, shuffled */
+let $f := [7, 5, 8, 9, 3, 10, 1, 2, 11, 4] /* the values of $d, shuffled */
+let $results := /* each pair of lists is evaluated in both argument orders */
+[
+ similarity-jaccard($a, $b), /* empty list vs. non-empty list */
+ similarity-jaccard($b, $a),
+ similarity-jaccard($c, $d), /* ascending inputs */
+ similarity-jaccard($d, $c),
+ similarity-jaccard($e, $f), /* same values as the $c/$d pair, unordered; presumably expected to give the same score - confirm against the expected results */
+ similarity-jaccard($f, $e)
+]
+for $i in $results /* return each element of $results as its own result item */
+return $i
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_query.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_query.aql
new file mode 100644
index 0000000..05f3a61
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_query.aql
@@ -0,0 +1,28 @@
+drop dataverse test if exists; /* Test: select DBLP papers whose title tokens are Jaccard-similar to a fixed query string; begins by recreating the 'test' dataverse. */
+create dataverse test;
+use dataverse test;
+/* Record type for one DBLP entry; declared closed, with the five fields below. */
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+/* group1 is defined on nc1 and nc2; the statement is skipped if the group already exists. */
+create nodegroup group1 if not exists on nc1, nc2;
+/* DBLP is partitioned by its id field and placed on group1. */
+create dataset DBLP(DBLPType)
+ partitioned by key id on group1;
+/* Load DBLP from a delimited-text file (delimiter ":") given as an nc1:// path; the load is declared pre-sorted. */
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+/* Results are written to the file named below on nc1. */
+write output to nc1:"rttest/similarity_similarity-jaccard_query.adm";
+/* Keep each paper whose title tokens score at least 0.5f against the query tokens. NOTE(review): there is no order by - confirm the harness does not depend on result order. */
+for $paper in dataset('DBLP')
+let $paper_tokens := word-tokens($paper.title) /* tokens of this paper's title */
+let $query_tokens := word-tokens("Transactions for Cooperative Environments") /* tokens of a constant string; does not depend on $paper */
+where similarity-jaccard($paper_tokens, $query_tokens) >= 0.5f
+return $paper
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_strings.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_strings.aql
new file mode 100644
index 0000000..107d92f
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_strings.aql
@@ -0,0 +1,27 @@
+drop dataverse test if exists; /* Test: similarity-jaccard over lists of strings; begins by recreating the 'test' dataverse. */
+create dataverse test;
+use dataverse test;
+/* Results are written to the file named below on nc1. */
+write output to nc1:"rttest/similarity_similarity-jaccard_strings.adm";
+/* Inputs: $e/$f reorder $c/$d; $g is $e in mixed case; $h repeats $f. */
+let $a := [ ] /* empty list */
+let $b := ["abc", "bcd", "cde", "def", "efg"] /* 5 strings */
+let $c := ["abc", "bcd", "cde", "def", "efg", "hij", "ijk"] /* $b plus "hij" and "ijk" */
+let $d := ["abc", "bcd", "cde", "def", "efg", "fgh", "ghi", "hij", "ijk", "jkl"] /* 10 strings; contains every element of $c */
+let $e := ["efg", "abc", "cde", "def", "hij", "ijk", "bcd"] /* the strings of $c, shuffled */
+let $f := ["abc", "ijk", "bcd", "efg", "fgh", "ghi", "def", "hij", "jkl", "cde"] /* the strings of $d, shuffled */
+let $g := ["Efg", "aBc", "cdE", "DEf", "hIJ", "IjK", "BCD"] /* same words and positions as $e, in mixed upper/lower case */
+let $h := ["abc", "ijk", "bcd", "efg", "fgh", "ghi", "def", "hij", "jkl", "cde"] /* identical to $f */
+let $results := /* each pair of lists is evaluated in both argument orders */
+[
+ similarity-jaccard($a, $b), /* empty list vs. non-empty list */
+ similarity-jaccard($b, $a),
+ similarity-jaccard($c, $d), /* ascending inputs */
+ similarity-jaccard($d, $c),
+ similarity-jaccard($e, $f), /* same strings as the $c/$d pair, unordered */
+ similarity-jaccard($f, $e),
+ similarity-jaccard($g, $h), /* mixed-case list vs. lower-case list; presumably probes case handling - confirm against the expected results */
+ similarity-jaccard($h, $g)
+]
+for $i in $results /* return each element of $results as its own result item */
+return $i
\ No newline at end of file