Reintegrated asterix-fuzzy.
git-svn-id: https://asterixdb.googlecode.com/svn/branches/asterix_stabilization@437 eaa15691-b419-025a-1212-ee371bd00084
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_01.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_01.aql
deleted file mode 100644
index 2034c38..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_01.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_edit-distance-check_01.adm";
-
-let $a := "Nalini Venkatasubramanian"
-let $b := "Nalini Wekatasupramanian"
-let $ed := edit-distance-check($a, $b, 3)
-return $ed
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_02.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_02.aql
deleted file mode 100644
index c3d5342..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_02.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_edit-distance-check_02.adm";
-
-let $a := "Nalini Venkatasubramanian"
-let $b := "Nalini Wekatasupramanian"
-let $ed := edit-distance-check($a, $b, 2)
-return $ed
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_03.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_03.aql
deleted file mode 100644
index ecf556c..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_03.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_edit-distance-check_03.adm";
-
-let $a := [1, 2, 3, 4, 5, 6, 7]
-let $b := [1, 3, 4, 5, 7, 8]
-let $ed := edit-distance-check($a, $b, 3)
-return $ed
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_04.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_04.aql
deleted file mode 100644
index 80a2da6..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_04.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_edit-distance-check_04.adm";
-
-let $a := [1, 2, 3, 4, 5, 6, 7]
-let $b := [1, 3, 4, 5, 7, 8]
-let $ed := edit-distance-check($a, $b, 2)
-return $ed
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_ints.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_ints.aql
new file mode 100644
index 0000000..3b5fd8d
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_ints.aql
@@ -0,0 +1,17 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_edit-distance-check_ints.adm";
+
+let $a := [1, 2, 3, 4, 5, 6, 7]
+let $b := [1, 3, 4, 5, 7, 8]
+let $results :=
+[
+ edit-distance-check($a, $b, 3),
+ edit-distance-check($b, $a, 3),
+ edit-distance-check($a, $b, 2),
+ edit-distance-check($b, $a, 2)
+]
+for $i in $results
+return $i
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_strings.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_strings.aql
new file mode 100644
index 0000000..e861679
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_strings.aql
@@ -0,0 +1,17 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_edit-distance-check_strings.adm";
+
+let $a := "Nalini Venkatasubramanian"
+let $b := "Nalini Wekatasupramanian"
+let $results :=
+[
+ edit-distance-check($a, $b, 3),
+ edit-distance-check($b, $a, 3),
+ edit-distance-check($a, $b, 2),
+ edit-distance-check($b, $a, 2)
+]
+for $i in $results
+return $i
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-list-is-filterable.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-list-is-filterable.aql
new file mode 100644
index 0000000..ecfbd52
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-list-is-filterable.aql
@@ -0,0 +1,19 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_edit-distance-list-is-filterable.adm";
+
+let $a := []
+let $b := [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+let $results :=
+[
+ edit-distance-list-is-filterable($a, 0),
+ edit-distance-list-is-filterable($a, 3),
+ edit-distance-list-is-filterable($b, 0),
+ edit-distance-list-is-filterable($b, 3),
+ edit-distance-list-is-filterable($b, 8),
+ edit-distance-list-is-filterable($b, 11)
+]
+for $i in $results
+return $i
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-string-is-filterable.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-string-is-filterable.aql
new file mode 100644
index 0000000..c306e0c
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-string-is-filterable.aql
@@ -0,0 +1,33 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_edit-distance-string-is-filterable.adm";
+
+let $a := ""
+let $b := "abcdefghij"
+let $results :=
+[
+ edit-distance-string-is-filterable($a, 0, 2, false),
+ edit-distance-string-is-filterable($a, 0, 2, true),
+ edit-distance-string-is-filterable($a, 1, 2, false),
+ edit-distance-string-is-filterable($a, 1, 2, true),
+ edit-distance-string-is-filterable($b, 0, 2, false),
+ edit-distance-string-is-filterable($b, 0, 2, true),
+ edit-distance-string-is-filterable($b, 1, 2, false),
+ edit-distance-string-is-filterable($b, 1, 2, true),
+ edit-distance-string-is-filterable($b, 4, 2, false),
+ edit-distance-string-is-filterable($b, 5, 2, true),
+ edit-distance-string-is-filterable($b, 5, 2, false),
+ edit-distance-string-is-filterable($b, 6, 2, true),
+ edit-distance-string-is-filterable($b, 0, 3, false),
+ edit-distance-string-is-filterable($b, 0, 3, true),
+ edit-distance-string-is-filterable($b, 1, 3, false),
+ edit-distance-string-is-filterable($b, 1, 3, true),
+ edit-distance-string-is-filterable($b, 2, 3, false),
+ edit-distance-string-is-filterable($b, 3, 3, true),
+ edit-distance-string-is-filterable($b, 3, 3, false),
+ edit-distance-string-is-filterable($b, 4, 3, true)
+]
+for $i in $results
+return $i
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_01.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_01.aql
deleted file mode 100644
index 3365690..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_01.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_edit-distance_01.adm";
-
-let $a := "Nalini Venkatasubramanian"
-let $b := "Nalini Wekatasupramanian"
-let $ed := edit-distance($a, $b)
-return $ed
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_02.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_02.aql
deleted file mode 100644
index bf0df90..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_02.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_edit-distance_02.adm";
-
-let $a := [1, 2, 3, 4, 5, 6, 7]
-let $b := [1, 3, 4, 5, 7, 8]
-let $ed := edit-distance($a, $b)
-return $ed
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_ints.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_ints.aql
new file mode 100644
index 0000000..9cd7dc6
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_ints.aql
@@ -0,0 +1,15 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_edit-distance_ints.adm";
+
+let $a := [1, 2, 3, 4, 5, 6, 7]
+let $b := [1, 3, 4, 5, 7, 8]
+let $results :=
+[
+ edit-distance($a, $b),
+ edit-distance($b, $a)
+]
+for $i in $results
+return $i
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_strings.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_strings.aql
new file mode 100644
index 0000000..a721c40
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance_strings.aql
@@ -0,0 +1,15 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_edit-distance_strings.adm";
+
+let $a := "Nalini Venkatasubramanian"
+let $b := "Nalini Wekatasupramanian"
+let $results :=
+[
+ edit-distance($a, $b),
+ edit-distance($b, $a)
+]
+for $i in $results
+return $i
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/fuzzyeq-edit-distance.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/fuzzyeq-edit-distance.aql
new file mode 100644
index 0000000..d3eed71
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/fuzzyeq-edit-distance.aql
@@ -0,0 +1,29 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType)
+ partitioned by key id on group1;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_fuzzyeq-edit-distance.adm";
+
+set simfunction 'edit-distance';
+set simthreshold '2';
+
+for $paper in dataset('DBLP')
+where $paper.authors ~= "Amihay Motro"
+return $paper
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/fuzzyeq-similarity-jaccard.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/fuzzyeq-similarity-jaccard.aql
new file mode 100644
index 0000000..0c11edb
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/fuzzyeq-similarity-jaccard.aql
@@ -0,0 +1,29 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType)
+ partitioned by key id on group1;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_fuzzyeq-similarity-jaccard.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.5f';
+
+for $paper in dataset('DBLP')
+where word-tokens($paper.title) ~= word-tokens("Transactions for Cooperative Environments")
+return $paper
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/prefix-len-jaccard_01.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/prefix-len-jaccard.aql
similarity index 80%
rename from asterix-app/src/test/resources/runtimets/queries/similarity/prefix-len-jaccard_01.aql
rename to asterix-app/src/test/resources/runtimets/queries/similarity/prefix-len-jaccard.aql
index 772e64e..63424f4 100644
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/prefix-len-jaccard_01.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/prefix-len-jaccard.aql
@@ -2,7 +2,7 @@
create dataverse test;
use dataverse test;
-write output to nc1:"rttest/similarity_prefix-len-jaccard_01.adm";
+write output to nc1:"rttest/similarity_prefix-len-jaccard.adm";
for $l in [1]
return [
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_01.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_01.aql
deleted file mode 100644
index cca0f99..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_01.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_similarity-jaccard-check_01.adm";
-
-let $a := [1, 2, 3, 4, 5, 8, 9]
-let $b := [1, 2, 3, 4, 5, 7, 8, 9, 10, 11]
-let $jacc := similarity-jaccard-check($a, $b, 0.7f)
-return $jacc
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_02.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_02.aql
deleted file mode 100644
index 38fe1f4..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_02.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_similarity-jaccard-check_02.adm";
-
-let $a := [1, 2, 3, 4, 5, 8, 9]
-let $b := [1, 2, 3, 4, 5, 7, 8, 9, 10, 11]
-let $jacc := similarity-jaccard-check($a, $b, 0.8f)
-return $jacc
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_ints.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_ints.aql
new file mode 100644
index 0000000..0791f0c
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_ints.aql
@@ -0,0 +1,30 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_similarity-jaccard-check_ints.adm";
+
+let $a := [ ]
+let $b := [1, 2, 3, 4, 5]
+let $c := [4, 3, 5, 8, 9, 2, 1]
+let $d := [7, 5, 8, 9, 3, 10, 1, 2, 11, 4]
+let $e := [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+let $f := [1, 2, 3, 4, 5, 7, 8, 9, 10, 11]
+let $results :=
+[
+ similarity-jaccard-check($a, $b, 0.0f),
+ similarity-jaccard-check($b, $a, 0.0f),
+ similarity-jaccard-check($a, $b, 0.1f),
+ similarity-jaccard-check($b, $a, 0.1f),
+ similarity-jaccard-check($c, $d, 0.6f),
+ similarity-jaccard-check($d, $c, 0.6f),
+ similarity-jaccard-check($c, $d, 0.8f),
+ similarity-jaccard-check($d, $c, 0.8f),
+ similarity-jaccard-check($e, $f, 0.05f),
+ similarity-jaccard-check($f, $e, 0.05f),
+ similarity-jaccard-check($e, $f, 0.8f),
+ similarity-jaccard-check($f, $e, 0.8f)
+
+]
+for $i in $results
+return $i
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_query.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_query.aql
new file mode 100644
index 0000000..01bea0b
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_query.aql
@@ -0,0 +1,29 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType)
+ partitioned by key id on group1;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_similarity-jaccard-check_query.adm";
+
+for $paper in dataset('DBLP')
+let $paper_tokens := word-tokens($paper.title)
+let $query_tokens := word-tokens("Environments for Cooperative Transactions")
+let $jacc := similarity-jaccard-check($paper_tokens, $query_tokens, 0.5f)
+where $jacc[0]
+return $paper
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_strings.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_strings.aql
new file mode 100644
index 0000000..dadca7b
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-check_strings.aql
@@ -0,0 +1,35 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_similarity-jaccard-check_strings.adm";
+
+let $a := [ ]
+let $b := ["abc", "bcd", "cde", "def", "efg"]
+let $c := ["efg", "abc", "cde", "def", "hij", "ijk", "bcd"]
+let $d := ["abc", "ijk", "bcd", "efg", "fgh", "ghi", "def", "hij", "jkl", "cde"]
+let $e := ["Efg", "aBc", "cdE", "DEf", "hIJ", "IjK", "BCD"]
+let $f := ["abc", "ijk", "bcd", "efg", "fgh", "ghi", "def", "hij", "jkl", "cde"]
+let $g := ["cde", "zza", "zzb", "zzc", "zwz", "za", "zbe", "zer", "zba", "zfe", "wab"]
+let $h := ["abc", "ijk", "bcd", "efg", "fgh", "ghi", "def", "hij", "jkl", "cde"]
+let $results :=
+[
+ similarity-jaccard-check($a, $b, 0.0f),
+ similarity-jaccard-check($b, $a, 0.0f),
+ similarity-jaccard-check($a, $b, 0.1f),
+ similarity-jaccard-check($b, $a, 0.1f),
+ similarity-jaccard-check($c, $d, 0.6f),
+ similarity-jaccard-check($d, $c, 0.6f),
+ similarity-jaccard-check($c, $d, 0.8f),
+ similarity-jaccard-check($d, $c, 0.8f),
+ similarity-jaccard-check($e, $f, 0.6f),
+ similarity-jaccard-check($f, $e, 0.6f),
+ similarity-jaccard-check($e, $f, 0.8f),
+ similarity-jaccard-check($f, $e, 0.8f),
+ similarity-jaccard-check($g, $h, 0.05f),
+ similarity-jaccard-check($h, $g, 0.05f),
+ similarity-jaccard-check($g, $h, 0.8f),
+ similarity-jaccard-check($h, $g, 0.8f)
+]
+for $i in $results
+return $i
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix-check_01.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix-check.aql
similarity index 96%
rename from asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix-check_01.aql
rename to asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix-check.aql
index 73c7ebc..7a48854 100644
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix-check_01.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix-check.aql
@@ -2,18 +2,14 @@
create dataverse test;
use dataverse test;
-write output to nc1:"rttest/similarity_similarity-jaccard-prefix-check_01.adm";
+write output to nc1:"rttest/similarity_similarity-jaccard-prefix-check.adm";
for $l in [1]
return [
similarity-jaccard-prefix-check(3, [1, 2, 3], 3, [1, 2, 3], 1, 1f),
-
similarity-jaccard-prefix-check(3, [1, 2, 3], 3, [1, 2, 4], 1, .5f),
similarity-jaccard-prefix-check(3, [1, 2, 3], 3, [1, 2, 4], 1, .6f),
-
-
similarity-jaccard-prefix-check(3, [1, 2, 3], 9, [1, 2, 3], 1, .5f),
-
similarity-jaccard-prefix-check(4, [1, 2, 3, 4], 2, [1, 2], 1, .5f),
similarity-jaccard-prefix-check(4, [1, 2, 3, 4], 4, [1, 2], 1, .33f)
]
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix_01.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix.aql
similarity index 97%
rename from asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix_01.aql
rename to asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix.aql
index 35b4719..50c857b 100644
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix_01.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-prefix.aql
@@ -2,18 +2,14 @@
create dataverse test;
use dataverse test;
-write output to nc1:"rttest/similarity_similarity-jaccard-prefix_01.adm";
+write output to nc1:"rttest/similarity_similarity-jaccard-prefix.adm";
for $l in [1]
return [
similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 3], 1, 1f),
-
similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 4], 1, .5f),
similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 4], 1, .6f),
-
-
similarity-jaccard-prefix(3, [1, 2, 3], 9, [1, 2, 3], 1, .5f),
-
similarity-jaccard-prefix(4, [1, 2, 3, 4], 2, [1, 2], 1, .5f),
similarity-jaccard-prefix(4, [1, 2, 3, 4], 4, [1, 2], 1, .33f)
]
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted-check_ints.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted-check_ints.aql
new file mode 100644
index 0000000..b48bdd3
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted-check_ints.aql
@@ -0,0 +1,23 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_similarity-jaccard-sorted-check_ints.adm";
+
+let $a := [ ]
+let $b := [1, 2, 3, 4, 5]
+let $c := [1, 2, 3, 4, 5, 8, 9]
+let $d := [1, 2, 3, 4, 5, 7, 8, 9, 10, 11]
+let $results :=
+[
+ similarity-jaccard-sorted-check($a, $b, 0.0f),
+ similarity-jaccard-sorted-check($b, $a, 0.0f),
+ similarity-jaccard-sorted-check($a, $b, 0.1f),
+ similarity-jaccard-sorted-check($b, $a, 0.1f),
+ similarity-jaccard-sorted-check($c, $d, 0.6f),
+ similarity-jaccard-sorted-check($d, $c, 0.6f),
+ similarity-jaccard-sorted-check($c, $d, 0.8f),
+ similarity-jaccard-sorted-check($d, $c, 0.8f)
+]
+for $i in $results
+return $i
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted-check_query.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted-check_query.aql
new file mode 100644
index 0000000..aa5b067
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted-check_query.aql
@@ -0,0 +1,29 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType)
+ partitioned by key id on group1;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_similarity-jaccard-sorted-check_query.adm";
+
+for $paper in dataset('DBLP')
+let $paper_tokens := word-tokens($paper.title)
+let $query_tokens := word-tokens("Cooperative Transactions for Environments")
+let $jacc := similarity-jaccard-sorted-check($paper_tokens, $query_tokens, 0.5f)
+where $jacc[0]
+return $paper
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted-check_strings.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted-check_strings.aql
new file mode 100644
index 0000000..3fdf844
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted-check_strings.aql
@@ -0,0 +1,29 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_similarity-jaccard-sorted-check_strings.adm";
+
+let $a := [ ]
+let $b := ["abc", "bcd", "cde", "def", "efg"]
+let $c := ["abc", "bcd", "cde", "def", "efg", "hij", "ijk"]
+let $d := ["abc", "bcd", "cde", "def", "efg", "fgh", "ghi", "hij", "ijk", "jkl"]
+let $e := ["Abc", "bCd", "cdE", "DEf", "eFG", "HiJ", "IJK"]
+let $f := ["abc", "bcd", "cde", "def", "efg", "fgh", "ghi", "hij", "ijk", "jkl"]
+let $results :=
+[
+ similarity-jaccard-sorted-check($a, $b, 0.0f),
+ similarity-jaccard-sorted-check($b, $a, 0.0f),
+ similarity-jaccard-sorted-check($a, $b, 0.1f),
+ similarity-jaccard-sorted-check($b, $a, 0.1f),
+ similarity-jaccard-sorted-check($c, $d, 0.6f),
+ similarity-jaccard-sorted-check($d, $c, 0.6f),
+ similarity-jaccard-sorted-check($c, $d, 0.8f),
+ similarity-jaccard-sorted-check($d, $c, 0.8f),
+ similarity-jaccard-sorted-check($e, $f, 0.6f),
+ similarity-jaccard-sorted-check($f, $e, 0.6f),
+ similarity-jaccard-sorted-check($e, $f, 0.8f),
+ similarity-jaccard-sorted-check($f, $e, 0.8f)
+]
+for $i in $results
+return $i
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted_ints.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted_ints.aql
new file mode 100644
index 0000000..5fefbf5
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted_ints.aql
@@ -0,0 +1,19 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_similarity-jaccard-sorted_ints.adm";
+
+let $a := [ ]
+let $b := [1, 2, 3, 4, 5]
+let $c := [1, 2, 3, 4, 5, 8, 9]
+let $d := [1, 2, 3, 4, 5, 7, 8, 9, 10, 11]
+let $results :=
+[
+ similarity-jaccard-sorted($a, $b),
+ similarity-jaccard-sorted($b, $a),
+ similarity-jaccard-sorted($c, $d),
+ similarity-jaccard-sorted($d, $c)
+]
+for $i in $results
+return $i
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted_query.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted_query.aql
new file mode 100644
index 0000000..a2373af
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted_query.aql
@@ -0,0 +1,28 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType)
+ partitioned by key id on group1;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_similarity-jaccard-sorted_query.adm";
+
+for $paper in dataset('DBLP')
+let $paper_tokens := word-tokens($paper.title)
+let $query_tokens := word-tokens("Cooperative Transactions for Environments")
+where similarity-jaccard-sorted($paper_tokens, $query_tokens) >= 0.5
+return $paper
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted_strings.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted_strings.aql
new file mode 100644
index 0000000..67a87d1
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard-sorted_strings.aql
@@ -0,0 +1,23 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_similarity-jaccard-sorted_strings.adm";
+
+let $a := [ ]
+let $b := ["abc", "bcd", "cde", "def", "efg"]
+let $c := ["abc", "bcd", "cde", "def", "efg", "hij", "ijk"]
+let $d := ["abc", "bcd", "cde", "def", "efg", "fgh", "ghi", "hij", "ijk", "jkl"]
+let $e := ["Abc", "bCd", "cdE", "DEf", "eFG", "HiJ", "IJK"]
+let $f := ["abc", "bcd", "cde", "def", "efg", "fgh", "ghi", "hij", "ijk", "jkl"]
+let $results :=
+[
+ similarity-jaccard-sorted($a, $b),
+ similarity-jaccard-sorted($b, $a),
+ similarity-jaccard-sorted($c, $d),
+ similarity-jaccard-sorted($d, $c),
+ similarity-jaccard-sorted($e, $f),
+ similarity-jaccard-sorted($f, $e)
+]
+for $i in $results
+return $i
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_01.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_01.aql
deleted file mode 100644
index ae0747e..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_01.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_similarity-jaccard_01.adm";
-
-let $a := [1, 2, 3, 4, 5, 8, 9]
-let $b := [1, 2, 3, 4, 5, 7, 8, 9, 10, 11]
-let $jacc := similarity-jaccard($a, $b)
-return $jacc
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_02.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_02.aql
deleted file mode 100644
index d0a7e1f..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_02.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_similarity-jaccard_02.adm";
-
-let $a := ["a"]
-let $b := ["b"]
-let $jacc := similarity-jaccard($a, $b)
-return $jacc
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_03.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_03.aql
deleted file mode 100644
index fd5dd21..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_03.aql
+++ /dev/null
@@ -1,10 +0,0 @@
-drop dataverse test if exists;
-create dataverse test;
-use dataverse test;
-
-write output to nc1:"rttest/similarity_similarity-jaccard_03.adm";
-
-let $a := ["a"]
-let $b := [ ]
-let $jacc := similarity-jaccard($a, $b)
-return $jacc
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_ints.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_ints.aql
new file mode 100644
index 0000000..ee20d00
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_ints.aql
@@ -0,0 +1,23 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_similarity-jaccard_ints.adm";
+
+let $a := [ ]
+let $b := [1, 2, 3, 4, 5]
+let $c := [1, 2, 3, 4, 5, 8, 9]
+let $d := [1, 2, 3, 4, 5, 7, 8, 9, 10, 11]
+let $e := [4, 3, 5, 8, 9, 2, 1]
+let $f := [7, 5, 8, 9, 3, 10, 1, 2, 11, 4]
+let $results :=
+[
+ similarity-jaccard($a, $b),
+ similarity-jaccard($b, $a),
+ similarity-jaccard($c, $d),
+ similarity-jaccard($d, $c),
+ similarity-jaccard($e, $f),
+ similarity-jaccard($f, $e)
+]
+for $i in $results
+return $i
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_query.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_query.aql
new file mode 100644
index 0000000..05f3a61
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_query.aql
@@ -0,0 +1,28 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+ id: int32,
+ dblpid: string,
+ title: string,
+ authors: string,
+ misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType)
+ partitioned by key id on group1;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
+write output to nc1:"rttest/similarity_similarity-jaccard_query.adm";
+
+for $paper in dataset('DBLP')
+let $paper_tokens := word-tokens($paper.title)
+let $query_tokens := word-tokens("Transactions for Cooperative Environments")
+where similarity-jaccard($paper_tokens, $query_tokens) >= 0.5f
+return $paper
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_strings.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_strings.aql
new file mode 100644
index 0000000..107d92f
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/similarity-jaccard_strings.aql
@@ -0,0 +1,27 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+write output to nc1:"rttest/similarity_similarity-jaccard_strings.adm";
+
+let $a := [ ]
+let $b := ["abc", "bcd", "cde", "def", "efg"]
+let $c := ["abc", "bcd", "cde", "def", "efg", "hij", "ijk"]
+let $d := ["abc", "bcd", "cde", "def", "efg", "fgh", "ghi", "hij", "ijk", "jkl"]
+let $e := ["efg", "abc", "cde", "def", "hij", "ijk", "bcd"]
+let $f := ["abc", "ijk", "bcd", "efg", "fgh", "ghi", "def", "hij", "jkl", "cde"]
+let $g := ["Efg", "aBc", "cdE", "DEf", "hIJ", "IjK", "BCD"]
+let $h := ["abc", "ijk", "bcd", "efg", "fgh", "ghi", "def", "hij", "jkl", "cde"]
+let $results :=
+[
+ similarity-jaccard($a, $b),
+ similarity-jaccard($b, $a),
+ similarity-jaccard($c, $d),
+ similarity-jaccard($d, $c),
+ similarity-jaccard($e, $f),
+ similarity-jaccard($f, $e),
+ similarity-jaccard($g, $h),
+ similarity-jaccard($h, $g)
+]
+for $i in $results
+return $i
\ No newline at end of file