Added asterix project

git-svn-id: https://asterixdb.googlecode.com/svn/trunk/asterix@12 eaa15691-b419-025a-1212-ee371bd00084
diff --git a/asterix-app/src/test/resources/fuzzyjoin/users-visitors/010-load-users.aql b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/010-load-users.aql
new file mode 100644
index 0000000..be75fa6
--- /dev/null
+++ b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/010-load-users.aql
@@ -0,0 +1,18 @@
+use dataverse fuzzy1;
+// Test script: declares the Users schema and loads the dataset from a file.
+declare type UserType as open {
+  uid: int32, 
+  name: string,
+  lottery_numbers: {{int32}},
+  interests: {{string}}
+}
+// Node group made of nc1 and nc2; the dataset below is placed on it.
+declare nodegroup group1 on nc1, nc2;
+// Users holds UserType records and is partitioned by uid across group1.
+declare dataset Users(UserType) 
+  partitioned by key uid on group1;
+// Load Users from the "adm"-format file at the nc1 path below (pre-sorted presumably means already ordered by uid -- confirm).
+load dataset Users 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1:///asterix/asterix-app/data/users-visitors-small/users.json"),("format"="adm")) pre-sorted;
+
diff --git a/asterix-app/src/test/resources/fuzzyjoin/users-visitors/020-drop-users.aql b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/020-drop-users.aql
new file mode 100644
index 0000000..86525a7
--- /dev/null
+++ b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/020-drop-users.aql
@@ -0,0 +1,15 @@
+use dataverse fuzzy1;
+// Test script: re-declares the Users schema and then drops the dataset.
+declare type UserType as open {
+  uid: int32, 
+  name: string,
+  lottery_numbers: {{int32}},
+  interests: {{string}}
+}
+// Node group made of nc1 and nc2, referenced by the dataset declaration.
+declare nodegroup group1 on nc1, nc2;
+// Same Users declaration as in the load script (UserType, keyed by uid on group1).
+declare dataset Users(UserType) 
+  partitioned by key uid on group1;
+// Remove the Users dataset.
+drop dataset Users;
diff --git a/asterix-app/src/test/resources/fuzzyjoin/users-visitors/030-filter-users.aql b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/030-filter-users.aql
new file mode 100644
index 0000000..298c0b1
--- /dev/null
+++ b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/030-filter-users.aql
@@ -0,0 +1,21 @@
+use dataverse fuzzy1;
+// Test script: scans Users and writes every record to a file on nc1.
+declare type UserType as open {
+  uid: int32, 
+  name: string,
+  lottery_numbers: {{int32}},
+  interests: {{string}}
+}
+// Node group made of nc1 and nc2, referenced by the dataset declaration.
+declare nodegroup group1 on nc1, nc2;
+// Users holds UserType records and is partitioned by uid across group1.
+declare dataset Users(UserType) 
+  partitioned by key uid on group1;
+// Query results are written to /tmp/users.adm on node nc1.
+write output to nc1:'/tmp/users.adm';
+// Full scan; the commented-out lines are optional filters/projections kept for manual experiments.
+for $u in dataset('Users')
+// where $u.uid > 1
+// where $u.name = 'Jodi Rotruck'
+return $u
+// return { 'user_name': $u.name }
diff --git a/asterix-app/src/test/resources/fuzzyjoin/users-visitors/040-load-visitors.aql b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/040-load-visitors.aql
new file mode 100644
index 0000000..a75989a
--- /dev/null
+++ b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/040-load-visitors.aql
@@ -0,0 +1,19 @@
+use dataverse fuzzy1;
+// Test script: declares the Visitors schema and loads the dataset from a file.
+declare type VisitorType as open {
+  vid: int32, 
+  name: string,
+  lottery_numbers: {{int32}},
+  interests: {{string}}
+}
+// Node group made of nc1 and nc2; the dataset below is placed on it.
+declare nodegroup group1 on nc1, nc2;
+// Visitors holds VisitorType records and is partitioned by vid across group1.
+declare dataset Visitors(VisitorType) 
+  partitioned by key vid on group1;
+// Load Visitors from the file at the nc1 path below; the format must be "adm", matching the Users load script.
+load dataset Visitors 
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1:///asterix/asterix-app/data/users-visitors-small/visitors.json"),("format"="adm")) pre-sorted;
+
+
diff --git a/asterix-app/src/test/resources/fuzzyjoin/users-visitors/050-drop-visitors.aql b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/050-drop-visitors.aql
new file mode 100644
index 0000000..15047b8
--- /dev/null
+++ b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/050-drop-visitors.aql
@@ -0,0 +1,15 @@
+use dataverse fuzzy1;
+// Test script: re-declares the Visitors schema and then drops the dataset.
+declare type VisitorType as open {
+  vid: int32, 
+  name: string,
+  lottery_numbers: {{int32}},
+  interests: {{string}}
+}
+// Node group made of nc1 and nc2, referenced by the dataset declaration.
+declare nodegroup group1 on nc1, nc2;
+// Same Visitors declaration as in the load script (VisitorType, keyed by vid on group1).
+declare dataset Visitors(VisitorType) 
+  partitioned by key vid on group1;
+// Remove the Visitors dataset.
+drop dataset Visitors;
diff --git a/asterix-app/src/test/resources/fuzzyjoin/users-visitors/060-filter-visitors.aql b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/060-filter-visitors.aql
new file mode 100644
index 0000000..dae180e
--- /dev/null
+++ b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/060-filter-visitors.aql
@@ -0,0 +1,21 @@
+use dataverse fuzzy1;
+// Test script: scans Visitors and writes every record to a file on nc1.
+declare type VisitorType as open {
+  vid: int32, 
+  name: string,
+  lottery_numbers: {{int32}},
+  interests: {{string}}
+}
+// Node group made of nc1 and nc2, referenced by the dataset declaration.
+declare nodegroup group1 on nc1, nc2;
+// Visitors holds VisitorType records and is partitioned by vid across group1.
+declare dataset Visitors(VisitorType) 
+  partitioned by key vid on group1;
+// Query results are written to /tmp/visitors.adm on node nc1.
+write output to nc1:'/tmp/visitors.adm';
+// Full scan; the commented-out lines are optional filters/projections kept for manual experiments.
+for $v in dataset('Visitors')
+// where $v.vid >= 10
+// where $v.name = 'Marvella Loud'
+return $v
+// return { 'user_name': $v.name }
diff --git a/asterix-app/src/test/resources/fuzzyjoin/users-visitors/070-join-aql-lottery_numbers.aql b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/070-join-aql-lottery_numbers.aql
new file mode 100644
index 0000000..aaa145f
--- /dev/null
+++ b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/070-join-aql-lottery_numbers.aql
@@ -0,0 +1,92 @@
+use dataverse fuzzy1;
+// Test script: hand-written three-stage similarity join of Users and Visitors on lottery_numbers.
+declare type UserType as open {
+  uid: int32, 
+  name: string,
+  lottery_numbers: {{int32}},
+  interests: {{string}}
+}
+
+declare type VisitorType as open {
+  vid: int32, 
+  name: string,
+  lottery_numbers: {{int32}},
+  interests: {{string}}
+}
+// Node group made of nc1 and nc2; both datasets are placed on it.
+declare nodegroup group1 on nc1, nc2;
+// Users is keyed by uid and Visitors by vid, both partitioned across group1.
+declare dataset Users(UserType) 
+  partitioned by key uid on group1;
+declare dataset Visitors(VisitorType) 
+  partitioned by key vid on group1;
+// Query results are written to /tmp/users-visitors.adm on node nc1.
+write output to nc1:'/tmp/users-visitors.adm';
+
+//
+// -- - Stage 3 - --
+// Joins each { uid, vid, sim } pair produced by Stage 2 back to the full user and visitor records.
+for $ridpair in 
+    //
+    // -- - Stage 2 - --
+    // Emits one { uid, vid, sim } record per user/visitor pair that shares a prefix token and has sim >= .5.
+    for $user in dataset('Users')
+    let $tokens_user :=
+        for $lottery_number in $user.lottery_numbers
+        for $token at $i in
+            //
+            // -- - Stage 1 - --
+            // Lists the distinct lottery numbers found in Users, ordered by their occurrence count.
+            for $user in dataset('Users')
+            for $lottery_number in $user.lottery_numbers 
+            group by $item := $lottery_number with $user 
+            order by count($user)
+            return $item
+        where $lottery_number = $token
+        order by $i
+        return $i
+    for $prefix_token_user in subset-collection(
+                                $tokens_user, 
+                                0,
+                                prefix-len(
+                                    len($tokens_user), 'Jaccard', .5))
+    // Same ranking and prefix extraction for visitors; note the token order is still computed from Users.
+    for $visitor in dataset('Visitors')
+    let $tokens_visitor :=
+        for $lottery_number in $visitor.lottery_numbers
+        for $token at $i in
+            //
+            // -- - Stage 1 - --
+            // Same token ordering as above: distinct lottery numbers of Users by occurrence count.
+            for $user in dataset('Users')
+            for $lottery_number in $user.lottery_numbers 
+            group by $item := $lottery_number with $user 
+            order by count($user)
+            return $item
+        where $lottery_number = $token
+        order by $i
+        return $i
+    for $prefix_token_visitor in subset-collection(
+                                    $tokens_visitor, 
+                                    0, 
+                                    prefix-len(
+                                        len($tokens_visitor), 'Jaccard', .5))
+    // Candidate pairs are those whose prefixes have at least one token rank in common.
+    where $prefix_token_user = $prefix_token_visitor
+    // Verify each candidate with the 'Jaccard' similarity function at threshold .5.
+    let $sim := similarity(
+                    len($user.lottery_numbers),
+                    $tokens_user,
+                    len($visitor.lottery_numbers),
+                    $tokens_visitor,
+                    $prefix_token_user,
+                    'Jaccard',
+                    .5)
+    where $sim >= .5
+    group by $uid := $user.uid, $vid := $visitor.vid with $sim
+    return { 'uid': $uid, 'vid': $vid, 'sim': $sim[0] }
+// Grouping above collapses repeated (uid, vid) candidates; $sim[0] keeps the first collected value.
+for $user in dataset('Users')
+for $visitor in dataset('Visitors')
+where $ridpair.uid = $user.uid and $ridpair.vid = $visitor.vid
+return { 'user': $user, 'visitor': $visitor, 'sim': $ridpair.sim }
diff --git a/asterix-app/src/test/resources/fuzzyjoin/users-visitors/080-join-op-lottery_numbers.aql b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/080-join-op-lottery_numbers.aql
new file mode 100644
index 0000000..ca5bebb
--- /dev/null
+++ b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/080-join-op-lottery_numbers.aql
@@ -0,0 +1,31 @@
+use dataverse fuzzy1;
+// Test script: joins Users and Visitors on lottery_numbers using the ~= operator.
+declare type UserType as open {
+  uid: int32, 
+  name: string,
+  lottery_numbers: {{int32}},
+  interests: {{string}}
+}
+
+declare type VisitorType as open {
+  vid: int32, 
+  name: string,
+  lottery_numbers: {{int32}},
+  interests: {{string}}
+}
+// Node group made of nc1 and nc2; both datasets are placed on it.
+declare nodegroup group1 on nc1, nc2;
+// Users is keyed by uid and Visitors by vid, both partitioned across group1.
+declare dataset Users(UserType) 
+  partitioned by key uid on group1;
+declare dataset Visitors(VisitorType) 
+  partitioned by key vid on group1;
+// Query results are written to /tmp/users-visitors.adm on node nc1.
+write output to nc1:'/tmp/users-visitors.adm';
+// Similarity threshold setting; presumably consumed by the ~= predicate below -- confirm.
+set simthreshold '.5';
+// Return every user/visitor pair whose lottery_numbers satisfy the ~= predicate.
+for $user in dataset('Users')
+for $visitor in dataset('Visitors')
+where $user.lottery_numbers ~= $visitor.lottery_numbers
+return { 'user': $user, 'visitor': $visitor }
diff --git a/asterix-app/src/test/resources/fuzzyjoin/users-visitors/090-join-op-interests.aql b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/090-join-op-interests.aql
new file mode 100644
index 0000000..a7cf0a4
--- /dev/null
+++ b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/090-join-op-interests.aql
@@ -0,0 +1,31 @@
+use dataverse fuzzy1;
+// Test script: joins Users and Visitors on interests using the ~= operator.
+declare type UserType as open {
+  uid: int32, 
+  name: string,
+  lottery_numbers: {{int32}},
+  interests: {{string}}
+}
+// uid and vid are scalar int32 because they are the partitioning keys, as in the other scripts of this suite.
+declare type VisitorType as open {
+  vid: int32, 
+  name: string,
+  lottery_numbers: {{int32}},
+  interests: {{string}}
+}
+// Node group made of nc1 and nc2; both datasets are placed on it.
+declare nodegroup group1 on nc1, nc2;
+// Users is keyed by uid and Visitors by vid, both partitioned across group1.
+declare dataset Users(UserType) 
+  partitioned by key uid on group1;
+declare dataset Visitors(VisitorType) 
+  partitioned by key vid on group1;
+// Query results are written to /tmp/users-visitors.adm on node nc1.
+write output to nc1:'/tmp/users-visitors.adm';
+// Similarity threshold setting; presumably consumed by the ~= predicate below -- confirm.
+set simthreshold '.5';
+// Return every user/visitor pair whose interests satisfy the ~= predicate.
+for $user in dataset('Users')
+for $visitor in dataset('Visitors')
+where $user.interests ~= $visitor.interests
+return { 'user': $user, 'visitor': $visitor }