Added asterix project
git-svn-id: https://asterixdb.googlecode.com/svn/trunk/asterix@12 eaa15691-b419-025a-1212-ee371bd00084
diff --git a/asterix-app/src/test/resources/fuzzyjoin/users-visitors/010-load-users.aql b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/010-load-users.aql
new file mode 100644
index 0000000..be75fa6
--- /dev/null
+++ b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/010-load-users.aql
@@ -0,0 +1,18 @@
+use dataverse fuzzy1;
+
+declare type UserType as open {
+ uid: int32,
+ name: string,
+ lottery_numbers: {{int32}},
+ interests: {{string}}
+}
+
+declare nodegroup group1 on nc1, nc2;
+
+declare dataset Users(UserType)
+ partitioned by key uid on group1;
+
+load dataset Users
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1:///asterix/asterix-app/data/users-visitors-small/users.json"),("format"="adm")) pre-sorted;
+
diff --git a/asterix-app/src/test/resources/fuzzyjoin/users-visitors/020-drop-users.aql b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/020-drop-users.aql
new file mode 100644
index 0000000..86525a7
--- /dev/null
+++ b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/020-drop-users.aql
@@ -0,0 +1,15 @@
+use dataverse fuzzy1;
+
+declare type UserType as open {
+ uid: int32,
+ name: string,
+ lottery_numbers: {{int32}},
+ interests: {{string}}
+}
+
+declare nodegroup group1 on nc1, nc2;
+
+declare dataset Users(UserType)
+ partitioned by key uid on group1;
+
+drop dataset Users;
diff --git a/asterix-app/src/test/resources/fuzzyjoin/users-visitors/030-filter-users.aql b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/030-filter-users.aql
new file mode 100644
index 0000000..298c0b1
--- /dev/null
+++ b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/030-filter-users.aql
@@ -0,0 +1,21 @@
+use dataverse fuzzy1;
+
+declare type UserType as open {
+ uid: int32,
+ name: string,
+ lottery_numbers: {{int32}},
+ interests: {{string}}
+}
+
+declare nodegroup group1 on nc1, nc2;
+
+declare dataset Users(UserType)
+ partitioned by key uid on group1;
+
+write output to nc1:'/tmp/users.adm';
+
+for $u in dataset('Users')
+// where $u.uid > 1
+// where $u.name = 'Jodi Rotruck'
+return $u
+// return { 'user_name': $u.name }
diff --git a/asterix-app/src/test/resources/fuzzyjoin/users-visitors/040-load-visitors.aql b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/040-load-visitors.aql
new file mode 100644
index 0000000..a75989a
--- /dev/null
+++ b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/040-load-visitors.aql
@@ -0,0 +1,19 @@
+use dataverse fuzzy1;
+
+declare type VisitorType as open {
+ vid: int32,
+ name: string,
+ lottery_numbers: {{int32}},
+ interests: {{string}}
+}
+
+declare nodegroup group1 on nc1, nc2;
+
+declare dataset Visitors(VisitorType)
+ partitioned by key vid on group1;
+
+load dataset Visitors
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1:///asterix/asterix-app/data/users-visitors-small/visitors.json"),("format"="adm")) pre-sorted;
+
+
diff --git a/asterix-app/src/test/resources/fuzzyjoin/users-visitors/050-drop-visitors.aql b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/050-drop-visitors.aql
new file mode 100644
index 0000000..15047b8
--- /dev/null
+++ b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/050-drop-visitors.aql
@@ -0,0 +1,15 @@
+use dataverse fuzzy1;
+
+declare type VisitorType as open {
+ vid: int32,
+ name: string,
+ lottery_numbers: {{int32}},
+ interests: {{string}}
+}
+
+declare nodegroup group1 on nc1, nc2;
+
+declare dataset Visitors(VisitorType)
+ partitioned by key vid on group1;
+
+drop dataset Visitors;
diff --git a/asterix-app/src/test/resources/fuzzyjoin/users-visitors/060-fililter-visitors.aql b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/060-fililter-visitors.aql
new file mode 100644
index 0000000..dae180e
--- /dev/null
+++ b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/060-fililter-visitors.aql
@@ -0,0 +1,21 @@
+use dataverse fuzzy1;
+
+declare type VisitorType as open {
+ vid: int32,
+ name: string,
+ lottery_numbers: {{int32}},
+ interests: {{string}}
+}
+
+declare nodegroup group1 on nc1, nc2;
+
+declare dataset Visitors(VisitorType)
+ partitioned by key vid on group1;
+
+write output to nc1:'/tmp/visitors.adm';
+
+for $v in dataset('Visitors')
+// where $v.vid >= 10
+// where $v.name = 'Marvella Loud'
+return $v
+// return { 'user_name': $v.name }
diff --git a/asterix-app/src/test/resources/fuzzyjoin/users-visitors/070-join-aql-lottery_numbers.aql b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/070-join-aql-lottery_numbers.aql
new file mode 100644
index 0000000..aaa145f
--- /dev/null
+++ b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/070-join-aql-lottery_numbers.aql
@@ -0,0 +1,92 @@
+use dataverse fuzzy1;
+
+declare type UserType as open {
+ uid: int32,
+ name: string,
+ lottery_numbers: {{int32}},
+ interests: {{string}}
+}
+
+declare type VisitorType as open {
+ vid: int32,
+ name: string,
+ lottery_numbers: {{int32}},
+ interests: {{string}}
+}
+
+declare nodegroup group1 on nc1, nc2;
+
+declare dataset Users(UserType)
+ partitioned by key uid on group1;
+declare dataset Visitors(VisitorType)
+ partitioned by key vid on group1;
+
+write output to nc1:'/tmp/users-visitors.adm';
+
+//
+// -- - Stage 3: join each $ridpair (uid, vid, sim) back to its full user and visitor records - --
+//
+for $ridpair in
+ //
+ // -- - Stage 2: pair users and visitors sharing a prefix token; keep pairs with sim >= .5 - --
+ //
+ for $user in dataset('Users')
+ let $tokens_user :=
+ for $lottery_number in $user.lottery_numbers
+ for $token at $i in
+ //
+ // -- - Stage 1: distinct lottery numbers of Users, ordered by count($user) - --
+ //
+ for $user in dataset('Users')
+ for $lottery_number in $user.lottery_numbers
+ group by $item := $lottery_number with $user
+ order by count($user)
+ return $item
+ where $lottery_number = $token
+ order by $i
+ return $i
+ for $prefix_token_user in subset-collection(
+ $tokens_user,
+ 0,
+ prefix-len(
+ len($tokens_user), 'Jaccard', .5))
+
+ for $visitor in dataset('Visitors')
+ let $tokens_visitor :=
+ for $lottery_number in $visitor.lottery_numbers
+ for $token at $i in
+ //
+ // -- - Stage 1: distinct lottery numbers of Users (same token order as the user side), ordered by count($user) - --
+ //
+ for $user in dataset('Users')
+ for $lottery_number in $user.lottery_numbers
+ group by $item := $lottery_number with $user
+ order by count($user)
+ return $item
+ where $lottery_number = $token
+ order by $i
+ return $i
+ for $prefix_token_visitor in subset-collection(
+ $tokens_visitor,
+ 0,
+ prefix-len(
+ len($tokens_visitor), 'Jaccard', .5))
+
+ where $prefix_token_user = $prefix_token_visitor
+
+ let $sim := similarity(
+ len($user.lottery_numbers),
+ $tokens_user,
+ len($visitor.lottery_numbers),
+ $tokens_visitor,
+ $prefix_token_user,
+ 'Jaccard',
+ .5)
+ where $sim >= .5
+ group by $uid := $user.uid, $vid := $visitor.vid with $sim
+ return { 'uid': $uid, 'vid': $vid, 'sim': $sim[0] }
+
+for $user in dataset('Users')
+for $visitor in dataset('Visitors')
+where $ridpair.uid = $user.uid and $ridpair.vid = $visitor.vid
+return { 'user': $user, 'visitor': $visitor, 'sim': $ridpair.sim }
diff --git a/asterix-app/src/test/resources/fuzzyjoin/users-visitors/080-join-op-lottery_numbers.aql b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/080-join-op-lottery_numbers.aql
new file mode 100644
index 0000000..ca5bebb
--- /dev/null
+++ b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/080-join-op-lottery_numbers.aql
@@ -0,0 +1,31 @@
+use dataverse fuzzy1;
+
+declare type UserType as open {
+ uid: int32,
+ name: string,
+ lottery_numbers: {{int32}},
+ interests: {{string}}
+}
+
+declare type VisitorType as open {
+ vid: int32,
+ name: string,
+ lottery_numbers: {{int32}},
+ interests: {{string}}
+}
+
+declare nodegroup group1 on nc1, nc2;
+
+declare dataset Users(UserType)
+ partitioned by key uid on group1;
+declare dataset Visitors(VisitorType)
+ partitioned by key vid on group1;
+
+write output to nc1:'/tmp/users-visitors.adm';
+
+set simthreshold '.5';
+
+for $user in dataset('Users')
+for $visitor in dataset('Visitors')
+where $user.lottery_numbers ~= $visitor.lottery_numbers
+return { 'user': $user, 'visitor': $visitor }
diff --git a/asterix-app/src/test/resources/fuzzyjoin/users-visitors/090-join-op-interests.aql b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/090-join-op-interests.aql
new file mode 100644
index 0000000..a7cf0a4
--- /dev/null
+++ b/asterix-app/src/test/resources/fuzzyjoin/users-visitors/090-join-op-interests.aql
@@ -0,0 +1,31 @@
+use dataverse fuzzy1;
+
+declare type UserType as open {
+ uid: int32,
+ name: string,
+ lottery_numbers: {{int32}},
+ interests: {{string}}
+}
+
+declare type VisitorType as open {
+ vid: int32,
+ name: string,
+ lottery_numbers: {{int32}},
+ interests: {{string}}
+}
+
+declare nodegroup group1 on nc1, nc2;
+
+declare dataset Users(UserType)
+ partitioned by key uid on group1;
+declare dataset Visitors(VisitorType)
+ partitioned by key vid on group1;
+
+write output to nc1:'/tmp/users-visitors.adm';
+
+set simthreshold '.5';
+
+for $user in dataset('Users')
+for $visitor in dataset('Visitors')
+where $user.interests ~= $visitor.interests
+return { 'user': $user, 'visitor': $visitor }