blob: 866c027e404d6290d5a16acaaa36ec7a29ff3ed5 [file] [log] [blame]
// Test setup: drop any existing dataverse named fuzzyjoin_078, recreate it,
// and make it the current dataverse for the statements that follow.
vinayakb38b7ca42012-03-05 05:44:15 +00001drop dataverse fuzzyjoin_078 if exists;
2
3create dataverse fuzzyjoin_078;
4
5use dataverse fuzzyjoin_078;
6
// Record type for the dataset used by this test. It is declared `open` and
// names five fields: an int32 `id` plus four string fields. Of these, the
// query in this file reads only `id` and `title`.
7create type DBLPType as open {
8 id: int32,
9 dblpid: string,
10 title: string,
11 authors: string,
12 misc: string
13}
14
// Storage layout: node group `group1` spans nodes nc1 and nc2 (created only
// if it does not already exist).
15create nodegroup group1 if not exists on nc1, nc2;
16
// Dataset of DBLPType records, keyed and partitioned by `id`, placed on group1.
// No load statement is visible in this file.
17create dataset DBLP_fuzzyjoin_078(DBLPType)
18 partitioned by key id on group1;
19
// Direct the result of the query below to a file on node nc1.
20write output to nc1:'rttest/fuzzyjoin_078.adm';
21
// Query: for every paper, return its id together with the ranks of its title
// tokens. A token's rank is its position in the dataset-wide token ordering
// produced by the nested "Stage 1" subquery.
22 //
23 // -- - Stage 2 - --
24 //
25 for $paperDBLP in dataset('DBLP_fuzzyjoin_078')
26 let $unrankedTokensDBLP := counthashed-word-tokens($paperDBLP.title)
// $tokensDBLP: this paper's tokens translated into positions ($i) in the
// Stage 1 list.
27 let $tokensDBLP :=
28 for $token in $unrankedTokensDBLP
// `at $i` binds the position of each $tokenRanked within the Stage 1 result.
29 for $tokenRanked at $i in
30 //
31 // -- - Stage 1 - --
32 //
// Stage 1 scans the same dataset again, groups all title tokens by value, and
// orders the groups by count($paper), then by the token itself.
33 for $paper in dataset('DBLP_fuzzyjoin_078')
34 for $token in counthashed-word-tokens($paper.title)
// NOTE(review): the /*+ ... */ comments here and below look like compiler
// hints rather than documentation. Keep them verbatim; confirm their exact
// semantics against the AsterixDB documentation.
35 /*+ hash */
36 group by $tokenGroupped := $token with $paper
37 /*+ inmem 1 302 */
38 order by count($paper), $tokenGroupped
39 return $tokenGroupped
// Join: keep only the ranked entry that equals the paper's current token.
40 where $token = /*+ bcast */ $tokenRanked
// Emit the matched positions in sorted order.
41 order by $i
42 return $i
// Final result: one record per paper, ordered by paper id.
43 order by $paperDBLP.id
44 return {'id': $paperDBLP.id, 'tokens':$tokensDBLP}