blob: 2898058d5b6b1dbab15a63dab610bdf2ec0a3c45 [file] [log] [blame]
// Recreate the fj-dblp-csx dataverse from scratch (drop it if a previous run
// left it behind), then make it the active dataverse for the statements that
// follow.
drop dataverse fj-dblp-csx if exists;

create dataverse fj-dblp-csx;

use dataverse fj-dblp-csx;

// Record type for the DBLP dataset. The type is declared open, so the five
// fields below are the declared ones, not necessarily the only ones a record
// carries. `id` is the field the DBLP dataset is partitioned on; `title` is
// the field the join query tokenizes.
create type DBLPType as open {
  id: int32,
  dblpid: string,
  title: string,
  authors: string,
  misc: string
}

// Record type for the CSX dataset. Same layout as DBLPType except that the
// source-specific identifier field is named `csxid`. Declared open, so
// records are not limited to the fields listed here. `id` is the field the
// CSX dataset is partitioned on; `title` is the field the join query
// tokenizes.
create type CSXType as open {
  id: int32,
  csxid: string,
  title: string,
  authors: string,
  misc: string
}

// Storage layout: one node group spanning nc1 and nc2 (created only if it
// does not already exist), with both datasets partitioned by their `id`
// field across that group.
create nodegroup group1 if not exists on nc1, nc2;

create dataset DBLP(DBLPType) partitioned by key id on group1;
create dataset CSX(CSXType) partitioned by key id on group1;

// The result of the query below is written to this file on node nc1.
write output to nc1:'rttest/fj-dblp-csx.adm';

// Join query: emit every (DBLP id, CSX id) pair for which at least one ranked
// title token of the DBLP record equals a ranked title token of the CSX
// record.
//
// Each side is processed the same way:
//   1. tokenize the record's title with counthashed-word-tokens;
//   2. replace every token by its position ($i) in a ranking sequence,
//      keeping only tokens that occur in that sequence, ordered by position;
//   3. iterate over the resulting positions as "prefix tokens".
// The two sides are then matched on equal prefix tokens, and the matches are
// collapsed to one output record per id pair.
//
// NOTE(review): the ranking sequence actually evaluated is the list of DBLP
// titles, not the token-frequency ranking shown in the commented-out
// "Stage 1" query. That looks like a stand-in for the real ranking; confirm
// before relying on the join result.

// ---- DBLP side ----
for $paperDBLP in dataset('DBLP')
let $idDBLP := $paperDBLP.id
let $unrankedTokensDBLP := counthashed-word-tokens($paperDBLP.title)
let $tokensDBLP :=
    for $token in $unrankedTokensDBLP
    // $i is the 1-based position of $tokenRanked in the ranking sequence.
    for $tokenRanked at $i in
        //
        // -- - Stage 1 - --
        //
        // for $paper in dataset('DBLP')
        // for $token in counthashed-word-tokens($paper.title)
        // group by $tokenGroupped := $token with $paper
        // order by count($paper), $tokenGroupped
        // return $tokenGroupped
        for $paper in dataset('DBLP')
        return $paper.title
    where $token = $tokenRanked
    order by $i
    return $i

for $prefixTokenDBLP in $tokensDBLP

// ---- CSX side ----
// The ranking sequence is drawn from DBLP here as well, so both sides are
// ranked against the same sequence.
for $paperCSX in dataset('CSX')
let $idCSX := $paperCSX.id
let $unrankedTokensCSX := counthashed-word-tokens($paperCSX.title)
let $tokensCSX :=
    for $token in $unrankedTokensCSX
    for $tokenRanked at $i in
        //
        // -- - Stage 1 - --
        //
        // for $paper in dataset('DBLP')
        // for $token in counthashed-word-tokens($paper.title)
        // group by $tokenGroupped := $token with $paper
        // order by count($paper), $tokenGroupped
        // return $tokenGroupped
        for $paper in dataset('DBLP')
        return $paper.title
    where $token = $tokenRanked
    order by $i
    return $i

for $prefixTokenCSX in $tokensCSX

// ---- Match, de-duplicate and emit ----
where $prefixTokenDBLP = $prefixTokenCSX
// Grouping on the id pair yields one result per pair no matter how many
// prefix tokens matched. $paperDBLP is carried through the group-by but is
// not referenced by the return clause.
group by $idDBLP := $idDBLP, $idCSX := $idCSX with $paperDBLP
order by $idDBLP, $idCSX
return {'idDBLP': $idDBLP, 'idCSX': $idCSX}