/*
 * Source blob: 032031f2d62ea58a5c86a5d79effd2219e26ebd9
 *
 * Fuzzy join between the DBLP and CSX publication datasets on their
 * `title` fields. For every matching pair the query emits one record
 * holding the source-specific id and the title from each side.
 */
use dataverse fuzzy1;

// Open record type for DBLP entries; `id` is the partitioning key of the
// DBLP dataset declared below.
declare type DBLPType as open {
  id: int32,
  dblpid: string,
  title: string,
  authors: string,
  misc: string
}

// Open record type for CSX entries; same layout as DBLPType except that the
// source-specific identifier field is named `csxid`.
declare type CSXType as open {
  id: int32,
  csxid: string,
  title: string,
  authors: string,
  misc: string
}

// Both datasets are placed on the same two-node group.
declare nodegroup group1 on nc1, nc2;

declare dataset DBLP(DBLPType)
  partitioned by key id on group1;

declare dataset CSX(CSXType)
  partitioned by key id on group1;

// Query results are written to this file on node nc1.
write output to nc1:'/tmp/pub.adm';

// NOTE(review): presumably this is the similarity threshold applied by the
// `~=` operator in the query below; no `simfunction` is set here, so the
// system default would apply -- confirm against the AQL similarity docs.
set simthreshold '.5';

// Pair every DBLP paper with every CSX paper and keep the pairs whose titles
// satisfy the fuzzy-match operator `~=`.
for $paperR in dataset('DBLP')
for $paperS in dataset('CSX')
where $paperR.title ~= $paperS.title
return {
  'R': { 'dblpid': $paperR.dblpid, 'title': $paperR.title },
  'S': { 'csxid': $paperS.csxid, 'title': $paperS.title }
}