/*
 * Fuzzy self-join of the DBLP dataset on paper title.
 *
 * Every pair of DBLP records whose titles satisfy the fuzzy-equality
 * operator (~=) is emitted as { 'R': ..., 'S': ... }, carrying the
 * dblpid and title of each side. The result is written to
 * /tmp/dblp.adm on node nc1.
 */
use dataverse fuzzy1;

/* Open record type: the fields below are declared; being open, it also
   admits additional undeclared fields. */
declare type DBLPType as open {
  id: int32,
  dblpid: string,
  title: string,
  authors: string,
  misc: string
}

declare nodegroup group1 on nc1, nc2;

/* DBLP is partitioned by its key, id, across the nodes of group1. */
declare dataset DBLP(DBLPType)
  partitioned by key id on group1;

write output to nc1:'/tmp/dblp.adm';

/* Threshold applied by the ~= operator below. This script sets no
   simfunction, so whichever similarity function is the system default
   is the one in effect (NOTE(review): confirm the intended function). */
set simthreshold '.5';

/* Both loops range over the same dataset. The id < id condition drops
   the pairing of a record with itself and keeps only one of the two
   orderings of each matching pair. */
for $paperR in dataset('DBLP')
for $paperS in dataset('DBLP')
where $paperR.title ~= $paperS.title and $paperR.id < $paperS.id
return { 'R': { 'dblpid': $paperR.dblpid, 'title': $paperR.title },
         'S': { 'dblpid': $paperS.dblpid, 'title': $paperS.title }}