/* Source blob: f2467f22bce40a6abd03bca84e0a7d2c8fc5f8d6 (repository-viewer header; not part of the query) */
/*
 * Switch to the fuzzy1 dataverse and declare DBLPType, the record type
 * backing the DBLP dataset. The type is declared "open"; the five fields
 * listed here are the ones this script declares explicitly.
 */
use dataverse fuzzy1;

declare type DBLPType as open {
    id:      int32,   // used as the partitioning key of dataset DBLP
    dblpid:  string,
    title:   string,  // compared with ~= in the query at the end of this script
    authors: string,
    misc:    string
}
/* Node group group1 is declared on nodes nc1 and nc2. */
declare nodegroup group1 on nc1, nc2;

/* Dataset DBLP holds DBLPType records, partitioned by the id field across group1. */
declare dataset DBLP(DBLPType)
    partitioned by key id on group1;
/*
 * Fuzzy self-join of DBLP on title.
 *
 * Results are written to /tmp/dblp.adm on node nc1. The ~= operator is
 * evaluated with simthreshold '.5'; no simfunction is set in this script,
 * so whichever similarity function the engine defaults to applies.
 */
write output to nc1:'/tmp/dblp.adm';

set simthreshold '.5';

for $left in dataset('DBLP')
for $right in dataset('DBLP')
where $left.title ~= $right.title
  // The strict id ordering skips pairing a record with itself and keeps
  // only one ordering (smaller id as R) of each matching pair.
  and $left.id < $right.id
return {
    'R': { 'dblpid': $left.dblpid, 'title': $left.title },
    'S': { 'dblpid': $right.dblpid, 'title': $right.title }
}