blob: 032031f2d62ea58a5c86a5d79effd2219e26ebd9 [file] [log] [blame]
use dataverse fuzzy1;
/*
 * Record type for entries stored in the DBLP dataset.
 * The type is declared `open`, so a record may carry additional fields
 * beyond the five declared below.
 */
declare type DBLPType as open {
/* Integer key; the DBLP dataset is partitioned on this field. */
id: int32,
/* Presumably the identifier assigned by DBLP itself -- TODO confirm. */
dblpid: string,
/* Publication title; this is the field compared by the fuzzy join query. */
title: string,
/* Authors held as one string (not a list); the delimiter is not visible here. */
authors: string,
/* Free-form string; its contents are not described in this file. */
misc: string
}
/*
 * Record type for entries stored in the CSX dataset.
 * The type is declared `open`, so a record may carry additional fields
 * beyond the five declared below. Its declared fields mirror DBLPType,
 * except that the source-specific identifier is named `csxid`.
 */
declare type CSXType as open {
/* Integer key; the CSX dataset is partitioned on this field. */
id: int32,
/* Presumably the identifier assigned by the CSX source -- TODO confirm. */
csxid: string,
/* Publication title; this is the field compared by the fuzzy join query. */
title: string,
/* Authors held as one string (not a list); the delimiter is not visible here. */
authors: string,
/* Free-form string; its contents are not described in this file. */
misc: string
}
/* Node group made up of the two nodes nc1 and nc2. */
declare nodegroup group1 on nc1, nc2;
/*
 * Both datasets are partitioned on their integer `id` field and placed on
 * the same node group, group1.
 */
declare dataset DBLP(DBLPType)
partitioned by key id on group1;
declare dataset CSX(CSXType)
partitioned by key id on group1;
/*
 * Fuzzy join between DBLP and CSX on publication title.
 *
 * Every DBLP record is paired with every CSX record whose title matches under
 * the `~=` similarity operator. The similarity threshold is set to '.5'
 * immediately below; no similarity function is selected in this script, so
 * the engine default applies (presumably Jaccard -- TODO confirm).
 *
 * Each match is emitted as a record with two nested records:
 *   'R' -- dblpid and title of the DBLP side
 *   'S' -- csxid and title of the CSX side
 * The result is written to /tmp/pub.adm on node nc1.
 */
write output to nc1:'/tmp/pub.adm';
set simthreshold '.5';

for $dblp in dataset('DBLP')
for $csx in dataset('CSX')
where $dblp.title ~= $csx.title
return {
  'R': {
    'dblpid': $dblp.dblpid,
    'title': $dblp.title
  },
  'S': {
    'csxid': $csx.csxid,
    'title': $csx.title
  }
}