/*
 * Description : Fuzzy self-joins a dataset, DBLP, based on the edit distance between its authors fields.
 *               DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join.
 *               We test the inlining of variables that enables the select to be pushed into the join for subsequent optimization with an index.
 *               We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary.
 * Success     : Yes
 */
8
// Start from a clean slate: drop any existing 'test' dataverse, recreate it,
// and switch to it for the statements that follow.
drop dataverse test if exists;
create dataverse test;
use dataverse test;
12
// Closed record type for DBLP entries.
// 'id' is used as the dataset's primary key; 'authors' is the field that the
// n-gram index and the edit-distance join predicate operate on.
create type DBLPType as closed {
  id: int32,
  dblpid: string,
  title: string,
  authors: string,
  misc: string
}
20
// Dataset of DBLPType records, keyed on id.
create dataset DBLP(DBLPType) primary key id;

// 3-gram index on authors. Per the file header, the optimizer is expected to
// use it to turn the fuzzy self-join below into an indexed nested-loop join.
create index ngram_index on DBLP(authors) type ngram(3);
24
// Send the result of the query to this file on node nc1.
write output to nc1:"rttest/inverted-index-join-noeqjoin_ngram-edit-distance-inline.adm";
26
// Fuzzy self-join of DBLP on authors.
// The edit distance is deliberately bound to $ed with a 'let' and then reused
// in both the 'where' and the 'return': per the file header, the test checks
// that the optimizer inlines this variable so the selection can be pushed into
// the join. Do not rewrite the predicate inline, or the test loses its purpose.
for $a in dataset('DBLP')
for $b in dataset('DBLP')
let $ed := edit-distance($a.authors, $b.authors)
// $ed < 3 is the similarity condition; $a.id < $b.id excludes pairing a record
// with itself and keeps only one ordering of each pair.
where $ed < 3 and $a.id < $b.id
return {"aauthors": $a.authors, "bauthors": $b.authors, "ed": $ed}