blob: c6e29e6200bf3410d251be8f6ec9aa2c19454bd2 [file] [log] [blame]
/*
 * Description : Fuzzy joins two datasets, Customers and Customers2, based on the edit-distance function of their names.
 * Customers has a 3-gram index on name, and we expect the join to be transformed into an indexed nested-loop join.
 * We test the inlining of variables that enables the select to be pushed into the join for subsequent optimization with an index.
 * We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary.
 * Success : Yes
 */

// Start from a clean slate: drop any existing 'test' dataverse, recreate it,
// and make it the active dataverse for the statements that follow.
drop dataverse test if exists;
create dataverse test;
use dataverse test;

// Open record type for the address nested inside a customer record.
create type AddressType as open {
  number: int32,
  street: string,
  city: string
}

// Open record type shared by both datasets in this test.
// Fields marked with '?' (age, address, and the age of each child) are optional.
create type CustomerType as open {
  cid: int32,
  name: string,
  age: int32?,
  address: AddressType?,
  interests: [string],
  children: [ { name: string, age: int32? } ]
}

// Two datasets of the same record type, both keyed on cid.
create dataset Customers(CustomerType) primary key cid;

create dataset Customers2(CustomerType) primary key cid;

// Both datasets are loaded from the same customer.adm file.
load dataset Customers
using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));

load dataset Customers2
using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
(("path"="nc1://data/semistructured/co1k_olist/customer.adm"),("format"="adm"));

// Only Customers gets the 3-gram index on name; Customers2 has no secondary index.
create index ngram_index on Customers(name) type ngram(3);

write output to nc1:"rttest/inverted-index-join-noeqjoin_ngram-edit-distance-inline.adm";

// Fuzzy join on name using edit distance.
// The edit distance is deliberately bound to $ed with a 'let' and then used in
// the 'where', 'order by' and 'return' clauses: this test exercises inlining of
// that variable, so do not fold the call into the predicate by hand.
// The $a.cid < $b.cid predicate keeps only pairs whose Customers key is smaller
// than the Customers2 key.
for $a in dataset('Customers')
for $b in dataset('Customers2')
let $ed := edit-distance($a.name, $b.name)
where $ed <= 4 and $a.cid < $b.cid
order by $ed, $a.cid, $b.cid
return { "a": $a.name, "b": $b.name, "ed": $ed }