blob: 30d97b8fbaf177179b1f0f98bb742350a1c98ded [file] [log] [blame]
/*
 * Description  : Tests whether an ngram_index is applied to optimize a selection query
 *                using the similarity-jaccard-check function on 3-gram tokens.
 *                Tests that the optimizer rule correctly drills through the let clauses.
 *                The index should be applied.
 * Success      : Yes
 */
// Recreate the "test" dataverse from scratch so the script can be re-run.
drop dataverse test if exists;
create dataverse test;
use dataverse test;

// Closed record type for the DBLP records loaded by this test.
// "title" is the field the ngram index and the query below operate on.
create type DBLPType as closed {
  id: int32,
  dblpid: string,
  title: string,
  authors: string,
  misc: string
}
// Dataset of DBLPType records, partitioned on the "id" field.
create dataset DBLP(DBLPType) partitioned by key id;

// Bulk-load the dataset from a ':'-delimited text file on node nc1.
// The input is declared pre-sorted.
load dataset DBLP
using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;

// 3-gram index on title; this is the index the optimizer is expected to pick.
create index ngram_index on DBLP(title) type ngram(3);

// Destination for the output of the query that follows.
write output to nc1:"rttest/inverted-index-complex_ngram-jaccard-check-multi-let.adm";
// This test is complex because we have three assigns to drill into:
// the two token lists and the similarity check are each bound by their own
// let clause, and the where clause only references the last one.
for $paper in dataset('DBLP')
  // 3-gram tokens of the indexed field.
  // NOTE(review): the meaning of the third argument (false) is not shown here;
  // presumably it controls gram padding. Confirm against the function docs.
  let $paper_tokens := gram-tokens($paper.title, 3, false)
  // 3-gram tokens of the constant search string, built with the same arguments.
  let $query_tokens := gram-tokens("Transactions for Cooperative Environments", 3, false)
  // Similarity check between the two token lists with 0.5f as the third argument
  // (presumably the Jaccard threshold; confirm).
  let $jacc := similarity-jaccard-check($paper_tokens, $query_tokens, 0.5f)
// Filter on the first item of the check's result (presumably the match flag; confirm).
where $jacc[0]
return {"Paper": $paper_tokens, "Query": $query_tokens }