blob: c82f62c46668b14437bb147b9d69a9865e3a7b37 [file] [log] [blame]
Yingyi Bu391f09e2015-10-29 13:49:39 -07001/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
/* Start from a clean slate: discard any earlier fuzzyjoin_078 database,
   recreate it, and make it the default scope for the statements below. */
drop database fuzzyjoin_078 if exists;
create database fuzzyjoin_078;
use fuzzyjoin_078;
24
25
/* Record type with an int32 id and four string fields; it is the item
   type of the DBLP_fuzzyjoin_078 table declared further down. */
create type fuzzyjoin_078.DBLPType as {
  id      : int32,
  dblpid  : string,
  title   : string,
  authors : string,
  misc    : string
}
34
/* Node group made of asterix_nc1 and asterix_nc2; left alone if it already exists. */
create nodegroup group1 if not exists on asterix_nc1, asterix_nc2;
/* Table of DBLPType records, keyed by id and placed on node group group1. */
create table DBLP_fuzzyjoin_078(DBLPType)
  primary key id
  on group1;
40
/*
 * For every paper, emit its id together with the rank positions of its
 * title tokens, ordered by paper id, and write the result to
 * rttest/fuzzyjoin_078.adm on asterix_nc1.
 *
 * The ranking comes from the innermost subquery: the title tokens of all
 * papers are grouped (the group by carries the hash hint), ordered by the
 * count of papers in each group and then by the token itself, and numbered
 * through the positional variable bound with "at". Each paper's own tokens
 * are matched against that ranked list by equality, and the matching
 * positions are returned in ascending order.
 */
write output to asterix_nc1:"rttest/fuzzyjoin_078.adm"
select element { 'id' : paperDBLP.id, 'tokens' : tokensDBLP }
from DBLP_fuzzyjoin_078 as paperDBLP
with
  unrankedTokensDBLP as fuzzyjoin_078."counthashed-word-tokens"(paperDBLP.title),
  tokensDBLP as (
    select element rankPos
    from
      unrankedTokensDBLP as paperToken,
      (
        select element groupedToken
        from
          DBLP_fuzzyjoin_078 as corpusPaper,
          fuzzyjoin_078."counthashed-word-tokens"(corpusPaper.title) as corpusToken
        /* +hash */
        group by corpusToken as groupedToken
        order by count(corpusPaper), groupedToken
      ) as rankedToken at rankPos
    where paperToken = rankedToken
    order by rankPos
  )
order by paperDBLP.id
;