blob: 57d72e5a5bfd110f460d3ea14d066be9788d4578 [file] [log] [blame]
buyingyic73348c2012-11-02 00:31:31 +00001DROP TABLE IF EXISTS lineitem;
2DROP TABLE IF EXISTS supplier;
3DROP TABLE IF EXISTS revenue;
4DROP TABLE IF EXISTS max_revenue;
5DROP TABLE IF EXISTS q15_top_supplier;
6
7-- create the tables and load the data
8create external table lineitem (L_ORDERKEY INT, L_PARTKEY INT, L_SUPPKEY INT, L_LINENUMBER INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/100/lineitem';
9create external table supplier (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/100/supplier';
10
11-- create result tables
12create table revenue(supplier_no int, total_revenue double);
13create table max_revenue(max_revenue double);
14create table q15_top_supplier(s_suppkey int, s_name string, s_address string, s_phone string, total_revenue double);
15
16-- the query
17insert overwrite table revenue
18select
19 l_suppkey as supplier_no, sum(l_extendedprice * (1 - l_discount)) as total_revenue
20from
21 lineitem
22where
23 l_shipdate >= '1996-01-01' and l_shipdate < '1996-04-01'
24group by l_suppkey;
25
26insert overwrite table max_revenue
27select
28 max(total_revenue)
29from
30 revenue;
31
32insert overwrite table q15_top_supplier
33select
34 s_suppkey, s_name, s_address, s_phone, total_revenue
35from supplier s join revenue r
36 on
37 s.s_suppkey = r.supplier_no
38 join max_revenue m
39 on
40 r.total_revenue = m.max_revenue
41order by s_suppkey;