blob: d3d73c3b28edcfbf94be33db468ee14bf1857a6f [file] [log] [blame]
buyingyic73348c2012-11-02 00:31:31 +00001-- create the tables and load the data
2create external table lineitem (L_ORDERKEY INT, L_PARTKEY INT, L_SUPPKEY INT, L_LINENUMBER INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/100/lineitem';
3create external table supplier (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/100/supplier';
4
5-- create result tables
6create table revenue(supplier_no int, total_revenue double);
7create table max_revenue(max_revenue double);
8create table q15_top_supplier(s_suppkey int, s_name string, s_address string, s_phone string, total_revenue double);
9
10
11set mapred.min.split.size=536870912;
12
13-- the query
14insert overwrite table revenue
15select
16 l_suppkey as supplier_no, sum(l_extendedprice * (1 - l_discount)) as total_revenue
17from
18 lineitem
19where
20 l_shipdate >= '1996-01-01' and l_shipdate < '1996-04-01'
21group by l_suppkey;
22
23insert overwrite table max_revenue
24select
25 max(total_revenue)
26from
27 revenue;
28
29insert overwrite table q15_top_supplier
30select
31 s_suppkey, s_name, s_address, s_phone, total_revenue
32from supplier s join revenue r
33 on
34 s.s_suppkey = r.supplier_no
35 join max_revenue m
36 on
37 r.total_revenue = m.max_revenue
38order by s_suppkey;
39
40DROP TABLE lineitem;
41DROP TABLE supplier;
42DROP TABLE revenue;
43DROP TABLE max_revenue;
44DROP TABLE q15_top_supplier;