| -- Source tables: external, '|'-delimited text files read in place from /tpch/100. |
| -- lineitem feeds the revenue aggregation (l_suppkey, l_extendedprice, l_discount, l_shipdate). |
| create external table lineitem ( |
|     L_ORDERKEY      INT, |
|     L_PARTKEY       INT, |
|     L_SUPPKEY       INT, |
|     L_LINENUMBER    INT, |
|     L_QUANTITY      DOUBLE, |
|     L_EXTENDEDPRICE DOUBLE, |
|     L_DISCOUNT      DOUBLE, |
|     L_TAX           DOUBLE, |
|     L_RETURNFLAG    STRING, |
|     L_LINESTATUS    STRING, |
|     L_SHIPDATE      STRING, |
|     L_COMMITDATE    STRING, |
|     L_RECEIPTDATE   STRING, |
|     L_SHIPINSTRUCT  STRING, |
|     L_SHIPMODE      STRING, |
|     L_COMMENT       STRING |
| ) |
| ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' |
| STORED AS TEXTFILE |
| LOCATION '/tpch/100/lineitem'; |
| -- supplier provides the descriptive columns (name, address, phone) for the final result. |
| create external table supplier ( |
|     S_SUPPKEY   INT, |
|     S_NAME      STRING, |
|     S_ADDRESS   STRING, |
|     S_NATIONKEY INT, |
|     S_PHONE     STRING, |
|     S_ACCTBAL   DOUBLE, |
|     S_COMMENT   STRING |
| ) |
| ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' |
| STORED AS TEXTFILE |
| LOCATION '/tpch/100/supplier'; |
| |
| -- Result tables, one per stage of the query below. |
| -- revenue: one row per supplier with its summed discounted revenue. |
| create table revenue ( |
|     supplier_no   int, |
|     total_revenue double |
| ); |
| -- max_revenue: holds the single largest total_revenue value found in revenue. |
| create table max_revenue ( |
|     max_revenue double |
| ); |
| -- q15_top_supplier: supplier details for every supplier whose revenue equals that maximum. |
| create table q15_top_supplier ( |
|     s_suppkey     int, |
|     s_name        string, |
|     s_address     string, |
|     s_phone       string, |
|     total_revenue double |
| ); |
| |
| |
| -- Session setting applied to the statements that follow: minimum input split size of |
| -- 536870912 bytes (512 * 1024 * 1024 = 512 MiB). Presumably chosen to reduce the number |
| -- of map tasks when scanning lineitem -- TODO confirm against the target cluster. |
| set mapred.min.split.size=536870912; |
| |
| -- Stage 1: per-supplier revenue, sum(extendedprice * (1 - discount)), for line items |
| -- shipped in the half-open window [1996-01-01, 1996-04-01). l_shipdate is a STRING, |
| -- so the range check is a string comparison against the date literals. |
| insert overwrite table revenue |
| select |
|     l.l_suppkey as supplier_no, |
|     sum(l.l_extendedprice * (1 - l.l_discount)) as total_revenue |
| from lineitem l |
| where l.l_shipdate >= '1996-01-01' |
|   and l.l_shipdate < '1996-04-01' |
| group by l.l_suppkey; |
| |
| -- Stage 2: reduce revenue to the single highest total_revenue value. |
| insert overwrite table max_revenue |
| select max(r.total_revenue) |
| from revenue r; |
| |
| -- Stage 3: attach supplier details to the revenue rows and keep only those whose |
| -- total_revenue equals the stored maximum (ties all qualify), ordered by supplier key. |
| insert overwrite table q15_top_supplier |
| select |
|     s.s_suppkey, |
|     s.s_name, |
|     s.s_address, |
|     s.s_phone, |
|     r.total_revenue |
| from supplier s |
| join revenue r |
|     on s.s_suppkey = r.supplier_no |
| join max_revenue m |
|     on r.total_revenue = m.max_revenue |
| order by s_suppkey; |
| |
| -- Cleanup: remove every table this script created. |
| -- "if exists" keeps the cleanup from erroring on a table that is already gone |
| -- (e.g. after a partial run), independent of hive.exec.drop.ignorenonexistent. |
| -- lineitem and supplier were created as external tables, so per Hive's documented |
| -- behavior only their metastore entries should be removed, not the files under LOCATION. |
| drop table if exists lineitem; |
| drop table if exists supplier; |
| -- NOTE: the intermediate and final result tables are discarded here as well; |
| -- anything that needs q15_top_supplier must read it before this point. |
| drop table if exists revenue; |
| drop table if exists max_revenue; |
| drop table if exists q15_top_supplier; |