| -- Declare the three source tables as external tables over the pipe-delimited text files under /tpch/100/. |
| -- No data is loaded by these statements; each table just points at its LOCATION directory. |
| create external table lineitem ( |
|     L_ORDERKEY      int, |
|     L_PARTKEY       int, |
|     L_SUPPKEY       int, |
|     L_LINENUMBER    int, |
|     L_QUANTITY      double, |
|     L_EXTENDEDPRICE double, |
|     L_DISCOUNT      double, |
|     L_TAX           double, |
|     L_RETURNFLAG    string, |
|     L_LINESTATUS    string, |
|     L_SHIPDATE      string, |
|     L_COMMITDATE    string, |
|     L_RECEIPTDATE   string, |
|     L_SHIPINSTRUCT  string, |
|     L_SHIPMODE      string, |
|     L_COMMENT       string |
| ) |
| row format delimited fields terminated by '|' |
| stored as textfile |
| location '/tpch/100/lineitem'; |
| |
| create external table orders ( |
|     O_ORDERKEY      int, |
|     O_CUSTKEY       int, |
|     O_ORDERSTATUS   string, |
|     O_TOTALPRICE    double, |
|     O_ORDERDATE     string, |
|     O_ORDERPRIORITY string, |
|     O_CLERK         string, |
|     O_SHIPPRIORITY  int, |
|     O_COMMENT       string |
| ) |
| row format delimited fields terminated by '|' |
| stored as textfile |
| location '/tpch/100/orders'; |
| |
| create external table customer ( |
|     C_CUSTKEY    int, |
|     C_NAME       string, |
|     C_ADDRESS    string, |
|     C_NATIONKEY  int, |
|     C_PHONE      string, |
|     C_ACCTBAL    double, |
|     C_MKTSEGMENT string, |
|     C_COMMENT    string |
| ) |
| row format delimited fields terminated by '|' |
| stored as textfile |
| location '/tpch/100/customer'; |
| |
| -- Result table filled by the insert further down: one row per order with its revenue, |
| -- order date and ship priority. |
| create table q3_shipping_priority ( |
|     l_orderkey     int, |
|     revenue        double, |
|     o_orderdate    string, |
|     o_shippriority int |
| ); |
| |
| -- Session-level job tuning for the query below; both values are byte counts. |
| -- Minimum input split size: 536870912 bytes = 512 MiB. |
| set mapred.min.split.size=536870912; |
| -- Target amount of input per reducer: 1024000000 bytes (roughly 1 GB). |
| set hive.exec.reducers.bytes.per.reducer=1024000000; |
| |
| -- Ten highest-revenue orders among customers in the BUILDING market segment, restricted to |
| -- orders dated before 1995-03-15 and line items shipped after 1995-03-15. |
| -- Revenue per order is the sum of extended price net of discount. |
| -- The date columns are declared as strings, so these are string comparisons; that matches |
| -- chronological order only if the values are zero-padded yyyy-MM-dd (assumed, verify against the data). |
| insert overwrite table q3_shipping_priority |
| select |
|     l.l_orderkey, |
|     sum(l.l_extendedprice * (1 - l.l_discount)) as revenue, |
|     o.o_orderdate, |
|     o.o_shippriority |
| from customer c |
| join orders o |
|     on c.c_mktsegment = 'BUILDING' |
|     and c.c_custkey = o.o_custkey |
| join lineitem l |
|     on l.l_orderkey = o.o_orderkey |
| where o.o_orderdate < '1995-03-15' |
|     and l.l_shipdate > '1995-03-15' |
| group by |
|     l.l_orderkey, |
|     o.o_orderdate, |
|     o.o_shippriority |
| order by |
|     revenue desc, |
|     o_orderdate |
| limit 10; |
| |
| -- Clean up every table this script created. IF EXISTS keeps the cleanup from erroring on a |
| -- table that is already gone (for example after a partial run), regardless of how |
| -- hive.exec.drop.ignorenonexistent is configured. |
| -- NOTE(review): the result table is dropped as well, so the query output does not outlive |
| -- the script; confirm that is intended. |
| DROP TABLE IF EXISTS orders; |
| DROP TABLE IF EXISTS lineitem; |
| DROP TABLE IF EXISTS customer; |
| DROP TABLE IF EXISTS q3_shipping_priority; |