| -- create tables and load data |
| create external table customer (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/100/customer'; |
| Create external table lineitem (L_ORDERKEY INT, L_PARTKEY INT, L_SUPPKEY INT, L_LINENUMBER INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/100/lineitem'; |
| create external table orders (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/100/orders'; |
| create external table supplier (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/100/supplier'; |
| create external table nation (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/100/nation'; |
| |
| -- create the target table |
| create table q7_volume_shipping (supp_nation string, cust_nation string, l_year int, revenue double); |
| create table q7_volume_shipping_tmp(supp_nation string, cust_nation string, s_nationkey int, c_nationkey int); |
| |
| set mapred.min.split.size=536870912; |
| set hive.exec.reducers.bytes.per.reducer=1225000000; |
| |
| -- the query |
| insert overwrite table q7_volume_shipping_tmp |
| select |
| * |
| from |
| ( |
| select |
| n1.n_name as supp_nation, n2.n_name as cust_nation, n1.n_nationkey as s_nationkey, |
| n2.n_nationkey as c_nationkey |
| from |
| nation n1 join nation n2 |
| on |
| n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY' |
| UNION ALL |
| select |
| n1.n_name as supp_nation, n2.n_name as cust_nation, n1.n_nationkey as s_nationkey, |
| n2.n_nationkey as c_nationkey |
| from |
| nation n1 join nation n2 |
| on |
| n2.n_name = 'FRANCE' and n1.n_name = 'GERMANY' |
| ) a; |
| |
| insert overwrite table q7_volume_shipping |
| select |
| supp_nation, cust_nation, l_year, sum(volume) as revenue |
| from |
| ( |
| select |
| supp_nation, cust_nation, year(l_shipdate) as l_year, |
| l_extendedprice * (1 - l_discount) as volume |
| from |
| q7_volume_shipping_tmp t join |
| (select l_shipdate, l_extendedprice, l_discount, c_nationkey, s_nationkey |
| from supplier s join |
| (select l_shipdate, l_extendedprice, l_discount, l_suppkey, c_nationkey |
| from customer c join |
| (select l_shipdate, l_extendedprice, l_discount, l_suppkey, o_custkey |
| from orders o join lineitem l |
| on |
| o.o_orderkey = l.l_orderkey and l.l_shipdate >= '1995-01-01' |
| and l.l_shipdate <= '1996-12-31' |
| ) l1 on c.c_custkey = l1.o_custkey |
| ) l2 on s.s_suppkey = l2.l_suppkey |
| ) l3 on l3.c_nationkey = t.c_nationkey and l3.s_nationkey = t.s_nationkey |
| ) shipping |
| group by supp_nation, cust_nation, l_year |
| order by supp_nation, cust_nation, l_year; |
| |
| DROP TABLE customer; |
| DROP TABLE orders; |
| DROP TABLE lineitem; |
| DROP TABLE supplier; |
| DROP TABLE nation; |
| DROP TABLE q7_volume_shipping; |
| DROP TABLE q7_volume_shipping_tmp; |