blob: cd5c8aa49187b51f221e09dc9555865b7368fdcb [file] [log] [blame]
buyingyi55df5212013-03-24 07:20:08 +00001-- create the tables and load the data
2create external table lineitem (L_ORDERKEY INT, L_PARTKEY INT, L_SUPPKEY INT, L_LINENUMBER INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/100/lineitem';
3create external table orders (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/100/orders';
4
5-- create the result table
6create table q12_shipping(l_shipmode string, high_line_count double, low_line_count double);
7
8set mapred.min.split.size=536870912;
9set hive.exec.reducers.bytes.per.reducer=1225000000;
10
11-- the query
12insert overwrite table q12_shipping
13select
14 l_shipmode,
15 sum(case
16 when o_orderpriority ='1-URGENT'
17 or o_orderpriority ='2-HIGH'
18 then 1
19 else 0
20end
21 ) as high_line_count,
22 sum(case
23 when o_orderpriority <> '1-URGENT'
24 and o_orderpriority <> '2-HIGH'
25 then 1
26 else 0
27end
28 ) as low_line_count
29from
30 orders o join lineitem l
31 on
32 o.o_orderkey = l.l_orderkey and l.l_commitdate < l.l_receiptdate
33and l.l_shipdate < l.l_commitdate and l.l_receiptdate >= '1994-01-01'
34and l.l_receiptdate < '1995-01-01'
35where
36 l.l_shipmode = 'MAIL' or l.l_shipmode = 'SHIP'
37group by l_shipmode
38order by l_shipmode;
39
40DROP TABLE lineitem;
41DROP TABLE orders;
42DROP TABLE q12_shipping;