Several major changes in hyracks:
-- reduced CC/NC communications for reporting partition request and availability; partition request/availability are only reported for the case of send-side materialized (without pipelining) policies in case of task re-attempt.
-- changed buffer cache to dynamically allocate memory based on needs instead of pre-allocating
-- changed each network channel to lazily allocate memory based on needs, and changed materialized connectors to lazily allocate files based on needs
-- changed several major CCNCCFunctions to use non-java serde
-- added a sort-based group-by operator which pushes group-by aggregations into an external sort
-- make external sort a stable sort
1,3,and 4 is to reduce the job overhead.
2 is to reduce the unecessary NC resource consumptions such as memory and files.
5 and 6 are improvements to runtime operators.
One change in algebricks:
-- implemented a rule to push group-by aggregation into sort, i.e., using the sort-based gby operator
Several important changes in pregelix:
-- remove static states in vertex
-- direct check halt bit without deserialization
-- optimize the sort algorithm by packing yet-another 2-byte normalized key into the tPointers array
Change-Id: Id696f9a9f1647b4a025b8b33d20b3a89127c60d6
Reviewed-on: http://fulliautomatix.ics.uci.edu:8443/35
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Till Westmann <westmann@gmail.com>
diff --git a/hivesterix-dist/src/test/resources/optimizerts/results/q12_shipping.plan b/hivesterix-dist/src/test/resources/optimizerts/results/q12_shipping.plan
index d976bba..a6aa730 100644
--- a/hivesterix-dist/src/test/resources/optimizerts/results/q12_shipping.plan
+++ b/hivesterix-dist/src/test/resources/optimizerts/results/q12_shipping.plan
@@ -6,53 +6,49 @@
-- ASSIGN |PARTITIONED|
exchange
-- SORT_MERGE_EXCHANGE [$$26(ASC) ] |PARTITIONED|
- order (ASC, %0->$$26)
- -- STABLE_SORT [$$26(ASC)] |PARTITIONED|
+ group by ([$$26 := %0->$$34]) decor ([]) {
+ aggregate [$$27, $$28] <- [function-call: hive:sum(FINAL), Args:[%0->$$32], function-call: hive:sum(FINAL), Args:[%0->$$33]]
+ -- AGGREGATE |LOCAL|
+ nested tuple source
+ -- NESTED_TUPLE_SOURCE |LOCAL|
+ }
+ -- PRE_CLUSTERED_GROUP_BY[$$34] |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- group by ([$$26 := %0->$$34]) decor ([]) {
- aggregate [$$27, $$28] <- [function-call: hive:sum(FINAL), Args:[%0->$$32], function-call: hive:sum(FINAL), Args:[%0->$$33]]
+ -- HASH_PARTITION_MERGE_EXCHANGE MERGE:[$$34(ASC)] HASH:[$$34] |PARTITIONED|
+ group by ([$$34 := %0->$$24]) decor ([]) {
+ aggregate [$$32, $$33] <- [function-call: hive:sum(PARTIAL1), Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen, Args:[function-call: algebricks:or, Args:[function-call: algebricks:eq, Args:[%0->$$6, 1-URGENT], function-call: algebricks:eq, Args:[%0->$$6, 2-HIGH]], 1, 0]], function-call: hive:sum(PARTIAL1), Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen, Args:[function-call: algebricks:and, Args:[function-call: algebricks:neq, Args:[%0->$$6, 1-URGENT], function-call: algebricks:neq, Args:[%0->$$6, 2-HIGH]], 1, 0]]]
-- AGGREGATE |LOCAL|
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$34] |PARTITIONED|
+ -- SORT_GROUP_BY[$$24] |PARTITIONED|
exchange
- -- HASH_PARTITION_EXCHANGE [$$34] |PARTITIONED|
- group by ([$$34 := %0->$$24]) decor ([]) {
- aggregate [$$32, $$33] <- [function-call: hive:sum(PARTIAL1), Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen, Args:[function-call: algebricks:or, Args:[function-call: algebricks:eq, Args:[%0->$$6, 1-URGENT], function-call: algebricks:eq, Args:[%0->$$6, 2-HIGH]], 1, 0]], function-call: hive:sum(PARTIAL1), Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen, Args:[function-call: algebricks:and, Args:[function-call: algebricks:neq, Args:[%0->$$6, 1-URGENT], function-call: algebricks:neq, Args:[%0->$$6, 2-HIGH]], 1, 0]]]
- -- AGGREGATE |LOCAL|
- nested tuple source
- -- NESTED_TUPLE_SOURCE |LOCAL|
- }
- -- EXTERNAL_GROUP_BY[$$24] |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$6, $$24])
+ -- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- project ([$$6, $$24])
- -- STREAM_PROJECT |PARTITIONED|
+ join (function-call: algebricks:eq, Args:[%0->$$10, %0->$$1])
+ -- HYBRID_HASH_JOIN [$$10][$$1] |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- join (function-call: algebricks:eq, Args:[%0->$$10, %0->$$1])
- -- HYBRID_HASH_JOIN [$$10][$$1] |PARTITIONED|
- exchange
- -- HASH_PARTITION_EXCHANGE [$$10] |PARTITIONED|
- project ([$$10, $$24])
- -- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$21, %0->$$22], function-call: algebricks:lt, Args:[%0->$$20, %0->$$21], function-call: algebricks:ge, Args:[%0->$$22, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$22, 1995-01-01], function-call: algebricks:or, Args:[function-call: algebricks:eq, Args:[%0->$$24, MAIL], function-call: algebricks:eq, Args:[%0->$$24, SHIP]]])
- -- STREAM_SELECT |PARTITIONED|
+ -- HASH_PARTITION_EXCHANGE [$$10] |PARTITIONED|
+ project ([$$10, $$24])
+ -- STREAM_PROJECT |PARTITIONED|
+ select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$21, %0->$$22], function-call: algebricks:lt, Args:[%0->$$20, %0->$$21], function-call: algebricks:ge, Args:[%0->$$22, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$22, 1995-01-01], function-call: algebricks:or, Args:[function-call: algebricks:eq, Args:[%0->$$24, MAIL], function-call: algebricks:eq, Args:[%0->$$24, SHIP]]])
+ -- STREAM_SELECT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan [$$21, $$20, $$22, $$24, $$10]<-[$$10, $$11, $$12, $$13, $$14, $$15, $$16, $$17, $$18, $$19, $$20, $$21, $$22, $$23, $$24, $$25] <- default.lineitem
+ -- DATASOURCE_SCAN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- data-scan [$$21, $$20, $$22, $$24, $$10]<-[$$10, $$11, $$12, $$13, $$14, $$15, $$16, $$17, $$18, $$19, $$20, $$21, $$22, $$23, $$24, $$25] <- default.lineitem
- -- DATASOURCE_SCAN |PARTITIONED|
- exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ exchange
+ -- HASH_PARTITION_EXCHANGE [$$1] |PARTITIONED|
+ data-scan [$$1, $$6]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7, $$8, $$9] <- default.orders
+ -- DATASOURCE_SCAN |PARTITIONED|
exchange
- -- HASH_PARTITION_EXCHANGE [$$1] |PARTITIONED|
- data-scan [$$1, $$6]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7, $$8, $$9] <- default.orders
- -- DATASOURCE_SCAN |PARTITIONED|
- exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|