1. Add an asterix-specific IntroduceGroupByCombinerRule to deal with null-test in the nested plan in a group-by operator
2. Add a regression test case for issue782, including optimizer test and runtime test

Change-Id: Ia678414451ebddb7367238fef9f22a6753aa6206
Reviewed-on: http://fulliautomatix.ics.uci.edu:8443/65
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Till Westmann <westmann@gmail.com>
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nest_aggregate.aql b/asterix-app/src/test/resources/optimizerts/queries/nest_aggregate.aql
new file mode 100644
index 0000000..16fc8e4
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nest_aggregate.aql
@@ -0,0 +1,116 @@
+/*
+ * Description  : This test case is to verify the fix for issue782
+ * https://code.google.com/p/asterixdb/issues/detail?id=782
+ * Expected Res : SUCCESS
+ * Date         : 2nd Jun 2014
+ */
+
+drop dataverse tpch if exists;
+create dataverse tpch;
+
+use dataverse tpch;
+
+create type OrderType as closed {
+  o_orderkey: int32,
+  o_custkey: int32,
+  o_orderstatus: string,
+  o_totalprice: double,
+  o_orderdate: string,
+  o_orderpriority: string,
+  o_clerk: string,
+  o_shippriority: int32,
+  o_comment: string
+}
+
+create type CustomerType as closed {
+  c_custkey: int32,
+  c_name: string,
+  c_address: string,
+  c_nationkey: int32,
+  c_phone: string,
+  c_acctbal: double,
+  c_mktsegment: string,
+  c_comment: string
+}
+
+create type SupplierType as closed {
+  s_suppkey: int32,
+  s_name: string,
+  s_address: string,
+  s_nationkey: int32,
+  s_phone: string,
+  s_acctbal: double,
+  s_comment: string
+}
+
+create type NationType as closed {
+  n_nationkey: int32,
+  n_name: string,
+  n_regionkey: int32,
+  n_comment: string
+}
+
+create type RegionType as closed {
+  r_regionkey: int32,
+  r_name: string,
+  r_comment: string
+}
+
+create type PartType as closed {
+  p_partkey: int32,
+  p_name: string,
+  p_mfgr: string,
+  p_brand: string,
+  p_type: string,
+  p_size: int32,
+  p_container: string,
+  p_retailprice: double,
+  p_comment: string
+}
+
+create type PartSuppType as closed {
+  ps_partkey: int32,
+  ps_suppkey: int32,
+  ps_availqty: int32,
+  ps_supplycost: double,
+  ps_comment: string
+}
+
+create dataset Orders(OrderType)
+  primary key o_orderkey;
+create dataset Supplier(SupplierType)
+  primary key s_suppkey;
+create dataset Region(RegionType)
+  primary key r_regionkey;
+create dataset Nation(NationType)
+  primary key n_nationkey;
+create dataset Part(PartType)
+  primary key p_partkey;
+create dataset Partsupp(PartSuppType)
+  primary key ps_partkey, ps_suppkey;
+create dataset Customer(CustomerType)
+  primary key c_custkey;
+create dataset SelectedNation(NationType)
+  primary key n_nationkey;
+
+
+
+for $nation in dataset Nation
+for $sn in dataset SelectedNation
+where $nation.n_nationkey = $sn.n_nationkey  /*+ indexnl */
+return {
+  "nation_key": $nation.n_nationkey,
+  "name": $nation.n_name,
+  "aggregates": for $order in dataset Orders
+                for $customer in dataset Customer
+                where $order.o_custkey = $customer.c_custkey
+                and  $customer.c_nationkey = $nation.n_nationkey
+                group by $orderdate := $order.o_orderdate with $order
+                let $sum := sum(for $o in $order return $o.o_totalprice)
+                order by $sum
+                limit 3
+                return {
+                  "order_date": $orderdate,
+                  "sum_price": $sum
+                }
+}
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/optimizerts/results/nest_aggregate.plan b/asterix-app/src/test/resources/optimizerts/results/nest_aggregate.plan
new file mode 100644
index 0000000..f3c1040
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/results/nest_aggregate.plan
@@ -0,0 +1,68 @@
+-- DISTRIBUTE_RESULT  |PARTITIONED|
+  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+    -- STREAM_PROJECT  |PARTITIONED|
+      -- ASSIGN  |PARTITIONED|
+        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+          -- PRE_CLUSTERED_GROUP_BY[$$49]  |PARTITIONED|
+                  {
+                    -- AGGREGATE  |LOCAL|
+                      -- STREAM_LIMIT  |LOCAL|
+                        -- IN_MEMORY_STABLE_SORT [$$43(ASC)]  |LOCAL|
+                          -- MICRO_PRE_CLUSTERED_GROUP_BY[$$50]  |LOCAL|
+                                  {
+                                    -- AGGREGATE  |LOCAL|
+                                      -- NESTED_TUPLE_SOURCE  |LOCAL|
+                                  }
+                            -- STREAM_SELECT  |LOCAL|
+                              -- NESTED_TUPLE_SOURCE  |LOCAL|
+                  }
+            -- HASH_PARTITION_MERGE_EXCHANGE MERGE:[$$49(ASC), $$50(ASC)] HASH:[$$49]  |PARTITIONED|
+              -- PRE_CLUSTERED_GROUP_BY[$$39, $$32]  |PARTITIONED|
+                      {
+                        -- AGGREGATE  |LOCAL|
+                          -- STREAM_SELECT  |LOCAL|
+                            -- NESTED_TUPLE_SOURCE  |LOCAL|
+                      }
+                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                  -- STABLE_SORT [$$39(ASC), $$32(ASC)]  |PARTITIONED|
+                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                      -- STREAM_PROJECT  |PARTITIONED|
+                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                          -- HYBRID_HASH_JOIN [$$39][$$41]  |PARTITIONED|
+                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                              -- STREAM_PROJECT  |PARTITIONED|
+                                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                  -- HYBRID_HASH_JOIN [$$39][$$35]  |PARTITIONED|
+                                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                      -- STREAM_PROJECT  |PARTITIONED|
+                                        -- ASSIGN  |PARTITIONED|
+                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                            -- DATASOURCE_SCAN  |PARTITIONED|
+                                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                      -- STREAM_PROJECT  |PARTITIONED|
+                                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                          -- DATASOURCE_SCAN  |PARTITIONED|
+                                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                              -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                            -- HASH_PARTITION_EXCHANGE [$$41]  |PARTITIONED|
+                              -- ASSIGN  |PARTITIONED|
+                                -- STREAM_PROJECT  |PARTITIONED|
+                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                    -- HYBRID_HASH_JOIN [$$44][$$37]  |PARTITIONED|
+                                      -- HASH_PARTITION_EXCHANGE [$$44]  |PARTITIONED|
+                                        -- STREAM_PROJECT  |PARTITIONED|
+                                          -- ASSIGN  |PARTITIONED|
+                                            -- STREAM_PROJECT  |PARTITIONED|
+                                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                -- DATASOURCE_SCAN  |PARTITIONED|
+                                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                    -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                        -- STREAM_PROJECT  |PARTITIONED|
+                                          -- ASSIGN  |PARTITIONED|
+                                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                              -- DATASOURCE_SCAN  |PARTITIONED|
+                                                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                  -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
diff --git a/asterix-app/src/test/resources/optimizerts/results/q09_group_by.plan b/asterix-app/src/test/resources/optimizerts/results/q09_group_by.plan
index 8e69282..fc2abd2 100644
--- a/asterix-app/src/test/resources/optimizerts/results/q09_group_by.plan
+++ b/asterix-app/src/test/resources/optimizerts/results/q09_group_by.plan
@@ -4,7 +4,7 @@
       -- ASSIGN  |PARTITIONED|
         -- STREAM_PROJECT  |PARTITIONED|
           -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-            -- HYBRID_HASH_JOIN [$$62][$$85]  |PARTITIONED|
+            -- HYBRID_HASH_JOIN [$$62][$$84]  |PARTITIONED|
               -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                 -- STREAM_PROJECT  |PARTITIONED|
                   -- STREAM_SELECT  |PARTITIONED|
@@ -12,10 +12,10 @@
                       -- DATASOURCE_SCAN  |PARTITIONED|
                         -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                           -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
-              -- HASH_PARTITION_EXCHANGE [$$85]  |PARTITIONED|
+              -- HASH_PARTITION_EXCHANGE [$$84]  |PARTITIONED|
                 -- STREAM_PROJECT  |PARTITIONED|
                   -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                    -- HYBRID_HASH_JOIN [$$64, $$63][$$69, $$85]  |PARTITIONED|
+                    -- HYBRID_HASH_JOIN [$$64, $$63][$$69, $$84]  |PARTITIONED|
                       -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                         -- STREAM_PROJECT  |PARTITIONED|
                           -- ASSIGN  |PARTITIONED|
@@ -23,15 +23,15 @@
                               -- DATASOURCE_SCAN  |PARTITIONED|
                                 -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                   -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
-                      -- HASH_PARTITION_EXCHANGE [$$85, $$69]  |PARTITIONED|
+                      -- HASH_PARTITION_EXCHANGE [$$84, $$69]  |PARTITIONED|
                         -- STREAM_PROJECT  |PARTITIONED|
                           -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                             -- HYBRID_HASH_JOIN [$$65][$$69]  |PARTITIONED|
                               -- HASH_PARTITION_EXCHANGE [$$65]  |PARTITIONED|
                                 -- STREAM_PROJECT  |PARTITIONED|
                                   -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                    -- HYBRID_HASH_JOIN [$$72][$$66]  |PARTITIONED|
-                                      -- HASH_PARTITION_EXCHANGE [$$72]  |PARTITIONED|
+                                    -- HYBRID_HASH_JOIN [$$77][$$66]  |PARTITIONED|
+                                      -- HASH_PARTITION_EXCHANGE [$$77]  |PARTITIONED|
                                         -- STREAM_PROJECT  |PARTITIONED|
                                           -- ASSIGN  |PARTITIONED|
                                             -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
diff --git a/asterix-app/src/test/resources/runtimets/queries/tpch/nest_aggregate/nest_aggregate.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/tpch/nest_aggregate/nest_aggregate.1.ddl.aql
new file mode 100644
index 0000000..72bca42
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/tpch/nest_aggregate/nest_aggregate.1.ddl.aql
@@ -0,0 +1,70 @@
+/*
+ * Description  : This test case is to verify the fix for issue782
+ * https://code.google.com/p/asterixdb/issues/detail?id=782
+ * Expected Res : SUCCESS
+ * Date         : 2nd Jun 2014
+ */
+
+drop dataverse tpch if exists;
+create dataverse tpch;
+
+use dataverse tpch;
+
+create type OrderType as closed {
+  o_orderkey: int32,
+  o_custkey: int32,
+  o_orderstatus: string,
+  o_totalprice: double,
+  o_orderdate: string,
+  o_orderpriority: string,
+  o_clerk: string,
+  o_shippriority: int32,
+  o_comment: string
+}
+
+create type CustomerType as closed {
+  c_custkey: int32,
+  c_name: string,
+  c_address: string,
+  c_nationkey: int32,
+  c_phone: string,
+  c_acctbal: double,
+  c_mktsegment: string,
+  c_comment: string
+}
+
+create type SupplierType as closed {
+  s_suppkey: int32,
+  s_name: string,
+  s_address: string,
+  s_nationkey: int32,
+  s_phone: string,
+  s_acctbal: double,
+  s_comment: string
+}
+
+create type NationType as closed {
+  n_nationkey: int32,
+  n_name: string,
+  n_regionkey: int32,
+  n_comment: string
+}
+
+create type RegionType as closed {
+  r_regionkey: int32,
+  r_name: string,
+  r_comment: string
+}
+
+create dataset Orders(OrderType)
+  primary key o_orderkey;
+create dataset Supplier(SupplierType)
+  primary key s_suppkey;
+create dataset Region(RegionType)
+  primary key r_regionkey;
+create dataset Nation(NationType)
+  primary key n_nationkey;
+create dataset Customer(CustomerType)
+  primary key c_custkey;
+create dataset SelectedNation(NationType)
+  primary key n_nationkey;
diff --git a/asterix-app/src/test/resources/runtimets/queries/tpch/nest_aggregate/nest_aggregate.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/tpch/nest_aggregate/nest_aggregate.2.update.aql
new file mode 100644
index 0000000..f1b2dd4
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/tpch/nest_aggregate/nest_aggregate.2.update.aql
@@ -0,0 +1,32 @@
+/*
+ * Description  : This test case is to verify the fix for issue782
+ * https://code.google.com/p/asterixdb/issues/detail?id=782
+ * Expected Res : SUCCESS
+ * Date         : 2nd Jun 2014
+ */
+
+use dataverse tpch;
+
+load dataset Orders
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/tpch0.001/orders.tbl"),("format"="delimited-text"),("delimiter"="|"));
+
+load dataset Supplier
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/tpch0.001/supplier.tbl"),("format"="delimited-text"),("delimiter"="|"));
+
+load dataset Region
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/tpch0.001/region.tbl"),("format"="delimited-text"),("delimiter"="|"));
+
+load dataset Nation
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/tpch0.001/nation.tbl"),("format"="delimited-text"),("delimiter"="|"));
+
+load dataset Customer
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/tpch0.001/customer.tbl"),("format"="delimited-text"),("delimiter"="|"));
+
+load dataset SelectedNation
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/tpch0.001/selectednation.tbl"),("format"="delimited-text"),("delimiter"="|"));
diff --git a/asterix-app/src/test/resources/runtimets/queries/tpch/nest_aggregate/nest_aggregate.3.query.aql b/asterix-app/src/test/resources/runtimets/queries/tpch/nest_aggregate/nest_aggregate.3.query.aql
new file mode 100644
index 0000000..5321522
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/tpch/nest_aggregate/nest_aggregate.3.query.aql
@@ -0,0 +1,29 @@
+/*
+ * Description  : This test case is to verify the fix for issue782
+ * https://code.google.com/p/asterixdb/issues/detail?id=782
+ * Expected Res : SUCCESS
+ * Date         : 2nd Jun 2014
+ */
+
+use dataverse tpch;
+
+for $nation in dataset Nation
+for $sn in dataset SelectedNation
+where $nation.n_nationkey = $sn.n_nationkey  /*+ indexnl */
+return {
+  "nation_key": $nation.n_nationkey,
+  "name": $nation.n_name,
+  "aggregates":
+                for $order in dataset Orders
+                for $customer in dataset Customer
+                where $order.o_custkey = $customer.c_custkey
+                and  $customer.c_nationkey = $nation.n_nationkey
+                group by $orderdate := $order.o_orderdate with $order
+                let $sum := sum(for $o in $order return $o.o_totalprice)
+                order by $sum
+                limit 3
+                return {
+                      "order_date": $orderdate,
+                      "sum_price": $sum
+                }
+}
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/results/tpch/nest_aggregate/nest_aggregate.1.adm b/asterix-app/src/test/resources/runtimets/results/tpch/nest_aggregate/nest_aggregate.1.adm
new file mode 100644
index 0000000..f63ed7e
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/tpch/nest_aggregate/nest_aggregate.1.adm
@@ -0,0 +1,11 @@
+{ "nation_key": 1, "name": "ARGENTINA", "aggregates": [ { "order_date": "1997-08-14", "sum_price": 16763.95d }, { "order_date": "1997-11-26", "sum_price": 18653.09d }, { "order_date": "1998-04-20", "sum_price": 24637.96d } ] }
+{ "nation_key": 2, "name": "BRAZIL", "aggregates": [  ] }
+{ "nation_key": 19, "name": "ROMANIA", "aggregates": [  ] }
+{ "nation_key": 21, "name": "VIETNAM", "aggregates": [  ] }
+{ "nation_key": 3, "name": "CANADA", "aggregates": [ { "order_date": "1992-02-22", "sum_price": 1084.38d }, { "order_date": "1992-11-28", "sum_price": 4766.19d }, { "order_date": "1995-02-17", "sum_price": 4913.06d } ] }
+{ "nation_key": 23, "name": "UNITED KINGDOM", "aggregates": [  ] }
+{ "nation_key": 4, "name": "EGYPT", "aggregates": [ { "order_date": "1998-04-19", "sum_price": 3089.42d }, { "order_date": "1996-03-12", "sum_price": 3892.77d }, { "order_date": "1997-07-25", "sum_price": 11405.4d } ] }
+{ "nation_key": 22, "name": "RUSSIA", "aggregates": [  ] }
+{ "nation_key": 24, "name": "UNITED STATES", "aggregates": [  ] }
+{ "nation_key": 0, "name": "ALGERIA", "aggregates": [ { "order_date": "1994-05-27", "sum_price": 1051.15d }, { "order_date": "1994-05-08", "sum_price": 4819.91d }, { "order_date": "1993-08-27", "sum_price": 10500.27d } ] }
+{ "nation_key": 20, "name": "SAUDI ARABIA", "aggregates": [  ] }
diff --git a/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterix-app/src/test/resources/runtimets/testsuite.xml
index 7d1e611..8cf4cd9 100644
--- a/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -4292,6 +4292,11 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="tpch">
+      <compilation-unit name="nest_aggregate">
+        <output-dir compare="Text">nest_aggregate</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="tpch">
       <compilation-unit name="q10_returned_item">
         <output-dir compare="Text">q10_returned_item</output-dir>
       </compilation-unit>