Expected hash table footprint size calculation fix
- Fix the expected hash table footprint size calculation
based on the cardinality: the header-frame count term was
missing a multiplication by the slot unit size.
Change-Id: I9eb658a189fcf3d68978f627959c67d0c2641a29
Reviewed-on: https://asterix-gerrit.ics.uci.edu/1738
Sonar-Qube: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Yingyi Bu <buyingyi@gmail.com>
diff --git a/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/queries/q7.sqlpp b/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/queries/q7.sqlpp
index bc6a7a6..9a96ea4 100644
--- a/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/queries/q7.sqlpp
+++ b/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/queries/q7.sqlpp
@@ -17,8 +17,6 @@
* under the License.
*/
-// Error: sporadically dead node.
-
USE tpch;
WITH q7_volume_shipping_tmp AS
diff --git a/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/queries/q7_variant.sqlpp b/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/queries/q7_variant.sqlpp
new file mode 100644
index 0000000..5e71471
--- /dev/null
+++ b/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/queries/q7_variant.sqlpp
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE tpch;
+
+WITH q7_volume_shipping_tmp AS
+(
+ SELECT n1.n_name AS supp_nation,
+ n2.n_name AS cust_nation,
+ n1.n_nationkey AS s_nationkey,
+ n2.n_nationkey AS c_nationkey
+ FROM Nation as n1,
+ Nation as n2
+ WHERE (n1.n_name='FRANCE' AND n2.n_name='GERMANY') OR (n1.n_name='GERMANY' AND n2.n_name='FRANCE')
+)
+
+SELECT supp_nation, cust_nation, l_year, sum(volume) AS revenue
+FROM
+ (
+ SELECT t.supp_nation, t.cust_nation, GET_YEAR(l3.l_shipdate) AS l_year,
+ l3.l_extendedprice * (1 - l3.l_discount) AS volume
+ FROM q7_volume_shipping_tmp t JOIN
+ (
+ SELECT l2.l_shipdate, l2.l_extendedprice, l2.l_discount, l2.c_nationkey, s.s_nationkey
+ FROM Supplier s JOIN
+ (
+ SELECT l1.l_shipdate, l1.l_extendedprice, l1.l_discount, l1.l_suppkey, c.c_nationkey
+ FROM Customer c JOIN
+ (
+ SELECT l.l_shipdate, l.l_extendedprice, l.l_discount, l.l_suppkey, o.o_custkey
+ FROM Orders o
+ JOIN LineItem l ON o.o_orderkey = l.l_orderkey AND l.l_shipdate >= '1995-01-01'
+ AND l.l_shipdate <= '1996-12-31'
+ ) l1 ON c.c_custkey = l1.o_custkey
+
+ ) l2 ON s.s_suppkey = l2.l_suppkey
+ ) l3 ON t.c_nationkey = l3.c_nationkey AND t.s_nationkey = l3.s_nationkey
+ ) shipping
+GROUP BY supp_nation, cust_nation, l_year
+ORDER BY supp_nation, cust_nation, l_year;
+
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
index f699075..c44c583 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
@@ -480,8 +480,8 @@
hashFunctionGeneratorFactories).createPartitioner(level);
int frameSize = ctx.getInitialFrameSize();
- long buildPartSize = buildSideReader.getFileSize() / frameSize;
- long probePartSize = probeSideReader.getFileSize() / frameSize;
+ long buildPartSize = (long) Math.ceil((double) buildSideReader.getFileSize() / (double) frameSize);
+ long probePartSize = (long) Math.ceil((double) probeSideReader.getFileSize() / (double) frameSize);
int beforeMax = Math.max(buildSizeInTuple, probeSizeInTuple);
if (LOGGER.isLoggable(Level.FINE)) {
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/SimpleSerializableHashTable.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/SimpleSerializableHashTable.java
index 5b7d364..b1d1f27 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/SimpleSerializableHashTable.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/SimpleSerializableHashTable.java
@@ -442,7 +442,7 @@
* expected the byte size of the hash table
*/
public static long getExpectedTableFrameCount(long tableSize, int frameSize) {
- long numberOfHeaderFrame = (long) (Math.ceil((double) tableSize * 2 / (double) frameSize));
+ long numberOfHeaderFrame = (long) (Math.ceil((double) tableSize * 2 * getUnitSize() / (double) frameSize));
long numberOfContentFrame = (long) (Math
.ceil(((double) getNumberOfEntryInSlot() * 2 * getUnitSize() * tableSize) / (double) frameSize));
return numberOfHeaderFrame + numberOfContentFrame;
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupOperatorDescriptorTest.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupOperatorDescriptorTest.java
index 392aab5..794ff98 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupOperatorDescriptorTest.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupOperatorDescriptorTest.java
@@ -42,7 +42,7 @@
int frameSize = 256;
int resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)",
memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
- Assert.assertTrue(resultCardinality == 9);
+ Assert.assertTrue(resultCardinality == 10);
// Sets the frame size to 128KB.
frameSize = 128 * 1024;
@@ -51,31 +51,31 @@
memoryBudgetInBytes = 1024 * 1024;
resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)",
memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
- Assert.assertTrue(resultCardinality == 19660);
+ Assert.assertTrue(resultCardinality == 20388);
// Test 3: memory size: 100 MB, frame size: 128 KB, 1 column group-by
memoryBudgetInBytes = 1024 * 1024 * 100;
resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)",
memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
- Assert.assertTrue(resultCardinality == 1937883);
+ Assert.assertTrue(resultCardinality == 2016724);
// Test 4: memory size: 1 GB, frame size: 128 KB, 1 column group-by
memoryBudgetInBytes = 1024 * 1024 * 1024;
resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)",
memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
- Assert.assertTrue(resultCardinality == 19841178);
+ Assert.assertTrue(resultCardinality == 20649113);
// Test 5: memory size: 10 GB, frame size: 128 KB, 1 column group-by
memoryBudgetInBytes = 1024 * 1024 * 1024 * 10L;
resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)",
memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
- Assert.assertTrue(resultCardinality == 198409112);
+ Assert.assertTrue(resultCardinality == 206489044);
// Test 6: memory size: 100 GB, frame size: 128 KB, 1 column group-by
memoryBudgetInBytes = 1024 * 1024 * 1024 * 100L;
resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)",
memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
- Assert.assertTrue(resultCardinality == 1962753871);
+ Assert.assertTrue(resultCardinality == 2045222521);
// Test 7: memory size: 1 TB, frame size: 128 KB, 1 column group-by
// The cardinality will be set to Integer.MAX_VALUE in this case since the budget is too huge.
@@ -90,28 +90,28 @@
numberOfGroupByColumns = 2;
resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)",
memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
- Assert.assertTrue(resultCardinality == 16681);
+ Assert.assertTrue(resultCardinality == 17825);
// Test 9: memory size: 1 MB, frame size: 128 KB, 3 columns group-by
memoryBudgetInBytes = 1024 * 1024;
numberOfGroupByColumns = 3;
resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)",
memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
- Assert.assertTrue(resultCardinality == 15176);
+ Assert.assertTrue(resultCardinality == 16227);
// Test 10: memory size: 1 MB, frame size: 128 KB, 4 columns group-by
memoryBudgetInBytes = 1024 * 1024;
numberOfGroupByColumns = 4;
resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)",
memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
- Assert.assertTrue(resultCardinality == 13878);
+ Assert.assertTrue(resultCardinality == 14563);
// Test 11: memory size: 32 MB, frame size: 128 KB, 2 columns group-by
memoryBudgetInBytes = 1024 * 1024 * 32L;
numberOfGroupByColumns = 4;
resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)",
memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
- Assert.assertTrue(resultCardinality == 408503);
+ Assert.assertTrue(resultCardinality == 441913);
}
}