[ASTERIXDB-3016][RT] Fix failure in hash groupby
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
- Modify hash group by to force garbage collection on the
hash table if a tuple could not be inserted into it
- Make hash group by clean up its run files in case
of an error
Change-Id: I7a133fa1d0555ebbcb7a9e3cb7445757716c9a2a
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/15325
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Dmitry Lychagin <dmitry.lychagin@couchbase.com>
Reviewed-by: Till Westmann <till@couchbase.com>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.1.ddl.sqlpp
new file mode 100644
index 0000000..fdfee88
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.1.ddl.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse tpcds if exists;
+create dataverse tpcds;
+
+use tpcds;
+
+create dataset item(i_item_sk string not unknown) open type primary key i_item_sk;
+
+create dataset inventory(inv_date_sk string not unknown, inv_item_sk string not unknown,
+ inv_warehouse_sk string not unknown) open type primary key inv_date_sk, inv_item_sk, inv_warehouse_sk;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.2.update.sqlpp
new file mode 100644
index 0000000..f8fe178
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.2.update.sqlpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use tpcds;
+
+set `import-private-functions` `true`;
+
+insert into item (select value object_remove(t, "table_name") from tpcds_datagen("item", 0.5) t);
+
+insert into inventory (select value object_remove(t, "table_name") from tpcds_datagen("inventory", 0.5) t);
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.3.query.sqlpp
new file mode 100644
index 0000000..01158b9
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.3.query.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use tpcds;
+
+SELECT ROUND(AVG(inv.inv_quantity_on_hand), 1) qoh, i.i_product_name
+FROM inventory inv, item i
+WHERE inv.inv_item_sk /*+hash-bcast*/ = i.i_item_sk
+/*+ hash */ GROUP BY i.i_product_name
+ORDER BY qoh, i.i_product_name
+LIMIT 1;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.3.adm
new file mode 100644
index 0000000..96494f4
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.3.adm
@@ -0,0 +1 @@
+{ "qoh": 402.0, "i_product_name": "ableoughtn st" }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index 8b6b7c0..81567ff 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@ -6099,6 +6099,11 @@
<output-dir compare="Text">hash-group-by-decor</output-dir>
</compilation-unit>
</test-case>
+ <test-case FilePath="group-by">
+ <compilation-unit name="query-ASTERIXDB-3016">
+ <output-dir compare="Text">query-ASTERIXDB-3016</output-dir>
+ </compilation-unit>
+ </test-case>
</test-group>
<test-group name="index-join">
<test-case FilePath="index-join">
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/HashSpillableTableFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/HashSpillableTableFactory.java
index 4f0c304..1e5c121 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/HashSpillableTableFactory.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/HashSpillableTableFactory.java
@@ -177,16 +177,21 @@
}
// Checks whether the garbage collection is required and conducts a garbage collection if so.
- if (hashTableForTuplePointer.isGarbageCollectionNeeded()) {
+ collectGarbageInHashTableForTuplePointer(false);
+ bufferManager.clearPartition(partition);
+ }
+
+ private boolean collectGarbageInHashTableForTuplePointer(boolean force) throws HyracksDataException {
+ if (force || hashTableForTuplePointer.isGarbageCollectionNeeded()) {
int numberOfFramesReclaimed =
hashTableForTuplePointer.collectGarbage(bufferAccessor, tpcIntermediate);
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Garbage Collection on Hash table is done. Deallocated frames:"
+ numberOfFramesReclaimed);
}
+ return numberOfFramesReclaimed != -1;
}
-
- bufferManager.clearPartition(partition);
+ return false;
}
private int getPartition(int entryInHashTable) {
@@ -234,11 +239,18 @@
}
// Insertion to the hash table
- if (!hashTableForTuplePointer.insert(entryInHashTable, pointer)) {
- // To preserve the atomicity of this method, we need to undo the effect
- // of the above bufferManager.insertTuple() call since the given insertion has failed.
- bufferManager.cancelInsertTuple(pid);
- return false;
+ boolean inserted = hashTableForTuplePointer.insert(entryInHashTable, pointer);
+ if (!inserted) {
+ // Force garbage collection on the hash table and attempt to insert again
+ if (collectGarbageInHashTableForTuplePointer(true)) {
+ inserted = hashTableForTuplePointer.insert(entryInHashTable, pointer);
+ }
+ if (!inserted) {
+ // To preserve the atomicity of this method, we need to undo the effect
+ // of the above bufferManager.insertTuple() call since the given insertion has failed.
+ bufferManager.cancelInsertTuple(pid);
+ return false;
+ }
}
return true;
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupWriteOperatorNodePushable.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupWriteOperatorNodePushable.java
index 1a6f4ef..8618528 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupWriteOperatorNodePushable.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupWriteOperatorNodePushable.java
@@ -142,11 +142,15 @@
runs[i].getFileSize(), gbyFields, fdFields, groupByComparators, nmkComputer,
mergeAggregatorFactory, partialAggRecordDesc, outRecordDesc, frameLimit, level);
RunFileWriter[] runFileWriters = new RunFileWriter[partitionTable.getNumPartitions()];
- int[] sizeInTuplesNextLevel =
- buildGroup(runs[i].createDeleteOnCloseReader(), partitionTable, runFileWriters);
- for (int idFile = 0; idFile < runFileWriters.length; idFile++) {
- if (runFileWriters[idFile] != null) {
- generatedRuns.add(runFileWriters[idFile]);
+ int[] sizeInTuplesNextLevel;
+ try {
+ sizeInTuplesNextLevel =
+ buildGroup(runs[i].createDeleteOnCloseReader(), partitionTable, runFileWriters);
+ } finally {
+ for (int idFile = 0; idFile < runFileWriters.length; idFile++) {
+ if (runFileWriters[idFile] != null) {
+ generatedRuns.add(runFileWriters[idFile]);
+ }
}
}
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/ISerializableTable.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/ISerializableTable.java
index 51f9984..58ad213 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/ISerializableTable.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/ISerializableTable.java
@@ -54,7 +54,7 @@
* required to access the real tuple to calculate the original hash value
* @param tpc:
* hash function
- * @return the number of frames that are reclaimed.
+ * @return the number of frames that are reclaimed. The value -1 is returned when no compaction was happened.
* @throws HyracksDataException
*/
int collectGarbage(ITuplePointerAccessor bufferAccessor, ITuplePartitionComputer tpc) throws HyracksDataException;