[ASTERIXDB-2783] Fix hash collision for hash join/groupby

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
- Use a fixed non-zero seed (instead of 0) for the hash functions of
hash join/group-by so that their hashing does not collide with the
hash partitioning done by the hash exchange operator
- Increase the join memory in the test configurations (from 256KB to
768KB, or 1024KB in cc3.conf) so that the large object join test case
can still pass.

Change-Id: If2aa02384129293e80015efc3d1f60b57f98909c
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/8123
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Dmitry Lychagin <dmitry.lychagin@couchbase.com>
diff --git a/asterixdb/asterix-app/src/main/resources/cc.conf b/asterixdb/asterix-app/src/main/resources/cc.conf
index ccd35f8..d5da6d4 100644
--- a/asterixdb/asterix-app/src/main/resources/cc.conf
+++ b/asterixdb/asterix-app/src/main/resources/cc.conf
@@ -55,7 +55,7 @@
 compiler.framesize=32KB
 compiler.sortmemory=320KB
 compiler.groupmemory=160KB
-compiler.joinmemory=256KB
+compiler.joinmemory=768KB
 compiler.textsearchmemory=160KB
 compiler.windowmemory=192KB
 compiler.sort.parallel=false
diff --git a/asterixdb/asterix-app/src/main/resources/cc3.conf b/asterixdb/asterix-app/src/main/resources/cc3.conf
index 88362aa..d2a8556 100644
--- a/asterixdb/asterix-app/src/main/resources/cc3.conf
+++ b/asterixdb/asterix-app/src/main/resources/cc3.conf
@@ -51,7 +51,7 @@
 compiler.framesize=32KB
 compiler.sortmemory=320KB
 compiler.groupmemory=160KB
-compiler.joinmemory=256KB
+compiler.joinmemory=1024KB
 compiler.textsearchmemory=160KB
 compiler.windowmemory=192KB
 compiler.parallelism=3
diff --git a/asterixdb/asterix-app/src/test/resources/cc-compression.conf b/asterixdb/asterix-app/src/test/resources/cc-compression.conf
index c8d9780..a3047a0 100644
--- a/asterixdb/asterix-app/src/test/resources/cc-compression.conf
+++ b/asterixdb/asterix-app/src/test/resources/cc-compression.conf
@@ -50,7 +50,7 @@
 compiler.framesize=32KB
 compiler.sortmemory=320KB
 compiler.groupmemory=160KB
-compiler.joinmemory=256KB
+compiler.joinmemory=768KB
 compiler.textsearchmemory=160KB
 compiler.windowmemory=192KB
 messaging.frame.size=4096
diff --git a/asterixdb/asterix-app/src/test/resources/cc-ssl.conf b/asterixdb/asterix-app/src/test/resources/cc-ssl.conf
index 499e9fc..1c0a68f 100644
--- a/asterixdb/asterix-app/src/test/resources/cc-ssl.conf
+++ b/asterixdb/asterix-app/src/test/resources/cc-ssl.conf
@@ -62,7 +62,7 @@
 compiler.framesize=32KB
 compiler.sortmemory=320KB
 compiler.groupmemory=160KB
-compiler.joinmemory=256KB
+compiler.joinmemory=768KB
 compiler.textsearchmemory=160KB
 compiler.windowmemory=192KB
 messaging.frame.size=4096
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
index e377fd1..cc96921 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
@@ -12,7 +12,7 @@
     "compiler\.groupmemory" : 163840,
     "compiler\.indexonly" : true,
     "compiler\.internal\.sanitycheck" : true,
-    "compiler\.joinmemory" : 262144,
+    "compiler\.joinmemory" : 786432,
     "compiler\.parallelism" : 0,
     "compiler\.sort\.parallel" : false,
     "compiler\.sort\.samples" : 100,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
index e51d12f..68d3079 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
@@ -12,7 +12,7 @@
     "compiler\.groupmemory" : 163840,
     "compiler\.indexonly" : true,
     "compiler\.internal\.sanitycheck" : false,
-    "compiler\.joinmemory" : 262144,
+    "compiler\.joinmemory" : 1048576,
     "compiler\.parallelism" : 3,
     "compiler\.sort\.parallel" : true,
     "compiler\.sort\.samples" : 100,
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupBuildOperatorNodePushable.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupBuildOperatorNodePushable.java
index 43f57af..20d223e 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupBuildOperatorNodePushable.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupBuildOperatorNodePushable.java
@@ -38,6 +38,11 @@
 
 public class ExternalGroupBuildOperatorNodePushable extends AbstractUnaryInputSinkOperatorNodePushable
         implements IRunFileWriterGenerator {
+    /**
+     * Use a fixed, arbitrarily chosen non-zero seed to avoid hash collisions with the hash exchange operator.
+     * See https://issues.apache.org/jira/browse/ASTERIXDB-2783 for more details.
+     */
+    private static final int INIT_SEED = 573275022;
 
     private static final Logger LOGGER = LogManager.getLogger();
     private final IHyracksTaskContext ctx;
@@ -85,7 +90,7 @@
         state = new ExternalGroupState(ctx.getJobletContext().getJobId(), stateId);
         ISpillableTable table = spillableTableFactory.buildSpillableTable(ctx, tableSize, fileSize, keyFields,
                 comparators, firstNormalizerComputer, aggregatorFactory, inRecordDescriptor, outRecordDescriptor,
-                framesLimit, 0);
+                framesLimit, INIT_SEED);
         RunFileWriter[] runFileWriters = new RunFileWriter[table.getNumPartitions()];
         this.externalGroupBy = new ExternalHashGroupBy(this, table, runFileWriters, inRecordDescriptor);
 
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
index 97f9c24..c142113 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
@@ -107,6 +107,12 @@
  */
 
 public class OptimizedHybridHashJoinOperatorDescriptor extends AbstractOperatorDescriptor {
+    /**
+     * Use a fixed, arbitrarily chosen non-zero seed to avoid hash collisions with the hash exchange operator.
+     * See https://issues.apache.org/jira/browse/ASTERIXDB-2783 for more details.
+     */
+    private static final int INIT_SEED = 982028031;
+
     private static final int BUILD_AND_PARTITION_ACTIVITY_ID = 0;
     private static final int PARTITION_AND_JOIN_ACTIVITY_ID = 1;
 
@@ -269,10 +275,11 @@
                         ctx.getJobletContext().getJobId(), new TaskId(getActivityId(), partition));
 
                 ITuplePartitionComputer probeHpc =
-                        new FieldHashPartitionComputerFamily(probeKeys, propHashFunctionFactories).createPartitioner(0);
+                        new FieldHashPartitionComputerFamily(probeKeys, propHashFunctionFactories)
+                                .createPartitioner(INIT_SEED);
                 ITuplePartitionComputer buildHpc =
                         new FieldHashPartitionComputerFamily(buildKeys, buildHashFunctionFactories)
-                                .createPartitioner(0);
+                                .createPartitioner(INIT_SEED);
                 boolean failed = false;
 
                 @Override