1. fix asterixdb issue 782
--- push nested pipeline before a nested group-by operator into the combiner group-by operator in the AbstractIntroduceGroupByCombinerRule
--- add a processNullTest abstract method in the AbstractIntroduceGroupByCombinerRule
--- fix the join order in a subplan
2. allow user-configurable buffer cache page size (B-tree page size) in Pregelix
commit 4d9a11d0c05281a41bbabe03066478fe851b3a2b
Author: buyingyi <buyingyi@gmail.com>
Change-Id: Ib7761370df8606c55ac34c126554319586e824f0
Reviewed-on: http://fulliautomatix.ics.uci.edu:8443/64
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Till Westmann <westmann@gmail.com>
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/PregelixHyracksIntegrationUtil.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/PregelixHyracksIntegrationUtil.java
index 13a08b7..c584971 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/PregelixHyracksIntegrationUtil.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/PregelixHyracksIntegrationUtil.java
@@ -15,6 +15,7 @@
package edu.uci.ics.pregelix.core.util;
import java.io.File;
+import java.util.Collections;
import java.util.EnumSet;
import org.apache.commons.io.FileUtils;
@@ -78,6 +79,7 @@
ncConfig1.nodeId = NC1_ID;
ncConfig1.ioDevices = "dev1,dev2";
ncConfig1.appNCMainClass = NCApplicationEntryPoint.class.getName();
+ ncConfig1.appArgs = Collections.singletonList("65536");
nc1 = new NodeControllerService(ncConfig1);
nc1.start();
@@ -90,6 +92,7 @@
ncConfig2.nodeId = NC2_ID;
ncConfig2.appNCMainClass = NCApplicationEntryPoint.class.getName();
ncConfig2.ioDevices = "dev3,dev4";
+ ncConfig2.appArgs = Collections.singletonList("65536");
nc2 = new NodeControllerService(ncConfig2);
nc2.start();
diff --git a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/context/RuntimeContext.java b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/context/RuntimeContext.java
index a8307d7..0f10b4d 100644
--- a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/context/RuntimeContext.java
+++ b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/context/RuntimeContext.java
@@ -66,8 +66,8 @@
}
};
- public RuntimeContext(INCApplicationContext appCtx) {
- int pageSize = 64 * 1024;
+ public RuntimeContext(INCApplicationContext appCtx, int vFrameSize) {
+ int pageSize = vFrameSize;
long memSize = Runtime.getRuntime().maxMemory();
long bufferSize = memSize / 4;
int numPages = (int) (bufferSize / pageSize);
diff --git a/pregelix/pregelix-dist/src/main/resources/conf/cluster.properties b/pregelix/pregelix-dist/src/main/resources/conf/cluster.properties
index 79f42ed..82a7bfe 100644
--- a/pregelix/pregelix-dist/src/main/resources/conf/cluster.properties
+++ b/pregelix/pregelix-dist/src/main/resources/conf/cluster.properties
@@ -16,7 +16,7 @@
CC_CLIENTPORT=3099
#The CC port for Hyracks cluster management
-CC_CLUSTERPORT=1099
+CC_CLUSTERPORT=4099
#The CC port for REST communication
CC_HTTPPORT=16001
@@ -42,6 +42,9 @@
#The frame size of the internal dataflow engine
FRAME_SIZE=65536
+#The frame size of the vertex storage; it should be larger than the byte size of the largest vertex
+VFRAME_SIZE=65536
+
#The number of jobs whose logs are kept in-memory on the CC
JOB_HISTORY_SIZE=0
diff --git a/pregelix/pregelix-dist/src/main/resources/scripts/startnc.sh b/pregelix/pregelix-dist/src/main/resources/scripts/startnc.sh
index f9b6a4e..1cba489 100644
--- a/pregelix/pregelix-dist/src/main/resources/scripts/startnc.sh
+++ b/pregelix/pregelix-dist/src/main/resources/scripts/startnc.sh
@@ -96,7 +96,7 @@
cmd=( "${PREGELIX_HOME}/bin/pregelixnc" )
cmd+=( -cc-host $CCHOST -cc-port $CC_CLUSTERPORT
-cluster-net-ip-address $IPADDR -data-ip-address $IPADDR -result-ip-address $IPADDR
- -node-id $NODEID -iodevices "${IO_DIRS}" -net-buffer-count 5 );
+ -node-id $NODEID -iodevices "${IO_DIRS}" -net-buffer-count 5 -- ${VFRAME_SIZE});
printf "\n\n\n********************************************\nStarting NC with command %s\n\n" "${cmd[*]}" >> "$NCLOGS_DIR/$NODEID.log"
${cmd[@]} >> "$NCLOGS_DIR/$NODEID.log" 2>&1 &
diff --git a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/bootstrap/NCApplicationEntryPoint.java b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/bootstrap/NCApplicationEntryPoint.java
index fe72d7a..8ca227a 100644
--- a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/bootstrap/NCApplicationEntryPoint.java
+++ b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/bootstrap/NCApplicationEntryPoint.java
@@ -23,7 +23,11 @@
@Override
public void start(INCApplicationContext ncAppCtx, String[] args) throws Exception {
- rCtx = new RuntimeContext(ncAppCtx);
+ int vFrameSize = 65536;
+ if(args.length >0){
+ vFrameSize = Integer.parseInt(args[0]);
+ }
+ rCtx = new RuntimeContext(ncAppCtx, vFrameSize);
ncAppCtx.setApplicationObject(rCtx);
}