[NO ISSUE] Set MaxGCPauseMillis to not exceed 1/2 of dead node detection threshold
Help prevent nodes under heavy GC from missing too many heartbeats.
Change-Id: I7e51db5ccfbb4771ba1f6e0264abfd69f833e7e7
Reviewed-on: https://asterix-gerrit.ics.uci.edu/2426
Sonar-Qube: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Contrib: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Murtadha Hubail <mhubail@apache.org>
diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java
index aa7a4fe..daed5e4 100644
--- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java
+++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java
@@ -18,6 +18,8 @@
*/
package org.apache.hyracks.control.cc.work;
+import static org.apache.hyracks.api.config.Section.LOCALNC;
+import static org.apache.hyracks.api.config.Section.NC;
import static org.apache.hyracks.control.common.controllers.ServiceConstants.NC_SERVICE_MAGIC_COOKIE;
import java.io.IOException;
@@ -25,8 +27,9 @@
import java.io.StringWriter;
import java.net.Socket;
-import org.apache.hyracks.api.config.Section;
+import org.apache.hyracks.api.config.IApplicationConfig;
import org.apache.hyracks.control.cc.ClusterControllerService;
+import org.apache.hyracks.control.common.config.ConfigManager;
import org.apache.hyracks.control.common.controllers.NCConfig;
import org.apache.hyracks.control.common.controllers.ServiceConstants.ServiceCommand;
import org.apache.hyracks.control.common.work.AbstractWork;
@@ -34,6 +37,7 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.ini4j.Ini;
+import org.ini4j.Profile.Section;
/**
* A work which is run at CC startup for each NC specified in the configuration file.
@@ -42,6 +46,7 @@
public class TriggerNCWork extends AbstractWork {
private static final Logger LOGGER = LogManager.getLogger();
+ private static final String JVM_ARG_MAX_GCPAUSE_MILLIS = "-XX:MaxGCPauseMillis=";
private final ClusterControllerService ccs;
private final String ncHost;
@@ -64,7 +69,7 @@
ObjectOutputStream oos = new ObjectOutputStream(s.getOutputStream());
oos.writeUTF(NC_SERVICE_MAGIC_COOKIE);
oos.writeUTF(ServiceCommand.START_NC.name());
- oos.writeUTF(TriggerNCWork.this.serializeIni(ccs.getCCConfig().getIni()));
+ oos.writeUTF(TriggerNCWork.this.serializeIni());
oos.close();
return;
// QQQ Should probably have an ACK here
@@ -83,21 +88,52 @@
/**
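- * Given an Ini object, serialize it to String with some enhancements.
- * @param ccini the ini file to decorate and forward to NC
+ * Generate the Ini from the CC's config manager, add the enhancements for this NC
+ * (cluster address/port, GC max pause, node id) and serialize it to String.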
*/
- private String serializeIni(Ini ccini) throws IOException {
+ private String serializeIni() throws IOException {
StringWriter iniString = new StringWriter();
- ccini.get(Section.NC.sectionName()).putIfAbsent(NCConfig.Option.CLUSTER_ADDRESS.ini(),
- ccs.getCCConfig().getClusterPublicAddress());
- ccini.get(Section.NC.sectionName()).putIfAbsent(NCConfig.Option.CLUSTER_PORT.ini(),
- String.valueOf(ccs.getCCConfig().getClusterPublicPort()));
+ ConfigManager configManager = ccs.getCCConfig().getConfigManager();
+ // The Ini is now generated from the config manager instead of being passed in by the caller.
+ // NOTE(review): the meaning of the 'false' argument to toIni is not visible here - confirm.
+ Ini ccini = configManager.toIni(false);
+ IApplicationConfig ncConfig = configManager.getNodeEffectiveConfig(ncId);
+ // Enhancements are written to this NC's own section ("<NC section name>/<ncId>"), and only
+ // when this NC's effective config does not already provide the corresponding value.
+ Section ncSection = getNcSection(ccini);
+ configClusterAddress(ncConfig, ncSection);
+ configMaxGcPause(ncConfig, ncSection);
// Finally insert *this* NC's name into localnc section - this is a fixed
// entry point so that NCs can determine where all their config is.
- ccini.put(Section.LOCALNC.sectionName(), NCConfig.Option.NODE_ID.ini(), ncId);
+ ccini.put(LOCALNC.sectionName(), NCConfig.Option.NODE_ID.ini(), ncId);
ccini.store(iniString);
+ // Guard avoids building the (potentially large) log message unless debug logging is on.
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Returning Ini file:\n" + iniString.toString());
}
return iniString.toString();
}
+
+    /**
+     * Looks up the Ini section named {@code <NC section name>/<ncId>} for this NC, adding a
+     * section under that name when the Ini does not contain one yet.
+     *
+     * @param ccini the Ini to search and, if necessary, extend
+     * @return the existing or newly added section for this NC
+     */
+    private Section getNcSection(Ini ccini) {
+        final String name = NC.sectionName() + "/" + ncId;
+        final Section existing = ccini.get(name);
+        return existing != null ? existing : ccini.add(name);
+    }
+
+    /**
+     * Writes the CC's public address and port into the NC's section, unless the NC's effective
+     * config already specifies a cluster address. Address and port are always written together.
+     *
+     * @param ncConfig the effective config of this NC
+     * @param ncSection the Ini section of this NC that receives the values
+     */
+    private void configClusterAddress(IApplicationConfig ncConfig, Section ncSection) {
+        final boolean addressConfigured = ncConfig.getString(NCConfig.Option.CLUSTER_ADDRESS) != null;
+        if (addressConfigured) {
+            // a configured cluster address wins; the port is left untouched as well
+            return;
+        }
+        ncSection.put(NCConfig.Option.CLUSTER_ADDRESS.ini(), ccs.getCCConfig().getClusterPublicAddress());
+        final String publicPort = String.valueOf(ccs.getCCConfig().getClusterPublicPort());
+        ncSection.put(NCConfig.Option.CLUSTER_PORT.ini(), publicPort);
+    }
+
+    /**
+     * Appends a {@code -XX:MaxGCPauseMillis=} argument to the NC's JVM args when the NC's
+     * effective config does not already contain one, so that the GC pause target does not
+     * exceed 1/2 of the total max heartbeat miss period.
+     *
+     * @param ncConfig the effective config of this NC
+     * @param ncSection the Ini section of this NC that receives the updated JVM args
+     */
+    private void configMaxGcPause(IApplicationConfig ncConfig, Section ncSection) {
+        final String existingArgs = ncConfig.getString(NCConfig.Option.JVM_ARGS);
+        if (existingArgs != null && existingArgs.contains(JVM_ARG_MAX_GCPAUSE_MILLIS)) {
+            // an explicitly configured pause target is respected as-is
+            return;
+        }
+        final String pauseArg = JVM_ARG_MAX_GCPAUSE_MILLIS + getGcMaxPauseMillis();
+        final String mergedArgs;
+        if (existingArgs == null) {
+            mergedArgs = pauseArg;
+        } else {
+            // keep whatever JVM args were configured and add ours at the end
+            mergedArgs = existingArgs + " " + pauseArg;
+        }
+        ncSection.put(NCConfig.Option.JVM_ARGS.ini(), mergedArgs);
+    }
+
+    /**
+     * Computes the default GC max pause target: half of the heartbeat period (in milliseconds)
+     * multiplied by the max number of missed heartbeats, both read from the CC config.
+     *
+     * @return the pause target in milliseconds (integer division, so an odd product is truncated)
+     */
+    private long getGcMaxPauseMillis() {
+        // widen to long before multiplying so the product is computed in 64 bits and cannot
+        // overflow should both getters return int
+        final long heartbeatPeriodMillis = ccs.getCCConfig().getHeartbeatPeriodMillis();
+        return heartbeatPeriodMillis * ccs.getCCConfig().getHeartbeatMaxMisses() / 2;
+    }
}