[NO ISSUE] Set MaxGCPauseMillis to not exceed 1/2 of dead node detection threshold

Help prevent nodes under heavy gc from missing too many heartbeats

Change-Id: I7e51db5ccfbb4771ba1f6e0264abfd69f833e7e7
Reviewed-on: https://asterix-gerrit.ics.uci.edu/2426
Sonar-Qube: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Contrib: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Murtadha Hubail <mhubail@apache.org>
diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java
index aa7a4fe..daed5e4 100644
--- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java
+++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java
@@ -18,6 +18,8 @@
  */
 package org.apache.hyracks.control.cc.work;
 
+import static org.apache.hyracks.api.config.Section.LOCALNC;
+import static org.apache.hyracks.api.config.Section.NC;
 import static org.apache.hyracks.control.common.controllers.ServiceConstants.NC_SERVICE_MAGIC_COOKIE;
 
 import java.io.IOException;
@@ -25,8 +27,9 @@
 import java.io.StringWriter;
 import java.net.Socket;
 
-import org.apache.hyracks.api.config.Section;
+import org.apache.hyracks.api.config.IApplicationConfig;
 import org.apache.hyracks.control.cc.ClusterControllerService;
+import org.apache.hyracks.control.common.config.ConfigManager;
 import org.apache.hyracks.control.common.controllers.NCConfig;
 import org.apache.hyracks.control.common.controllers.ServiceConstants.ServiceCommand;
 import org.apache.hyracks.control.common.work.AbstractWork;
@@ -34,6 +37,7 @@
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.ini4j.Ini;
+import org.ini4j.Profile.Section;
 
 /**
  * A work which is run at CC startup for each NC specified in the configuration file.
@@ -42,6 +46,7 @@
 public class TriggerNCWork extends AbstractWork {
 
     private static final Logger LOGGER = LogManager.getLogger();
+    private static final String JVM_ARG_MAX_GCPAUSE_MILLIS = "-XX:MaxGCPauseMillis=";
 
     private final ClusterControllerService ccs;
     private final String ncHost;
@@ -64,7 +69,7 @@
                     ObjectOutputStream oos = new ObjectOutputStream(s.getOutputStream());
                     oos.writeUTF(NC_SERVICE_MAGIC_COOKIE);
                     oos.writeUTF(ServiceCommand.START_NC.name());
-                    oos.writeUTF(TriggerNCWork.this.serializeIni(ccs.getCCConfig().getIni()));
+                    oos.writeUTF(TriggerNCWork.this.serializeIni());
                     oos.close();
                     return;
                     // QQQ Should probably have an ACK here
@@ -83,21 +88,52 @@
 
     /**
      * Given an Ini object, serialize it to String with some enhancements.
-     * @param ccini the ini file to decorate and forward to NC
      */
-    private String serializeIni(Ini ccini) throws IOException {
+    private String serializeIni() throws IOException {
         StringWriter iniString = new StringWriter();
-        ccini.get(Section.NC.sectionName()).putIfAbsent(NCConfig.Option.CLUSTER_ADDRESS.ini(),
-                ccs.getCCConfig().getClusterPublicAddress());
-        ccini.get(Section.NC.sectionName()).putIfAbsent(NCConfig.Option.CLUSTER_PORT.ini(),
-                String.valueOf(ccs.getCCConfig().getClusterPublicPort()));
+        ConfigManager configManager = ccs.getCCConfig().getConfigManager();
+        Ini ccini = configManager.toIni(false);
+        IApplicationConfig ncConfig = configManager.getNodeEffectiveConfig(ncId);
+        Section ncSection = getNcSection(ccini);
+        configClusterAddress(ncConfig, ncSection);
+        configMaxGcPause(ncConfig, ncSection);
         // Finally insert *this* NC's name into localnc section - this is a fixed
         // entry point so that NCs can determine where all their config is.
-        ccini.put(Section.LOCALNC.sectionName(), NCConfig.Option.NODE_ID.ini(), ncId);
+        ccini.put(LOCALNC.sectionName(), NCConfig.Option.NODE_ID.ini(), ncId);
         ccini.store(iniString);
         if (LOGGER.isDebugEnabled()) {
             LOGGER.debug("Returning Ini file:\n" + iniString.toString());
         }
         return iniString.toString();
     }
+
+    private Section getNcSection(Ini ccini) {
+        String sectionName = NC.sectionName() + "/" + ncId;
+        Section ncSection = ccini.get(sectionName);
+        if (ncSection == null) {
+            ncSection = ccini.add(sectionName);
+        }
+        return ncSection;
+    }
+
+    private void configClusterAddress(IApplicationConfig ncConfig, Section ncSection) {
+        if (ncConfig.getString(NCConfig.Option.CLUSTER_ADDRESS) == null) {
+            ncSection.put(NCConfig.Option.CLUSTER_ADDRESS.ini(), ccs.getCCConfig().getClusterPublicAddress());
+            ncSection.put(NCConfig.Option.CLUSTER_PORT.ini(), String.valueOf(ccs.getCCConfig().getClusterPublicPort()));
+        }
+    }
+
+    private void configMaxGcPause(IApplicationConfig ncConfig, Section ncSection) {
+        // if not already configured, set GC max pause time millis to not exceed 1/2 the total max heartbeat miss period
+        String ncJvmArgs = ncConfig.getString(NCConfig.Option.JVM_ARGS);
+        if (ncJvmArgs == null || !ncJvmArgs.contains(JVM_ARG_MAX_GCPAUSE_MILLIS)) {
+            String gcMaxPauseArg = JVM_ARG_MAX_GCPAUSE_MILLIS + getGcMaxPauseMillis();
+            ncSection.put(NCConfig.Option.JVM_ARGS.ini(),
+                    ncJvmArgs == null ? gcMaxPauseArg : ncJvmArgs + " " + gcMaxPauseArg);
+        }
+    }
+
+    private long getGcMaxPauseMillis() {
+        return ccs.getCCConfig().getHeartbeatPeriodMillis() * ccs.getCCConfig().getHeartbeatMaxMisses() / 2;
+    }
 }