[ASTERIXDB-2042][CLUS] Shut Down NC on Startup Completion Failure
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
- Report runtime exceptions of NC startup completion to CC.
- Shut down NC after reporting startup completion failure.
Change-Id: I6c2ff0130e5e3e35ccf42a66d6855e568dce1fbe
Reviewed-on: https://asterix-gerrit.ics.uci.edu/1943
Sonar-Qube: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Michael Blow <mblow@apache.org>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: abdullah alamoudi <bamousaa@gmail.com>
diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/message/NCLifecycleTaskReportMessage.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/message/NCLifecycleTaskReportMessage.java
index a01d70a..2b32e1f 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/message/NCLifecycleTaskReportMessage.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/message/NCLifecycleTaskReportMessage.java
@@ -28,7 +28,7 @@
private static final long serialVersionUID = 1L;
private final String nodeId;
private final boolean success;
- private Exception exception;
+ private Throwable exception;
public NCLifecycleTaskReportMessage(String nodeId, boolean success) {
this.nodeId = nodeId;
@@ -48,11 +48,11 @@
return success;
}
- public Exception getException() {
+ public Throwable getException() {
return exception;
}
- public void setException(Exception exception) {
+ public void setException(Throwable exception) {
this.exception = exception;
}
diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/message/StartupTaskResponseMessage.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/message/StartupTaskResponseMessage.java
index aaf3eb8..1611507 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/message/StartupTaskResponseMessage.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/message/StartupTaskResponseMessage.java
@@ -29,6 +29,7 @@
import org.apache.asterix.common.replication.INCLifecycleMessage;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.service.IControllerService;
+import org.apache.hyracks.control.nc.NCShutdownHook;
public class StartupTaskResponseMessage implements INCLifecycleMessage, INcAddressedMessage {
@@ -47,22 +48,28 @@
INCMessageBroker broker = (INCMessageBroker) appCtx.getServiceContext().getMessageBroker();
IControllerService cs = appCtx.getServiceContext().getControllerService();
boolean success = true;
- HyracksDataException exception = null;
try {
- for (INCLifecycleTask task : tasks) {
- task.perform(cs);
+ Throwable exception = null;
+ try {
+ for (INCLifecycleTask task : tasks) {
+ task.perform(cs);
+ }
+ } catch (Throwable e) { //NOSONAR all startup failures should be reported to CC
+ success = false;
+ exception = e;
}
- } catch (HyracksDataException e) {
- success = false;
- exception = e;
- }
- NCLifecycleTaskReportMessage result = new NCLifecycleTaskReportMessage(nodeId, success);
- result.setException(exception);
- try {
- broker.sendMessageToCC(result);
- } catch (Exception e) {
- LOGGER.log(Level.SEVERE, "Failed sending message to cc", e);
- throw HyracksDataException.create(e);
+ NCLifecycleTaskReportMessage result = new NCLifecycleTaskReportMessage(nodeId, success);
+ result.setException(exception);
+ try {
+ broker.sendMessageToCC(result);
+ } catch (Exception e) {
+ LOGGER.log(Level.SEVERE, "Failed sending message to cc", e);
+ }
+ } finally {
+ if (!success) {
+ // stop NC so that it can be started again
+ Runtime.getRuntime().exit(NCShutdownHook.FAILED_TO_STARTUP_EXIT_CODE); //NOSONAR startup failed
+ }
}
}
diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/NCShutdownHook.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/NCShutdownHook.java
index 4d0c159..162d912 100644
--- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/NCShutdownHook.java
+++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/NCShutdownHook.java
@@ -27,6 +27,8 @@
* operation is hanging for any reason
*/
public class NCShutdownHook extends Thread {
+
+ public static final int FAILED_TO_STARTUP_EXIT_CODE = 2;
private static final Logger LOGGER = Logger.getLogger(NCShutdownHook.class.getName());
private static final long SHUTDOWN_WAIT_TIME = 10 * 60 * 1000L;
private final Thread watchDog;