[NO ISSUE][*DB] Handle unchecked exceptions during global recovery
Change-Id: If4766f783a0e1b398d81681be8bc70b8a507d673
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11046
Reviewed-by: Michael Blow <mblow@apache.org>
Reviewed-by: Ian Maxon <imaxon@uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
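For context, a minimal standalone sketch of the pattern this change introduces: run recovery on an executor thread, catch Throwable rather than only HyracksDataException so unchecked exceptions and errors also end the process, and guard the fatal log call so a failure while logging cannot prevent the halt. The class name, the recover() stub, the exit code value, and the use of System.err and Runtime.halt in place of LOGGER.fatal and ExitUtil.exit are placeholders for illustration, not the actual AsterixDB code:

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;

    // Sketch only: any Throwable thrown by the recovery task is treated as fatal,
    // and the process is halted even if reporting the failure itself throws.
    public final class RecoverySketch {

        private static final int EC_FAILED_TO_RECOVER = 1; // hypothetical exit code

        public static void main(String[] args) {
            ExecutorService executor = Executors.newSingleThreadExecutor();
            executor.submit(() -> {
                try {
                    recover(); // stand-in for GlobalRecoveryManager.recover(appCtx)
                } catch (Throwable e) {
                    try {
                        // The real code logs with LOGGER.fatal(...); any failure here
                        // must not prevent the shutdown below.
                        System.err.println("Global recovery failed. Shutting down... " + e);
                    } catch (Throwable ignore) {
                        // ignore: halting the process is the only remaining concern
                    }
                    // The real code calls ExitUtil.exit(ExitUtil.EC_FAILED_TO_RECOVER).
                    Runtime.getRuntime().halt(EC_FAILED_TO_RECOVER);
                }
            });
            executor.shutdown();
        }

        private static void recover() throws Exception {
            // Placeholder for metadata init, global storage cleanup, doRecovery, commit.
            throw new IllegalStateException("simulated unchecked failure during recovery");
        }
    }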
diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/GlobalRecoveryManager.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/GlobalRecoveryManager.java
index 5870d3a..e1c39a0 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/GlobalRecoveryManager.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/GlobalRecoveryManager.java
@@ -47,11 +47,9 @@
import org.apache.asterix.metadata.utils.MetadataConstants;
import org.apache.hyracks.api.application.ICCServiceContext;
import org.apache.hyracks.api.client.IHyracksClientConnection;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.job.JobId;
import org.apache.hyracks.api.job.JobSpecification;
import org.apache.hyracks.util.ExitUtil;
-import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

@@ -95,15 +93,19 @@
synchronized (this) {
if (!recovering) {
recovering = true;
- /**
+ /*
* Perform recovery on a different thread to avoid deadlocks in
* {@link org.apache.asterix.common.cluster.IClusterStateManager}
*/
serviceCtx.getControllerService().getExecutor().submit(() -> {
try {
recover(appCtx);
- } catch (HyracksDataException e) {
- LOGGER.log(Level.ERROR, "Global recovery failed. Shutting down...", e);
+ } catch (Throwable e) {
+ try {
+ LOGGER.fatal("Global recovery failed. Shutting down...", e);
+ } catch (Throwable ignore) {
+ // ignoring exception trying to log, just do the halt
+ }
ExitUtil.exit(ExitUtil.EC_FAILED_TO_RECOVER);
}
});
@@ -112,24 +114,20 @@
}
}

- protected void recover(ICcApplicationContext appCtx) throws HyracksDataException {
- try {
- LOGGER.info("Starting Global Recovery");
- MetadataManager.INSTANCE.init();
- MetadataTransactionContext mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
- if (appCtx.getStorageProperties().isStorageGlobalCleanup()) {
- int storageGlobalCleanupTimeout = appCtx.getStorageProperties().getStorageGlobalCleanupTimeout();
- performGlobalStorageCleanup(mdTxnCtx, storageGlobalCleanupTimeout);
- }
- mdTxnCtx = doRecovery(appCtx, mdTxnCtx);
- MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
- recoveryCompleted = true;
- recovering = false;
- LOGGER.info("Global Recovery Completed. Refreshing cluster state...");
- appCtx.getClusterStateManager().refreshState();
- } catch (Exception e) {
- throw HyracksDataException.create(e);
+ protected void recover(ICcApplicationContext appCtx) throws Exception {
+ LOGGER.info("Starting Global Recovery");
+ MetadataManager.INSTANCE.init();
+ MetadataTransactionContext mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
+ if (appCtx.getStorageProperties().isStorageGlobalCleanup()) {
+ int storageGlobalCleanupTimeout = appCtx.getStorageProperties().getStorageGlobalCleanupTimeout();
+ performGlobalStorageCleanup(mdTxnCtx, storageGlobalCleanupTimeout);
}
+ mdTxnCtx = doRecovery(appCtx, mdTxnCtx);
+ MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
+ recoveryCompleted = true;
+ recovering = false;
+ LOGGER.info("Global Recovery Completed. Refreshing cluster state...");
+ appCtx.getClusterStateManager().refreshState();
}

protected void performGlobalStorageCleanup(MetadataTransactionContext mdTxnCtx, int storageGlobalCleanupTimeoutSecs)