[NO ISSUE][STO] Clean up corrupted resources on failed creation
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
- In some cases, when a rebalance is cancelled by the user,
we abort the process of creating local resources on
nodes. When that happens, it can leave corrupted
resources that cause subsequent drop and/or create
operations to fail until the node is restarted.
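- To avoid this, we make sure that the operation that
creates the resource is atomic: if creation fails with
an exception, the partially created resource file is
deleted; if that cleanup fails, or a non-exception
error is thrown, the node is halted.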
Change-Id: I095a8eb0f1be2a9aa0fc269770978691746c3cec
Reviewed-on: https://asterix-gerrit.ics.uci.edu/2800
Sonar-Qube: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Contrib: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: abdullah alamoudi <bamousaa@gmail.com>
Reviewed-by: Murtadha Hubail <mhubail@apache.org>
diff --git a/asterixdb/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/PersistentLocalResourceRepository.java b/asterixdb/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/PersistentLocalResourceRepository.java
index 7cd31bb..93d9414 100644
--- a/asterixdb/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/PersistentLocalResourceRepository.java
+++ b/asterixdb/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/PersistentLocalResourceRepository.java
@@ -73,6 +73,7 @@
import org.apache.hyracks.storage.am.lsm.common.impls.AbstractLSMIndexFileManager;
import org.apache.hyracks.storage.common.ILocalResourceRepository;
import org.apache.hyracks.storage.common.LocalResource;
+import org.apache.hyracks.util.ExitUtil;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -182,6 +183,7 @@
return resource;
}
+ @SuppressWarnings("squid:S1181")
@Override
public synchronized void insert(LocalResource resource) throws HyracksDataException {
String relativePath = getFileName(resource.getPath());
@@ -194,16 +196,21 @@
if (!parent.exists() && !parent.mkdirs()) {
throw HyracksDataException.create(CANNOT_CREATE_FILE, parent.getAbsolutePath());
}
- createResourceFileMask(resourceFile);
+ // The next block should be all or nothing
try {
+ createResourceFileMask(resourceFile);
byte[] bytes = OBJECT_MAPPER.writeValueAsBytes(resource.toJson(persistedResourceRegistry));
final Path path = Paths.get(resourceFile.getAbsolutePath());
Files.write(path, bytes);
- } catch (IOException e) {
+ indexCheckpointManagerProvider.get(DatasetResourceReference.of(resource)).init(null, 0);
+ deleteResourceFileMask(resourceFile);
+ } catch (Exception e) {
+ cleanup(resourceFile);
throw HyracksDataException.create(e);
+ } catch (Throwable th) {
+ LOGGER.error("Error creating resource {}", resourceFile, th);
+ ExitUtil.halt(ExitUtil.EC_ERROR_CREATING_RESOURCES);
}
- indexCheckpointManagerProvider.get(DatasetResourceReference.of(resource)).init(null, 0);
- deleteResourceFileMask(resourceFile);
resourceCache.put(resource.getPath(), resource);
//if replication enabled, send resource metadata info to remote nodes
if (isReplicationEnabled) {
@@ -211,6 +218,18 @@
}
}
+ @SuppressWarnings("squid:S1181")
+ private void cleanup(FileReference resourceFile) {
+ if (resourceFile.getFile().exists()) {
+ try {
+ IoUtil.delete(resourceFile);
+ } catch (Throwable th) {
+ LOGGER.error("Error cleaning up corrupted resource {}", resourceFile, th);
+ ExitUtil.halt(ExitUtil.EC_FAILED_TO_DELETE_CORRUPTED_RESOURCES);
+ }
+ }
+ }
+
@Override
public synchronized void delete(String relativePath) throws HyracksDataException {
FileReference resourceFile = getLocalResourceFileByName(ioManager, relativePath);
diff --git a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java
index 14cfc59..f7c401a 100644
--- a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java
+++ b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java
@@ -40,6 +40,8 @@
public static final int EC_INCONSISTENT_METADATA = 8;
public static final int EC_UNCAUGHT_THROWABLE = 9;
public static final int EC_UNHANDLED_EXCEPTION = 11;
+ public static final int EC_FAILED_TO_DELETE_CORRUPTED_RESOURCES = 12;
+ public static final int EC_ERROR_CREATING_RESOURCES = 13;
public static final int EC_FAILED_TO_CANCEL_ACTIVE_START_STOP = 22;
public static final int EC_IMMEDIATE_HALT = 33;
public static final int EC_HALT_ABNORMAL_RESERVED_44 = 44;