[ASTERIXDB-2388] Add nodeIDs to cluster state query failure exception
Change-Id: I896fd0482e1db8a04dece058aa0975d3d961e731
Reviewed-on: https://asterix-gerrit.ics.uci.edu/3317
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Contrib: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Murtadha Hubail <mhubail@apache.org>
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/AbstractLangTranslator.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/AbstractLangTranslator.java
index 0eb8e0a..967b3ad 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/AbstractLangTranslator.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/AbstractLangTranslator.java
@@ -18,15 +18,20 @@
*/
package org.apache.asterix.translator;
+import java.util.Arrays;
+import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.apache.asterix.common.api.IClusterManagementWork.ClusterState;
+import org.apache.asterix.common.cluster.ClusterPartition;
import org.apache.asterix.common.cluster.IClusterStateManager;
import org.apache.asterix.common.cluster.IGlobalRecoveryManager;
import org.apache.asterix.common.dataflow.ICcApplicationContext;
import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.common.exceptions.ErrorCode;
import org.apache.asterix.lang.common.base.Statement;
import org.apache.asterix.lang.common.statement.DatasetDecl;
import org.apache.asterix.lang.common.statement.DataverseDropStatement;
@@ -68,21 +73,25 @@
}
Thread.currentThread().interrupt();
}
- if (!clusterStateManager.getState().equals(ClusterState.ACTIVE)) {
- throw new AsterixException("Cluster is in " + ClusterState.UNUSABLE + " state."
- + "\n One or more Node Controllers have left or haven't joined yet.\n");
- } else {
- if (LOGGER.isInfoEnabled()) {
- LOGGER.info("Cluster is now " + ClusterState.ACTIVE);
+ synchronized (clusterStateManager) {
+ if (!clusterStateManager.getState().equals(ClusterState.ACTIVE)) {
+ ClusterPartition[] configuredPartitions = clusterStateManager.getClusterPartitons();
+ Set<String> inactiveNodes = new HashSet<>();
+ for (ClusterPartition cp : configuredPartitions) {
+ if (!cp.isActive()) {
+ inactiveNodes.add(cp.getNodeId());
+ }
+ }
+ throw AsterixException.create(ErrorCode.CLUSTER_STATE_UNUSABLE,
+ Arrays.toString(inactiveNodes.toArray()));
+ } else {
+ if (LOGGER.isInfoEnabled()) {
+ LOGGER.info("Cluster is now " + ClusterState.ACTIVE);
+ }
}
}
}
- if (clusterStateManager.getState().equals(ClusterState.UNUSABLE)) {
- throw new AsterixException("Cluster is in " + ClusterState.UNUSABLE + " state."
- + "\n One or more Node Controllers have left.\n");
- }
-
if (!globalRecoveryManager.isRecoveryCompleted()) {
int maxWaitCycles = appCtx.getExternalProperties().getMaxWaitClusterActive();
int waitCycleCount = 0;
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
index 459773b..c9f1c48 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
@@ -33,13 +33,6 @@
private static final String RESOURCE_PATH = "asx_errormsg/en.properties";
public static final String ASTERIX = "ASX";
- // Extension errors
- public static final int EXTENSION_ID_CONFLICT = 4001;
- public static final int EXTENSION_COMPONENT_CONFLICT = 4002;
- public static final int UNSUPPORTED_MESSAGE_TYPE = 4003;
- public static final int INVALID_CONFIGURATION = 4004;
- public static final int UNSUPPORTED_REPLICATION_STRATEGY = 4005;
-
// Runtime errors
public static final int CASTING_FIELD = 1;
public static final int TYPE_MISMATCH_FUNCTION = 2;
@@ -302,6 +295,16 @@
// Lifecycle management errors
public static final int DUPLICATE_PARTITION_ID = 4000;
+ // Extension errors
+ public static final int EXTENSION_ID_CONFLICT = 4001;
+ public static final int EXTENSION_COMPONENT_CONFLICT = 4002;
+ public static final int UNSUPPORTED_MESSAGE_TYPE = 4003;
+ public static final int INVALID_CONFIGURATION = 4004;
+ public static final int UNSUPPORTED_REPLICATION_STRATEGY = 4005;
+
+ // Lifecycle management errors, continued (codes 4001-4005 are taken by extension errors)
+ public static final int CLUSTER_STATE_UNUSABLE = 4006;
+
private ErrorCode() {
}
diff --git a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
index 94d6942..ddaf271 100644
--- a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
+++ b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
@@ -291,3 +291,4 @@
# Lifecycle management errors
4000 = Partition id %1$s for node %2$s already in use by node %3$s
+4006 = Not all node controllers required for request execution have joined the cluster. Nodes %1$s appear missing, double check the logs on these machines and the cluster configuration