[NO ISSUE][CLUS] Fail Tasks When NC Not Active
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
- Fail StartTasksWork if the NC is not in the active
state (i.e., it hasn't completed its startup tasks).
- Set the NC state to active only after it has completed
its startup tasks.
- Add test case.
Change-Id: I447d4f1e255211e9026b68eb9d931f25846ed153
Reviewed-on: https://asterix-gerrit.ics.uci.edu/2460
Sonar-Qube: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Contrib: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Michael Blow <mblow@apache.org>
diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/NcLifecycleCoordinator.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/NcLifecycleCoordinator.java
index 844851a..5c497aa 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/NcLifecycleCoordinator.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/NcLifecycleCoordinator.java
@@ -146,7 +146,7 @@
return buildActiveNCRegTasks(isMetadataNode);
}
final List<INCLifecycleTask> tasks = new ArrayList<>();
- tasks.add(new UpdateNodeStatusTask(NodeStatus.ACTIVE));
+ tasks.add(new UpdateNodeStatusTask(NodeStatus.BOOTING));
if (state == SystemState.CORRUPTED) {
//need to perform local recovery for node partitions
LocalRecoveryTask rt = new LocalRecoveryTask(Arrays.asList(clusterManager.getNodePartitions(nodeId))
@@ -167,6 +167,7 @@
tasks.add(new BindMetadataNodeTask());
}
tasks.add(new ReportLocalCountersTask());
+ tasks.add(new UpdateNodeStatusTask(NodeStatus.ACTIVE));
return tasks;
}
diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/NCApplication.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/NCApplication.java
index 0cd01ae..494198b 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/NCApplication.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/NCApplication.java
@@ -89,6 +89,8 @@
@Override
public void init(IServiceContext serviceCtx) throws Exception {
ncServiceCtx = (INCServiceContext) serviceCtx;
+ // set the node status initially to idle to indicate that it is pending booting
+ ((NodeControllerService) serviceCtx.getControllerService()).setNodeStatus(NodeStatus.IDLE);
ncServiceCtx.setThreadFactory(
new AsterixThreadFactory(ncServiceCtx.getThreadFactory(), ncServiceCtx.getLifeCycleComponentManager()));
}
@@ -253,7 +255,7 @@
}
private boolean isPendingStartupTasks(NodeStatus nodeStatus, CcId primaryCc, CcId registeredCc) {
- return nodeStatus == NodeStatus.BOOTING && (primaryCc == null || primaryCc.equals(registeredCc));
+ return nodeStatus == NodeStatus.IDLE && (primaryCc == null || primaryCc.equals(registeredCc));
}
private SystemState getCurrentSystemState() {
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/runtime/NcLifecycleTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/runtime/NcLifecycleTest.java
new file mode 100644
index 0000000..df0740b
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/runtime/NcLifecycleTest.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.runtime;
+
+import org.apache.asterix.api.common.AsterixHyracksIntegrationUtil;
+import org.apache.asterix.common.TestDataUtil;
+import org.apache.asterix.common.config.GlobalConfig;
+import org.apache.asterix.test.common.TestExecutor;
+import org.apache.asterix.testframework.context.TestCaseContext;
+import org.apache.hyracks.api.client.NodeStatus;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class NcLifecycleTest {
+
+ protected static final String TEST_CONFIG_FILE_NAME = "src/main/resources/cc.conf";
+ private static final TestExecutor testExecutor = new TestExecutor();
+ private static final AsterixHyracksIntegrationUtil integrationUtil = new AsterixHyracksIntegrationUtil();
+
+ @Before
+ public void setUp() throws Exception {
+ System.setProperty(GlobalConfig.CONFIG_FILE_PROPERTY, TEST_CONFIG_FILE_NAME);
+ integrationUtil.init(true, TEST_CONFIG_FILE_NAME);
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ integrationUtil.deinit(true);
+ }
+
+ @Test
+ public void nodeNotActiveTest() throws Exception {
+ final String datasetName = "ds";
+ TestDataUtil.createIdOnlyDataset(datasetName);
+ integrationUtil.ncs[0].setNodeStatus(NodeStatus.BOOTING);
+ boolean thrown = false;
+ try {
+ testExecutor.executeSqlppUpdateOrDdl("select count(*) from " + datasetName + ";",
+ TestCaseContext.OutputFormat.CLEAN_JSON);
+ } catch (Exception e) {
+ thrown = true;
+ Assert.assertTrue(e.getMessage().contains(integrationUtil.ncs[0].getId()));
+ }
+ Assert.assertTrue(thrown);
+ }
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/client/NodeStatus.java b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/client/NodeStatus.java
index 10a9a3c..076dd10 100644
--- a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/client/NodeStatus.java
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/client/NodeStatus.java
@@ -19,7 +19,8 @@
package org.apache.hyracks.api.client;
public enum NodeStatus {
- ACTIVE,
- BOOTING,
- DEAD
+ ACTIVE, // node is ready to process tasks
+ BOOTING, // node is executing startup tasks
+ DEAD,
+ IDLE
}
diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
index 0dca782..0691005 100644
--- a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
@@ -147,6 +147,7 @@
public static final int CANNOT_CONTINUE_TEXT_SEARCH_BUFFER_MANAGER_IS_NULL = 111;
public static final int CANNOT_ADD_ELEMENT_TO_INVERTED_INDEX_SEARCH_RESULT = 112;
public static final int UNDEFINED_INVERTED_LIST_MERGE_TYPE = 113;
+ public static final int NODE_IS_NOT_ACTIVE = 114;
// Compilation error codes.
public static final int RULECOLLECTION_NOT_INSTANCE_OF_LIST = 10000;
diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
index 50e1ad4..8635efd 100644
--- a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
@@ -130,5 +130,6 @@
111 = To conduct an inverted-index search, the given buffer manager cannot be null.
112 = Cannot add an element to an inverted-index search result.
113 = Undefined inverted-list merge type: %1$s
+114 = Node (%1$s) is not active
10000 = The given rule collection %1$s is not an instance of the List class.
diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/NodeControllerService.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/NodeControllerService.java
index b67bfac..2c73df1 100644
--- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/NodeControllerService.java
+++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/NodeControllerService.java
@@ -170,7 +170,7 @@
private final Map<CcId, AtomicLong> maxJobIds = new ConcurrentHashMap<>();
- private NodeStatus status = NodeStatus.BOOTING;
+ private volatile NodeStatus status = NodeStatus.ACTIVE;
private NodeRegistration nodeRegistration;
@@ -633,11 +633,11 @@
return workQueue;
}
- public synchronized NodeStatus getNodeStatus() {
+ public NodeStatus getNodeStatus() {
return status;
}
- public synchronized void setNodeStatus(NodeStatus status) {
+ public void setNodeStatus(NodeStatus status) {
this.status = status;
}
diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/work/StartTasksWork.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/work/StartTasksWork.java
index e229149..6a5785a 100644
--- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/work/StartTasksWork.java
+++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/work/StartTasksWork.java
@@ -27,6 +27,7 @@
import java.util.Set;
import org.apache.hyracks.api.application.INCServiceContext;
+import org.apache.hyracks.api.client.NodeStatus;
import org.apache.hyracks.api.comm.IFrameWriter;
import org.apache.hyracks.api.comm.IPartitionCollector;
import org.apache.hyracks.api.comm.IPartitionWriterFactory;
@@ -45,6 +46,7 @@
import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
import org.apache.hyracks.api.deployment.DeploymentId;
+import org.apache.hyracks.api.exceptions.ErrorCode;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.exceptions.HyracksException;
import org.apache.hyracks.api.job.ActivityCluster;
@@ -116,6 +118,9 @@
ncs.updateMaxJobId(jobId);
NCServiceContext serviceCtx = ncs.getContext();
Joblet joblet = getOrCreateLocalJoblet(deploymentId, serviceCtx, acgBytes);
+ if (ncs.getNodeStatus() != NodeStatus.ACTIVE) {
+ throw HyracksException.create(ErrorCode.NODE_IS_NOT_ACTIVE, ncs.getId());
+ }
final ActivityClusterGraph acg = joblet.getActivityClusterGraph();
IRecordDescriptorProvider rdp = new IRecordDescriptorProvider() {
@Override