Audit work queue blocks/waits, AbstractWork Not Runnable
1. Log a warning for work which blocks and/or waits
2. Make AbstractWork no longer extend Runnable, add javadoc explaining
importance of work completely quickly
3. Minor refactoring due to 2.
Change-Id: I211e4a9e68ee3ac5fa8e02d79b661068734035c7
Reviewed-on: https://asterix-gerrit.ics.uci.edu/1220
Sonar-Qube: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Till Westmann <tillw@apache.org>
diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/client/HyracksClientInterfaceRemoteProxy.java b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/client/HyracksClientInterfaceRemoteProxy.java
index fda9cc3..f8c8512 100644
--- a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/client/HyracksClientInterfaceRemoteProxy.java
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/client/HyracksClientInterfaceRemoteProxy.java
@@ -36,6 +36,8 @@
import org.apache.hyracks.ipc.exceptions.IPCException;
public class HyracksClientInterfaceRemoteProxy implements IHyracksClientInterface {
+ private static final int SHUTDOWN_CONNECTION_TIMEOUT_SECS = 30;
+
private final IIPCHandle ipcHandle;
private final RPCInterface rpci;
@@ -127,15 +129,16 @@
HyracksClientInterfaceFunctions.ClusterShutdownFunction csdf =
new HyracksClientInterfaceFunctions.ClusterShutdownFunction(terminateNCService);
rpci.call(ipcHandle, csdf);
+ int i = 0;
// give the CC some time to do final settling after it returns our request
- int seconds = 30;
- while (ipcHandle.isConnected() && --seconds > 0) {
+ while (ipcHandle.isConnected() && i++ < SHUTDOWN_CONNECTION_TIMEOUT_SECS) {
synchronized (this) {
wait(TimeUnit.SECONDS.toMillis(1));
}
}
if (ipcHandle.isConnected()) {
- throw new IPCException("CC refused to release connection after 30 seconds");
+ throw new IPCException("CC refused to release connection after " + SHUTDOWN_CONNECTION_TIMEOUT_SECS
+ + " seconds");
}
}
diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/GetThreadDumpWork.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/GetThreadDumpWork.java
index 7931cf8..889c828 100644
--- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/GetThreadDumpWork.java
+++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/GetThreadDumpWork.java
@@ -27,11 +27,12 @@
import org.apache.hyracks.control.cc.ClusterControllerService;
import org.apache.hyracks.control.cc.NodeControllerState;
+import org.apache.hyracks.control.common.utils.ThreadDumpHelper;
+import org.apache.hyracks.control.common.work.AbstractWork;
import org.apache.hyracks.control.common.work.IResultCallback;
-import org.apache.hyracks.control.common.work.ThreadDumpWork;
-public class GetThreadDumpWork extends ThreadDumpWork {
- private static final Logger LOGGER = Logger.getLogger(ThreadDumpWork.class.getName());
+public class GetThreadDumpWork extends AbstractWork {
+ private static final Logger LOGGER = Logger.getLogger(GetThreadDumpWork.class.getName());
public static final int TIMEOUT_SECS = 60;
private final ClusterControllerService ccs;
@@ -48,10 +49,15 @@
}
@Override
- protected void doRun() throws Exception {
+ public void run() {
if (nodeId == null) {
// null nodeId means the request is for the cluster controller
- callback.setValue(takeDump(ManagementFactory.getThreadMXBean()));
+ try {
+ callback.setValue(ThreadDumpHelper.takeDumpJSON(ManagementFactory.getThreadMXBean()));
+ } catch (Exception e) {
+ LOGGER.log(Level.WARNING, "Exception taking CC thread dump", e);
+ callback.setException(e);
+ }
} else {
final NodeControllerState ncState = ccs.getNodeMap().get(nodeId);
if (ncState == null) {
diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/work/ThreadDumpWork.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/utils/ThreadDumpHelper.java
similarity index 91%
rename from hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/work/ThreadDumpWork.java
rename to hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/utils/ThreadDumpHelper.java
index bf1965d..eacb9e0 100644
--- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/work/ThreadDumpWork.java
+++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/utils/ThreadDumpHelper.java
@@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.hyracks.control.common.work;
+package org.apache.hyracks.control.common.utils;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;
@@ -29,10 +29,13 @@
import org.json.JSONException;
import org.json.JSONObject;
-public abstract class ThreadDumpWork extends SynchronizableWork {
+public class ThreadDumpHelper {
- protected String takeDump(ThreadMXBean threadMXBean) throws JSONException {
- ThreadInfo [] threadInfos = threadMXBean.dumpAllThreads(true, true);
+ private ThreadDumpHelper() {
+ }
+
+ public static String takeDumpJSON(ThreadMXBean threadMXBean) throws JSONException {
+ ThreadInfo[] threadInfos = threadMXBean.dumpAllThreads(true, true);
List<Map<String, Object>> threads = new ArrayList<>();
for (ThreadInfo thread : threadInfos) {
diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/work/AbstractWork.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/work/AbstractWork.java
index 9376370..076dd66 100644
--- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/work/AbstractWork.java
+++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/work/AbstractWork.java
@@ -31,6 +31,12 @@
return className.substring(className.lastIndexOf('.') + 1, endIndex);
}
+ /**
+ * run is executed on a single thread that services the work queue. As a result run should never wait or block as
+ * this will delay processing for the whole queue.
+ */
+ public abstract void run();
+
@Override
public String toString() {
return getName();
diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/work/WorkQueue.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/work/WorkQueue.java
index fe0821f..f1b00ab 100644
--- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/work/WorkQueue.java
+++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/work/WorkQueue.java
@@ -18,8 +18,10 @@
*/
package org.apache.hyracks.control.common.work;
+import java.lang.management.ManagementFactory;
+import java.lang.management.ThreadInfo;
+import java.lang.management.ThreadMXBean;
import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.Semaphore;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -33,11 +35,11 @@
private final LinkedBlockingQueue<AbstractWork> queue;
private final WorkerThread thread;
- private final Semaphore stopSemaphore;
private boolean stopped;
private AtomicInteger enqueueCount;
private AtomicInteger dequeueCount;
private int threadPriority = Thread.MAX_PRIORITY;
+ private final ThreadMXBean threadMXBean = ManagementFactory.getThreadMXBean();
public WorkQueue(String id, int threadPriority) {
if (threadPriority != Thread.MAX_PRIORITY && threadPriority != Thread.NORM_PRIORITY
@@ -47,21 +49,14 @@
this.threadPriority = threadPriority;
queue = new LinkedBlockingQueue<>();
thread = new WorkerThread(id);
- stopSemaphore = new Semaphore(1);
stopped = true;
- if(DEBUG) {
+ if (DEBUG) {
enqueueCount = new AtomicInteger(0);
dequeueCount = new AtomicInteger(0);
}
}
public void start() throws HyracksException {
- try {
- stopSemaphore.acquire();
- } catch (InterruptedException e) {
- Thread.currentThread().interrupt();
- throw new HyracksException(e);
- }
if (DEBUG) {
enqueueCount.set(0);
dequeueCount.set(0);
@@ -76,7 +71,7 @@
}
thread.interrupt();
try {
- stopSemaphore.acquire();
+ thread.join();
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new HyracksException(e);
@@ -107,35 +102,46 @@
@Override
public void run() {
- try {
- AbstractWork r;
- while (true) {
- synchronized (WorkQueue.this) {
- if (stopped) {
- return;
- }
- }
- try {
- r = queue.take();
- } catch (InterruptedException e) { // NOSONAR: aborting the thread
- break;
- }
- if (DEBUG) {
- LOGGER.log(Level.FINEST,
- "Dequeue (" + WorkQueue.this.hashCode() + "): " + dequeueCount.incrementAndGet() + "/"
- + enqueueCount);
- }
- try {
- if (LOGGER.isLoggable(r.logLevel())) {
- LOGGER.log(r.logLevel(), "Executing: " + r);
- }
- r.run();
- } catch (Exception e) {
- LOGGER.log(Level.WARNING, "Exception while executing " + r, e);
+ AbstractWork r;
+ while (true) {
+ synchronized (WorkQueue.this) {
+ if (stopped) {
+ return;
}
}
- } finally {
- stopSemaphore.release();
+ try {
+ r = queue.take();
+ } catch (InterruptedException e) { // NOSONAR: aborting the thread
+ break;
+ }
+ if (DEBUG) {
+ LOGGER.log(Level.FINEST,
+ "Dequeue (" + WorkQueue.this.hashCode() + "): " + dequeueCount.incrementAndGet() + "/"
+ + enqueueCount);
+ }
+ if (LOGGER.isLoggable(r.logLevel())) {
+ LOGGER.log(r.logLevel(), "Executing: " + r);
+ }
+ ThreadInfo before = threadMXBean.getThreadInfo(thread.getId());
+ try {
+ r.run();
+ } catch (Exception e) {
+ LOGGER.log(Level.WARNING, "Exception while executing " + r, e);
+ } finally {
+ auditWaitsAndBlocks(r, before);
+ }
+ }
+ }
+
+ protected void auditWaitsAndBlocks(AbstractWork r, ThreadInfo before) {
+ ThreadInfo after = threadMXBean.getThreadInfo(thread.getId());
+ final long waitedDelta = after.getWaitedCount() - before.getWaitedCount();
+ final long blockedDelta = after.getBlockedCount() - before.getBlockedCount();
+ if (waitedDelta > 0 || blockedDelta > 0) {
+ LOGGER.warning("Work " + r + " waited " + waitedDelta + " times (~"
+ + (after.getWaitedTime() - before.getWaitedTime()) + "ms), blocked " + blockedDelta
+ + " times (~" + (after.getBlockedTime() - before.getBlockedTime()) + "ms)"
+ );
}
}
}
diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/NodeControllerService.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/NodeControllerService.java
index d2d4811..ed46b53 100644
--- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/NodeControllerService.java
+++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/NodeControllerService.java
@@ -85,10 +85,10 @@
import org.apache.hyracks.control.nc.work.CleanupJobletWork;
import org.apache.hyracks.control.nc.work.DeployBinaryWork;
import org.apache.hyracks.control.nc.work.ReportPartitionAvailabilityWork;
-import org.apache.hyracks.control.nc.work.ShutdownWork;
+import org.apache.hyracks.control.nc.task.ShutdownTask;
import org.apache.hyracks.control.nc.work.StartTasksWork;
import org.apache.hyracks.control.nc.work.StateDumpWork;
-import org.apache.hyracks.control.nc.work.NodeThreadDumpWork;
+import org.apache.hyracks.control.nc.task.ThreadDumpTask;
import org.apache.hyracks.control.nc.work.UnDeployBinaryWork;
import org.apache.hyracks.ipc.api.IIPCHandle;
import org.apache.hyracks.ipc.api.IIPCI;
@@ -573,12 +573,12 @@
case SHUTDOWN_REQUEST:
final CCNCFunctions.ShutdownRequestFunction sdrf = (CCNCFunctions.ShutdownRequestFunction) fn;
- executor.submit(new ShutdownWork(NodeControllerService.this, sdrf.isTerminateNCService()));
+ executor.submit(new ShutdownTask(NodeControllerService.this, sdrf.isTerminateNCService()));
return;
case THREAD_DUMP_REQUEST:
final CCNCFunctions.ThreadDumpRequestFunction tdrf = (CCNCFunctions.ThreadDumpRequestFunction) fn;
- executor.submit(new NodeThreadDumpWork(NodeControllerService.this, tdrf.getRequestId()));
+ executor.submit(new ThreadDumpTask(NodeControllerService.this, tdrf.getRequestId()));
return;
default:
diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/work/ShutdownWork.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/task/ShutdownTask.java
similarity index 89%
rename from hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/work/ShutdownWork.java
rename to hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/task/ShutdownTask.java
index c195c98..cdbd4ad 100644
--- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/work/ShutdownWork.java
+++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/task/ShutdownTask.java
@@ -17,21 +17,20 @@
* under the License.
*/
-package org.apache.hyracks.control.nc.work;
+package org.apache.hyracks.control.nc.task;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.hyracks.control.common.base.IClusterController;
-import org.apache.hyracks.control.common.work.AbstractWork;
import org.apache.hyracks.control.nc.NodeControllerService;
-public class ShutdownWork extends AbstractWork {
- private static final Logger LOGGER = Logger.getLogger(ShutdownWork.class.getName());
+public class ShutdownTask implements Runnable {
+ private static final Logger LOGGER = Logger.getLogger(ShutdownTask.class.getName());
private final NodeControllerService ncs;
private final boolean terminateNCService;
- public ShutdownWork(NodeControllerService ncs, boolean terminateNCService) {
+ public ShutdownTask(NodeControllerService ncs, boolean terminateNCService) {
this.ncs = ncs;
this.terminateNCService = terminateNCService;
}
diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/task/ThreadDumpTask.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/task/ThreadDumpTask.java
new file mode 100644
index 0000000..68d9223
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/task/ThreadDumpTask.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hyracks.control.nc.task;
+
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.hyracks.control.common.utils.ThreadDumpHelper;
+import org.apache.hyracks.control.nc.NodeControllerService;
+
+public class ThreadDumpTask implements Runnable {
+ private static final Logger LOGGER = Logger.getLogger(ThreadDumpTask.class.getName());
+ private final NodeControllerService ncs;
+ private final String requestId;
+
+ public ThreadDumpTask(NodeControllerService ncs, String requestId) {
+ this.ncs = ncs;
+ this.requestId = requestId;
+ }
+
+ @Override
+ public void run() {
+ String result;
+ try {
+ result = ThreadDumpHelper.takeDumpJSON(ncs.getThreadMXBean());
+ } catch (Exception e) {
+ LOGGER.log(Level.WARNING, "Exception taking thread dump", e);
+ result = null;
+ }
+ try {
+ ncs.getClusterController().notifyThreadDump(
+ ncs.getApplicationContext().getNodeId(), requestId, result);
+ } catch (Exception e) {
+ LOGGER.log(Level.WARNING, "Exception sending thread dump to CC", e);
+ }
+ }
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/work/NodeThreadDumpWork.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/work/NodeThreadDumpWork.java
deleted file mode 100644
index 85233b2..0000000
--- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/work/NodeThreadDumpWork.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.hyracks.control.nc.work;
-
-import org.apache.hyracks.control.common.work.ThreadDumpWork;
-import org.apache.hyracks.control.nc.NodeControllerService;
-
-public class NodeThreadDumpWork extends ThreadDumpWork {
- private final NodeControllerService ncs;
- private final String requestId;
-
- public NodeThreadDumpWork(NodeControllerService ncs, String requestId) {
- this.ncs = ncs;
- this.requestId = requestId;
- }
-
- @Override
- protected void doRun() throws Exception {
- final String result = takeDump(ncs.getThreadMXBean());
- ncs.getClusterController().notifyThreadDump(
- ncs.getApplicationContext().getNodeId(), requestId, result);
- }
-}