[Swift-commit] r4171 - in trunk/src/org/griphyn/vdl/karajan: . lib

noreply at svn.ci.uchicago.edu noreply at svn.ci.uchicago.edu
Sun Mar 6 15:44:41 CST 2011


Author: hategan
Date: 2011-03-06 15:44:41 -0600 (Sun, 06 Mar 2011)
New Revision: 4171

Added:
   trunk/src/org/griphyn/vdl/karajan/HangChecker.java
Modified:
   trunk/src/org/griphyn/vdl/karajan/Loader.java
   trunk/src/org/griphyn/vdl/karajan/Monitor.java
   trunk/src/org/griphyn/vdl/karajan/lib/VDLFunction.java
Log:
Added a hang checker that detects when execution stalls (no jobs running and no new events during the check interval) and dumps future/thread info to the log file.

Added: trunk/src/org/griphyn/vdl/karajan/HangChecker.java
===================================================================
--- trunk/src/org/griphyn/vdl/karajan/HangChecker.java	                        (rev 0)
+++ trunk/src/org/griphyn/vdl/karajan/HangChecker.java	2011-03-06 21:44:41 UTC (rev 4171)
@@ -0,0 +1,66 @@
+//----------------------------------------------------------------------
+//This code is developed as part of the Java CoG Kit project
+//The terms of the license can be found at http://www.cogkit.org/license
+//This message may not be removed or altered.
+//----------------------------------------------------------------------
+
+/*
+ * Created on Mar 6, 2011
+ */
+package org.griphyn.vdl.karajan;
+
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+import java.util.Timer;
+import java.util.TimerTask;
+
+import org.apache.log4j.Logger;
+import org.globus.cog.karajan.scheduler.LateBindingScheduler;
+import org.globus.cog.karajan.stack.VariableStack;
+import org.globus.cog.karajan.workflow.ExecutionException;
+import org.globus.cog.karajan.workflow.events.EventBus;
+import org.globus.cog.karajan.workflow.nodes.grid.SchedulerNode;
+import org.griphyn.vdl.karajan.lib.VDLFunction;
+
+public class HangChecker extends TimerTask {
+    public static final Logger logger = Logger.getLogger(HangChecker.class);
+    
+    public static final int CHECK_INTERVAL = 10000;
+    private Timer timer;
+    private long lastEventCount;
+    private WrapperMap map;
+    private VariableStack stack;
+    
+    public HangChecker(VariableStack stack) throws ExecutionException {
+        this.stack = stack;
+        map = VDLFunction.getFutureWrapperMap(stack);
+    }
+
+    public void start() {
+        timer = new Timer("Hang checker");
+        timer.scheduleAtFixedRate(this, CHECK_INTERVAL, CHECK_INTERVAL);
+    }
+
+    public void run() {
+        try {
+            LateBindingScheduler s = (LateBindingScheduler) stack.getGlobal(SchedulerNode.SCHEDULER);
+            if (s != null) {
+                int running = s.getRunning();
+
+                if (running == 0 && EventBus.eventCount == lastEventCount) {
+                    logger.warn("No events in " + (CHECK_INTERVAL / 1000) + "s.");
+                    ByteArrayOutputStream os = new ByteArrayOutputStream();
+                    PrintStream ps = new PrintStream(os);
+                    Monitor.dumpVariables(map, ps);
+                    Monitor.dumpThreads(ps);
+                    logger.warn(os.toString());
+                    ps.close();
+                }
+            }
+            lastEventCount = EventBus.eventCount;
+        }
+        catch (Exception e) {
+            logger.warn("Exception caught during hang check", e);
+        }
+    }
+}

Modified: trunk/src/org/griphyn/vdl/karajan/Loader.java
===================================================================
--- trunk/src/org/griphyn/vdl/karajan/Loader.java	2011-03-06 21:04:28 UTC (rev 4170)
+++ trunk/src/org/griphyn/vdl/karajan/Loader.java	2011-03-06 21:44:41 UTC (rev 4171)
@@ -194,13 +194,12 @@
             }
             ec.setArguments(arguments);
             long start = System.currentTimeMillis();
+            new HangChecker(stack).start();
             ec.start(stack);
             ec.waitFor();
             if (ec.isFailed()) {
                 runerror = true;
             }
-            long end = System.currentTimeMillis();
-            System.out.println("Time: " + (end - start) / 1000.0 + ", rate: " + (16384 * 1000) / (end - start) + " j/s");
         }
         catch (Exception e) {
             logger.debug("Detailed exception:", e);

Modified: trunk/src/org/griphyn/vdl/karajan/Monitor.java
===================================================================
--- trunk/src/org/griphyn/vdl/karajan/Monitor.java	2011-03-06 21:04:28 UTC (rev 4170)
+++ trunk/src/org/griphyn/vdl/karajan/Monitor.java	2011-03-06 21:44:41 UTC (rev 4171)
@@ -209,8 +209,12 @@
 	public void dumpVariables() {
 		dumpVariables(System.out);
 	}
+	
+	public void dumpVariables(PrintStream ps) {
+	    dumpVariables(map, ps);
+	}
 
-	public void dumpVariables(PrintStream ps) {
+	public static void dumpVariables(WrapperMap map, PrintStream ps) {
 		ps.println("\nRegistered futures:");
 		synchronized (map) {
 			Iterator i = map.entrySet().iterator();
@@ -244,7 +248,7 @@
 		dumpThreads(System.out);
 	}
 
-	public void dumpThreads(PrintStream pw) {
+	public static void dumpThreads(PrintStream pw) {
 		pw.println("\nWaiting threads:");
 		Collection c = WaitingThreadsMonitor.getAllThreads();
 		Iterator i = c.iterator();

Modified: trunk/src/org/griphyn/vdl/karajan/lib/VDLFunction.java
===================================================================
--- trunk/src/org/griphyn/vdl/karajan/lib/VDLFunction.java	2011-03-06 21:04:28 UTC (rev 4170)
+++ trunk/src/org/griphyn/vdl/karajan/lib/VDLFunction.java	2011-03-06 21:44:41 UTC (rev 4171)
@@ -376,7 +376,7 @@
 
 	public static final String VDL_FUTURE_WRAPPER_MAP = "#vdl:futureWrapperMap";
 
-	protected static WrapperMap getFutureWrapperMap(VariableStack stack) throws ExecutionException {
+	public static WrapperMap getFutureWrapperMap(VariableStack stack) throws ExecutionException {
 		synchronized (stack.getExecutionContext()) {
 			WrapperMap hash = (WrapperMap) stack.firstFrame().getVar(VDL_FUTURE_WRAPPER_MAP);
 			if (hash == null) {




More information about the Swift-commit mailing list