[Swift-commit] r4144 - SwiftApps/SwiftR/Swift/exec

noreply at svn.ci.uchicago.edu noreply at svn.ci.uchicago.edu
Fri Feb 25 09:45:04 CST 2011


Author: tga
Date: 2011-02-25 09:45:03 -0600 (Fri, 25 Feb 2011)
New Revision: 4144

Modified:
   SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh
   SwiftApps/SwiftR/Swift/exec/rserver.swift
Log:
Partially implemented a timeout for the case where Rscript fails to launch on a worker.  Still to do: pass the error from start-swift back to R.


Modified: SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh
===================================================================
--- SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh	2011-02-24 23:27:03 UTC (rev 4143)
+++ SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh	2011-02-25 15:45:03 UTC (rev 4144)
@@ -3,6 +3,8 @@
 # Arguments: inputBatchSaveFile outputBatchSaveFile
 #   bash @shellscript @RServerScript @rcall @result stdout=@stout stderr=@sterr;
 
+set -x
+
 # Set restrictive umask for duration of script
 # This value prevents any other users from reading
 # or writing
@@ -83,6 +85,39 @@
 
 RPIDS=
 
+TIMEOUT=5
+
+function start_timeout {
+    ppid=$$
+    echo start_timeout
+    trap "timeout_handler" SIGHUP
+    (
+    echo timeout_handler for $ppid sleeping
+    sleep ${TIMEOUT}s
+    echo timeout_handler work up
+    kill -1 $ppid  &> /dev/null #SIGHUP 
+    if [ "$1" != "" ]; then
+        kill -1 $1  &> /dev/null #SIGHUP 
+    fi
+    ) &
+}
+
+function stop_timeout {
+    #DEBUG
+    echo stop_timeout
+    trap "" SIGHUP
+}
+
+function timeout_handler {
+    echo 'Timed out waiting to contact R process'
+    echo 'R log follows:'
+    echo '====================='
+    cat $SLOTDIR/R.log
+    exit 1
+}
+
+
+
 # Try to make slotdir.
 # If the mkdir succeeds, this is the first request to the slot,
 # so we create a new R server and send the current request to it;
@@ -104,17 +139,16 @@
 else
   # wait to make sure fifo exists
   # fromR fifo is created last, so wait for that one
-  if [ ! -p $SLOTDIR/fromR.fifo ]; then
-    sleep 5  # FIXME: try a few times, or wait longer?
-    if [ ! -p $SLOTDIR/fromR.fifo ]; then
-      echo "$0: ERROR: FIFO $SLOTDIR/fromR.fifo did not appear within 5 seconds.\n"
-      exit 1
+  while 1; do
+    if [ -p $SLOTDIR/fromR.fifo ]; then
+      break
     fi
-  fi
+  done
 fi
 
 # Ready to talk to the server: send request and read response
 
+start_timeout
 while true; do
   mkdir $SLOTDIR/mutex
   if [ $? != 0 ]; then
@@ -123,14 +157,20 @@
     break;
   fi
 done
+stop_timeout
 echo DB: Obtained $SLOTDIR/mutex
 
-if echo run $(pwd)/$callFile $(pwd)/$resultFile > $SLOTDIR/toR.fifo
+echo run $(pwd)/$callFile $(pwd)/$resultFile > $SLOTDIR/toR.fifo &
+echopid=$!
+echo echopid $echopid
+start_timeout $echopid
+
+if wait $echopid
 then
     touch $SLOTDIR/lastwrite
 
     echo DB: Sent request
-
+    stop_timeout # started up ok
     echo dummy stderr response 1>&2 # FIXME - testing if this is the provider staging problem (not xfering zero len stderr)
 
     res=$(cat < $SLOTDIR/fromR.fifo)
@@ -146,6 +186,7 @@
 
     echo DB: Freed $SLOTDIR/mutex
 else
+    stop_timeout
     echo "ERROR: Could not write to fifo ok"
     rmdir $SLOTDIR/mutex
 

Modified: SwiftApps/SwiftR/Swift/exec/rserver.swift
===================================================================
--- SwiftApps/SwiftR/Swift/exec/rserver.swift	2011-02-24 23:27:03 UTC (rev 4143)
+++ SwiftApps/SwiftR/Swift/exec/rserver.swift	2011-02-25 15:45:03 UTC (rev 4144)
@@ -10,7 +10,7 @@
 
 app (external e, RData result, file stout, file sterr) runR (file shellscript, file RServerScript, RData rcall)
 {
-  bash "--noprofile" @shellscript @RServerScript @rcall @result stdout=@stout stderr=@sterr;
+  bash @shellscript @RServerScript @rcall @result stdout=@stout stderr=@sterr;
 }
 
 app ack (external e[])




More information about the Swift-commit mailing list