[Swift-commit] r4144 - SwiftApps/SwiftR/Swift/exec
noreply at svn.ci.uchicago.edu
noreply at svn.ci.uchicago.edu
Fri Feb 25 09:45:04 CST 2011
Author: tga
Date: 2011-02-25 09:45:03 -0600 (Fri, 25 Feb 2011)
New Revision: 4144
Modified:
SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh
SwiftApps/SwiftR/Swift/exec/rserver.swift
Log:
Partially implemented a timeout for when Rscript fails to launch on a worker. Still need to pass the error from start-swift to R.
Modified: SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh
===================================================================
--- SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh 2011-02-24 23:27:03 UTC (rev 4143)
+++ SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh 2011-02-25 15:45:03 UTC (rev 4144)
@@ -3,6 +3,8 @@
# Arguments: inputBatchSaveFile outputBatchSaveFile
# bash @shellscript @RServerScript @rcall @result stdout=@stout stderr=@sterr;
+set -x
+
# Set restrictive umask for duration of script
# This value prevents any other users from reading
# or writing
@@ -83,6 +85,39 @@
RPIDS=
+TIMEOUT=5
+
+function start_timeout {
+ ppid=$$
+ echo start_timeout
+ trap "timeout_handler" SIGHUP
+ (
+ echo timeout_handler for $ppid sleeping
+ sleep ${TIMEOUT}s
+ echo timeout_handler work up
+ kill -1 $ppid &> /dev/null #SIGHUP
+ if [ "$1" != "" ]; then
+ kill -1 $1 &> /dev/null #SIGHUP
+ fi
+ ) &
+}
+
+function stop_timeout {
+ #DEBUG
+ echo stop_timeout
+ trap "" SIGHUP
+}
+
+function timeout_handler {
+ echo 'Timed out waiting to contact R process'
+ echo 'R log follows:'
+ echo '====================='
+ cat $SLOTDIR/R.log
+ exit 1
+}
+
+
+
# Try to make slotdir.
# If the mkdir succeeds, this is the first request to the slot,
# so we create a new R server and send the current request to it;
@@ -104,17 +139,16 @@
else
# wait to make sure fifo exists
# fromR fifo is created last, so wait for that one
- if [ ! -p $SLOTDIR/fromR.fifo ]; then
- sleep 5 # FIXME: try a few times, or wait longer?
- if [ ! -p $SLOTDIR/fromR.fifo ]; then
- echo "$0: ERROR: FIFO $SLOTDIR/fromR.fifo did not appear within 5 seconds.\n"
- exit 1
+ while 1; do
+ if [ -p $SLOTDIR/fromR.fifo ]; then
+ break
fi
- fi
+ done
fi
# Ready to talk to the server: send request and read response
+start_timeout
while true; do
mkdir $SLOTDIR/mutex
if [ $? != 0 ]; then
@@ -123,14 +157,20 @@
break;
fi
done
+stop_timeout
echo DB: Obtained $SLOTDIR/mutex
-if echo run $(pwd)/$callFile $(pwd)/$resultFile > $SLOTDIR/toR.fifo
+echo run $(pwd)/$callFile $(pwd)/$resultFile > $SLOTDIR/toR.fifo &
+echopid=$!
+echo echopid $echopid
+start_timeout $echopid
+
+if wait $echopid
then
touch $SLOTDIR/lastwrite
echo DB: Sent request
-
+ stop_timeout # started up ok
echo dummy stderr response 1>&2 # FIXME - testing if this is the provider staging problem (not xfering zero len stderr)
res=$(cat < $SLOTDIR/fromR.fifo)
@@ -146,6 +186,7 @@
echo DB: Freed $SLOTDIR/mutex
else
+ stop_timeout
echo "ERROR: Could not write to fifo ok"
rmdir $SLOTDIR/mutex
Modified: SwiftApps/SwiftR/Swift/exec/rserver.swift
===================================================================
--- SwiftApps/SwiftR/Swift/exec/rserver.swift 2011-02-24 23:27:03 UTC (rev 4143)
+++ SwiftApps/SwiftR/Swift/exec/rserver.swift 2011-02-25 15:45:03 UTC (rev 4144)
@@ -10,7 +10,7 @@
app (external e, RData result, file stout, file sterr) runR (file shellscript, file RServerScript, RData rcall)
{
- bash "--noprofile" @shellscript @RServerScript @rcall @result stdout=@stout stderr=@sterr;
+ bash @shellscript @RServerScript @rcall @result stdout=@stout stderr=@sterr;
}
app ack (external e[])
More information about the Swift-commit mailing list