[Swift-commit] r3658 - SwiftApps/SwiftR/Swift/exec
noreply at svn.ci.uchicago.edu
noreply at svn.ci.uchicago.edu
Tue Oct 5 16:44:04 CDT 2010
Author: wilde
Date: 2010-10-05 16:44:04 -0500 (Tue, 05 Oct 2010)
New Revision: 3658
Modified:
SwiftApps/SwiftR/Swift/exec/rserver.swift
SwiftApps/SwiftR/Swift/exec/start-swift
Log:
Add support for start-swift ssh case; make rserver.swift run a dummy job on startup to force the coaster provider into passive state and announce its worker connection port.
Modified: SwiftApps/SwiftR/Swift/exec/rserver.swift
===================================================================
--- SwiftApps/SwiftR/Swift/exec/rserver.swift 2010-10-05 00:10:54 UTC (rev 3657)
+++ SwiftApps/SwiftR/Swift/exec/rserver.swift 2010-10-05 21:44:04 UTC (rev 3658)
@@ -16,6 +16,11 @@
bashlocal "-c" @strcat("echo done > ",resultPipeName);
}
+app passivate ()
+{
+ bash "-c" "echo dummy swift job;";
+}
+
(external e[]) apply (string runDir)
{
RData rcalls[] <simple_mapper; location=runDir, prefix="cbatch.", suffix=".Rdata", padding=0>;
@@ -32,6 +37,8 @@
}
}
+passivate();
+
string pipedir = @arg("pipedir");
global string requestPipeName = @strcat(pipedir,"/requestpipe");
global string resultPipeName = @strcat(pipedir,"/resultpipe");
Modified: SwiftApps/SwiftR/Swift/exec/start-swift
===================================================================
--- SwiftApps/SwiftR/Swift/exec/start-swift 2010-10-05 00:10:54 UTC (rev 3657)
+++ SwiftApps/SwiftR/Swift/exec/start-swift 2010-10-05 21:44:04 UTC (rev 3658)
@@ -1,18 +1,72 @@
#! /bin/bash
-site=$1
+# Define internal functions
+function wait-and-start-workers
+{
+ # Look for:
+ # Passive queue processor initialized. Callback URI is http://140.221.8.62:55379
+
+ for try in $(seq 1 20); do
+ uriline=$(grep "Passive queue processor initialized. Callback URI is" $out 2> /dev/null)
+ if [ "_$uriline" = _ ]; then
+ sleep 1
+ else
+ break;
+ fi
+ done
+
+ if [ "_$uriline" = _ ]; then
+ echo "$0: No passive state message from Swift - exiting."
+ exit 1
+ fi
+
+ CONTACT=$(echo $uriline | sed -e 's/^.*http:/http:/')
+ echo Coaster service contact URI: $CONTACT
+
+ LOGDIR=$(pwd)/swiftworkerlogs # full path. FIXME: Generate this with remote-side paths if not shared dir env?
+ LOGDIR=/tmp/$USER/SwiftR/swiftworkerlogs # FIXME: left this in /tmp so it works on any host. Better way?
+
+ # mkdir -p $LOGDIR # is done with the ssh command, below
+
+ IDLETIMEOUT=$((60*60*240)) # 10 days: FIXME: make this a command line arg
+
+ rm -rf remotepid.* # FIXME: should not be needed if we start in a new dir each time
+ for host in $(echo $COMPUTEHOSTS); do
+ timestamp=$(date "+%Y.%m%d.%H%M%S")
+ random=$(awk "BEGIN {printf \"%0.5d\", $RANDOM}")
+ ID=$timestamp.$random
+ # FIXME: make logging an argument; set false by default
+ # fixme:send worker.pl to remote host via stdin or scp.
+ ssh $host /bin/sh -c \'"mkdir -p $LOGDIR"\'
+ scp $SWIFTBIN/worker.pl $host:$LOGDIR
+ ssh $host '/bin/sh -c '\'"WORKER_LOGGING_ENABLED=true $LOGDIR/worker.pl $CONTACT $ID $LOGDIR $IDLETIMEOUT 2>&1 & echo PID=\$!"\' >remotepid.$host </dev/null &
+ sshpids="$sshpids $!"
+ done
+
+ echo Started workers from these ssh processes: $sshpids
+ echo $sshpids > $sshpidfile
+}
+
+# main script
+
+site=$1 # local, ssh, ...
+
# FIXME: check args and use better arg parsing
-tmp=/tmp # FIXME: allow this to change eg for sites with main tmp dir elsewhere
-tmp=/scratch/local # FIXME: allow this to change eg for sites with main tmp dir elsewhere
-
tmp=${SWIFTR_TMP:-/tmp}
+echo DB $0: site=$site tmp=$tmp
+
throttleOneCore="-0.001"
+throttleOneCore="0.00"
localcores=5 # FIXME: parameterize: localthreads=N
SWIFTRBIN=$(cd $(dirname $0); pwd)
+SWIFTBIN=$SWIFTRBIN/../swift/bin # This depends on ~/SwiftR/Swift/swift being a symlink to swift in RLibrary/Swift
+
+echo DB $0: SWIFTRBIN=$SWIFTRBIN SWIFTBIN=$SWIFTBIN
+
rundir=$tmp/$USER/SwiftR/swift.local # rundir prefix # FIXME: handle multiple concurent independent swift servers per user
mkdir -p $(dirname $rundir)
trundir=$(mktemp -d $rundir.XXXX) # FIXME: check success
@@ -20,30 +74,59 @@
ln -s $trundir $rundir
cd $rundir
+echo DB $0: rundir=$(pwd) SWIFTRBIN=$SWIFTRBIN SWIFTBIN=$SWIFTBIN
+
script=$SWIFTRBIN/rserver.swift
-cp $script $SWIFTRBIN/passive-coaster-swift $SWIFTRBIN/swift.properties $rundir
+#cp $script $SWIFTRBIN/passive-coaster-swift $SWIFTRBIN/swift.properties $rundir
+cp $script .
script=$(basename $script)
cp $SWIFTRBIN/{EvalRBatchPersistent.sh,SwiftRServer.sh} .
-# FIXME: rework this script to transfer all shells and rscripts
+# DONE: FIXME: rework this script to transfer all shells and rscripts
# needed, and to copy in the R prelude for the R server processes (to
# include for example the OpenMx library) NOTE: Both were done in older version of this script.
# rm -f requestpipe resultpipe
mkfifo requestpipe resultpipe
-#FIXME JUNK app=/bin/bash
-# FIXME: remove these fossils:
-#serviceport=1985
-#site=local
-#location=$1
-#stagingmethod=$2
-
source $SWIFTRBIN/configure-site-$1
-$SWIFTRBIN/../swift/bin/swift -config cf -tc.file tc -sites.file sites.xml $script -pipedir=$(pwd) >& swift.stdouterr </dev/null
+out=swift.stdouterr
+touch $out
-# wait-for-worker-port
+if [ $site = ssh ]; then
-# $SWIFTRBIN/start-workers-$1 $workerport
+ shift
+ COMPUTEHOSTS=$*
+ sshpidfile=${out/stdouterr/workerpids}
+
+ echo swift output is in: $out, pids in $sshpidfile
+
+ TRAPS="EXIT 1 2 3 15" # Signals and conditions to trap
+
+ function onexit {
+ coasterservicepid="" # null: saved in case we go back to using coaster servers
+ trap - $TRAPS
+ sshpids=$(cat $sshpidfile)
+ echo Terminating worker processes $sshpids, starter $starterpid
+ for rpfile in $(ls -1 remotepid.*); do
+ rpid=$(grep PID= $rpfile | sed -e 's/PID=//')
+ rhost=$(echo $rpfile | sed -e 's/remotepid.//')
+ echo Based on $rpfile: terminating process group of process $rpid on $rhost
+ ssh $rhost sh -c \''PGID=$(ps -p '$rpid' -o pgid --no-headers|sed -e "s/ //g"); kill -s TERM -- -$PGID'\'
+ done
+ if [ "_$sshpids$starterpid$coasterservicepid" != _ ]; then
+ echo kill $sshpids $starterpid $coasterservicepid >& /dev/null
+ fi
+ kill 0 # Kill all procs in current process group # FIXME: what was this for????
+ }
+
+ trap onexit $TRAPS
+
+ wait-and-start-workers &
+ starterpid=$!
+
+fi
+
+$SWIFTRBIN/../swift/bin/swift -config cf -tc.file tc -sites.file sites.xml $script -pipedir=$(pwd) >& $out </dev/null
More information about the Swift-commit mailing list