[Swift-commit] r3658 - SwiftApps/SwiftR/Swift/exec

noreply at svn.ci.uchicago.edu noreply at svn.ci.uchicago.edu
Tue Oct 5 16:44:04 CDT 2010


Author: wilde
Date: 2010-10-05 16:44:04 -0500 (Tue, 05 Oct 2010)
New Revision: 3658

Modified:
   SwiftApps/SwiftR/Swift/exec/rserver.swift
   SwiftApps/SwiftR/Swift/exec/start-swift
Log:
Add support for start-swift ssh case; make rserver.swift run a dummy job on startup to force the coaster provider into passive state and announce its worker connection port.

Modified: SwiftApps/SwiftR/Swift/exec/rserver.swift
===================================================================
--- SwiftApps/SwiftR/Swift/exec/rserver.swift	2010-10-05 00:10:54 UTC (rev 3657)
+++ SwiftApps/SwiftR/Swift/exec/rserver.swift	2010-10-05 21:44:04 UTC (rev 3658)
@@ -16,6 +16,11 @@
    bashlocal "-c" @strcat("echo done > ",resultPipeName);
 }
 
+app passivate ()
+{
+  bash "-c" "echo dummy swift job;";
+}
+
 (external e[]) apply (string runDir)
 {
   RData rcalls[]  <simple_mapper; location=runDir, prefix="cbatch.", suffix=".Rdata", padding=0>;
@@ -32,6 +37,8 @@
   }
 }
 
+passivate();
+
 string pipedir = @arg("pipedir");
 global string requestPipeName = @strcat(pipedir,"/requestpipe");
 global string resultPipeName = @strcat(pipedir,"/resultpipe");

Modified: SwiftApps/SwiftR/Swift/exec/start-swift
===================================================================
--- SwiftApps/SwiftR/Swift/exec/start-swift	2010-10-05 00:10:54 UTC (rev 3657)
+++ SwiftApps/SwiftR/Swift/exec/start-swift	2010-10-05 21:44:04 UTC (rev 3658)
@@ -1,18 +1,72 @@
 #! /bin/bash
 
-site=$1
+# Define internal functions
 
+function wait-and-start-workers
+{
+  # Look for:
+  # Passive queue processor initialized. Callback URI is http://140.221.8.62:55379
+
+  for try in $(seq 1 20); do
+    uriline=$(grep "Passive queue processor initialized. Callback URI is" $out 2> /dev/null)
+    if [ "_$uriline" = _ ]; then
+      sleep 1
+    else
+      break;
+    fi
+  done
+
+  if [ "_$uriline" = _ ]; then
+    echo "$0: No passive state message from Swift - exiting."
+    exit 1
+  fi
+
+  CONTACT=$(echo $uriline | sed -e 's/^.*http:/http:/')
+  echo Coaster service contact URI: $CONTACT
+
+  LOGDIR=$(pwd)/swiftworkerlogs # full path. FIXME: Generate this with remote-side paths if not shared dir env?
+  LOGDIR=/tmp/$USER/SwiftR/swiftworkerlogs  # FIXME: left this in /tmp so it works on any host. Better way?
+
+  #  mkdir -p $LOGDIR # is done with the ssh command, below
+
+  IDLETIMEOUT=$((60*60*240)) # 10 days: FIXME: make this a command line arg
+
+  rm -rf remotepid.* # FIXME: should not be needed if we start in a new dir each time
+  for host in $(echo $COMPUTEHOSTS); do
+    timestamp=$(date "+%Y.%m%d.%H%M%S")
+    random=$(awk "BEGIN {printf \"%0.5d\", $RANDOM}")
+    ID=$timestamp.$random
+       # FIXME: make logging an argument; set false by default
+       # fixme:send worker.pl to remote host via stdin or scp.
+       ssh $host /bin/sh -c \'"mkdir -p $LOGDIR"\'
+       scp $SWIFTBIN/worker.pl $host:$LOGDIR
+       ssh $host '/bin/sh -c '\'"WORKER_LOGGING_ENABLED=true $LOGDIR/worker.pl $CONTACT $ID $LOGDIR $IDLETIMEOUT 2>&1 & echo PID=\$!"\'  >remotepid.$host </dev/null &
+       sshpids="$sshpids $!"
+  done
+
+  echo Started workers from these ssh processes: $sshpids
+  echo $sshpids > $sshpidfile
+}
+
+# main script
+
+site=$1 # local, ssh, ...
+
 # FIXME: check args and use better arg parsing
 
-tmp=/tmp # FIXME: allow this to change eg for sites with main tmp dir elsewhere
-tmp=/scratch/local # FIXME: allow this to change eg for sites with main tmp dir elsewhere
-
 tmp=${SWIFTR_TMP:-/tmp}
 
+echo DB $0: site=$site tmp=$tmp
+
 throttleOneCore="-0.001"
+throttleOneCore="0.00"
 localcores=5 # FIXME: parameterize: localthreads=N
 
 SWIFTRBIN=$(cd $(dirname $0); pwd)
+SWIFTBIN=$SWIFTRBIN/../swift/bin  # This depends on ~/SwiftR/Swift/swift being a symlink to swift in RLibrary/Swift
+
+echo DB $0: SWIFTRBIN=$SWIFTRBIN SWIFTBIN=$SWIFTBIN
+
 rundir=$tmp/$USER/SwiftR/swift.local  # rundir prefix # FIXME: handle multiple concurent independent swift servers per user
 mkdir -p $(dirname $rundir)
 trundir=$(mktemp -d $rundir.XXXX) # FIXME: check success
@@ -20,30 +74,59 @@
 ln -s $trundir $rundir
 cd $rundir
 
+echo DB $0: rundir=$(pwd) SWIFTRBIN=$SWIFTRBIN SWIFTBIN=$SWIFTBIN
+
 script=$SWIFTRBIN/rserver.swift
-cp $script $SWIFTRBIN/passive-coaster-swift $SWIFTRBIN/swift.properties $rundir
+#cp $script $SWIFTRBIN/passive-coaster-swift $SWIFTRBIN/swift.properties $rundir
+cp $script .
 script=$(basename $script)
 cp $SWIFTRBIN/{EvalRBatchPersistent.sh,SwiftRServer.sh} .
 
-# FIXME: rework this script to transfer all shells and rscripts
+# DONE: FIXME: rework this script to transfer all shells and rscripts
 # needed, and to copy in the R prelude for the R server processes (to
 # include for example the OpenMx library)  NOTE: Both were done in older version of this script.
 
 # rm -f requestpipe resultpipe
 mkfifo requestpipe resultpipe
 
-#FIXME JUNK app=/bin/bash
-# FIXME: remove these fossils:
-#serviceport=1985
-#site=local
-#location=$1
-#stagingmethod=$2
-
 source $SWIFTRBIN/configure-site-$1
 
-$SWIFTRBIN/../swift/bin/swift -config cf -tc.file tc -sites.file sites.xml $script -pipedir=$(pwd) >& swift.stdouterr </dev/null
+out=swift.stdouterr
+touch $out
 
-# wait-for-worker-port
+if [ $site = ssh ]; then
 
-# $SWIFTRBIN/start-workers-$1 $workerport
+  shift
+  COMPUTEHOSTS=$*
 
+  sshpidfile=${out/stdouterr/workerpids}
+
+  echo swift output is in: $out, pids in $sshpidfile
+
+  TRAPS="EXIT 1 2 3 15"  # Signals and conditions to trap
+
+  function onexit {
+    coasterservicepid="" # null: saved in case we go back to using coaster servers
+    trap - $TRAPS
+    sshpids=$(cat $sshpidfile)
+    echo Terminating worker processes $sshpids, starter $starterpid
+    for rpfile in $(ls -1 remotepid.*); do
+      rpid=$(grep PID= $rpfile | sed -e 's/PID=//')
+      rhost=$(echo $rpfile | sed -e 's/remotepid.//')
+      echo Based on $rpfile: terminating process group of process $rpid on $rhost
+      ssh $rhost sh -c \''PGID=$(ps -p '$rpid' -o pgid --no-headers|sed -e "s/ //g"); kill -s TERM -- -$PGID'\'
+    done
+    if [ "_$sshpids$starterpid$coasterservicepid" != _ ]; then
+      echo kill $sshpids $starterpid $coasterservicepid >& /dev/null
+    fi
+    kill 0 # Kill all procs in current process group # FIXME: what was this for????
+  }
+
+  trap onexit $TRAPS
+
+  wait-and-start-workers &
+  starterpid=$!
+
+fi
+
+$SWIFTRBIN/../swift/bin/swift -config cf -tc.file tc -sites.file sites.xml $script -pipedir=$(pwd) >& $out </dev/null




More information about the Swift-commit mailing list