[Swift-commit] r4027 - SwiftApps/SwiftR/Swift/exec

noreply at svn.ci.uchicago.edu noreply at svn.ci.uchicago.edu
Fri Jan 21 15:10:16 CST 2011


Author: tga
Date: 2011-01-21 15:10:16 -0600 (Fri, 21 Jan 2011)
New Revision: 4027

Modified:
   SwiftApps/SwiftR/Swift/exec/start-swift
Log:
Fixed bug with cleanup of worker processes on ssh machines.
The previous approach (recording the pid of the remote shell script, then later using that pid to look up the process group id) was fundamentally flawed, because the shell script could terminate in the meantime. Now the process group id of the worker processes is collected and stored at the same time as the workers are launched.


Modified: SwiftApps/SwiftR/Swift/exec/start-swift
===================================================================
--- SwiftApps/SwiftR/Swift/exec/start-swift	2011-01-21 20:15:14 UTC (rev 4026)
+++ SwiftApps/SwiftR/Swift/exec/start-swift	2011-01-21 21:10:16 UTC (rev 4027)
@@ -48,7 +48,11 @@
        # fixme:send worker.pl to remote host via stdin or scp.
        ssh $host /bin/sh -c \'"mkdir -p $LOGDIR"\'
        scp $SWIFTBIN/worker.pl $host:$LOGDIR
-       ssh $host '/bin/sh -c '\'"WORKER_LOGGING_LEVEL=$workerLogging $LOGDIR/worker.pl $CONTACT $ID $LOGDIR $IDLETIMEOUT 2>&1 & echo PID=\$!"\'  >remotepid.$host </dev/null &
+
+       STARTCMD="WORKER_LOGGING_LEVEL=$workerLogging; $LOGDIR/worker.pl $CONTACT $ID $LOGDIR $IDLETIMEOUT 2>&1 &"'
+                    echo PGID=`ps --no-headers -o '%r' \$\$`'
+       #echo ssh $host '/bin/sh -c '\'"$STARTCMD"\'  >remotepid.$host </dev/null &
+       ssh $host '/bin/sh -c '\'"$STARTCMD"\'  >remotepid.$host </dev/null &
        sshpids="$sshpids $!"
   done
 
@@ -420,17 +424,19 @@
     coasterservicepid="" # null: saved in case we go back to using coaster servers
     trap - $TRAPEVENTS
     sshpids=$(cat $sshpidfile)
-    echo Terminating worker processes $sshpids, starter $starterpid
+#    echo Terminating worker processes $sshpids, starter $starterpid
     for rpfile in $(ls -1 remotepid.*); do
-      rpid=$(grep PID= $rpfile | sed -e 's/PID=//')
+      rpgid=$(grep PGID= $rpfile | sed -e 's/PGID=//')
       rhost=$(echo $rpfile | sed -e 's/remotepid.//')
-      echo Based on $rpfile: terminating process group of process $rpid on $rhost
-      ssh $rhost sh -c \''PGID=$(ps -p '$rpid' -o pgid --no-headers|sed -e "s/ //g"); kill -s TERM -- -$PGID'\'
+      #echo Based on $rpfile: terminating process process group $rpgid on $rhost
+      echo Shutting down worker processes on $rhost
+      ssh $rhost sh -c \'"kill -s TERM -- -$rpgid &>/dev/null"\' 
     done
     if [ "_$sshpids$starterpid$coasterservicepid" != _ ]; then
       echo kill $sshpids $starterpid $coasterservicepid >& /dev/null
     fi
-    kill 0 # Kill all procs in current process group # FIXME: what was this for????
+    # exit cleanly
+    exit 0
   }
 
   trap onexit $TRAPEVENTS
@@ -464,8 +470,9 @@
     if [ "_$jobid != _ ]; then
       qdel $jobid
     fi
-    kill 0 # Kill all procs in current process group # FIXME: what was this for????
-  }
+    # eit cleanly
+    exit 0   
+}
 
   trap onexit $TRAPEVENTS
 




More information about the Swift-commit mailing list