[Swift-commit] r3632 - SwiftApps/SwiftR/Swift/exec

noreply at svn.ci.uchicago.edu noreply at svn.ci.uchicago.edu
Mon Sep 20 23:12:48 CDT 2010


Author: wilde
Date: 2010-09-20 23:12:48 -0500 (Mon, 20 Sep 2010)
New Revision: 3632

Modified:
   SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh
   SwiftApps/SwiftR/Swift/exec/start-swift-Rserver
   SwiftApps/SwiftR/Swift/exec/start-swift-workers
Log:
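Permit selection of provider staging vs. local staging (chosen by the first argument to start-swift-workers, default local-staging, and passed through to start-swift-Rserver). Provider staging works at modest scales but still frequently hangs. Fix some problems in process-termination cleanup.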

Modified: SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh
===================================================================
--- SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh	2010-09-20 16:37:35 UTC (rev 3631)
+++ SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh	2010-09-21 04:12:48 UTC (rev 3632)
@@ -98,6 +98,8 @@
 echo run $(pwd)/$callFile $(pwd)/$resultFile > $SLOTDIR/toR.fifo
 touch $SLOTDIR/lastwrite
 
+echo dummy stderr response 1>&2 # FIXME - testing if this is the provider staging problem (not xfering zero len stderr)
+
 head -3 < $SLOTDIR/fromR.fifo # FIXME: Trim this down to 1 line for each call (or same # lines for each, in particular, for "quit")
 
 # Fixme: how to get exceptions and stdout/stderr text from R server ???
\ No newline at end of file

Modified: SwiftApps/SwiftR/Swift/exec/start-swift-Rserver
===================================================================
--- SwiftApps/SwiftR/Swift/exec/start-swift-Rserver	2010-09-20 16:37:35 UTC (rev 3631)
+++ SwiftApps/SwiftR/Swift/exec/start-swift-Rserver	2010-09-21 04:12:48 UTC (rev 3632)
@@ -2,10 +2,15 @@
 
 SWIFTRBIN=$(cd $(dirname $0); pwd)
 
+serviceport=1985
+
 #rundir=/tmp/SwiftR/swiftserver
 rundir=/tmp/$USER/SwiftR/swiftserver # FIXME: handle multiple concurent independent swift servers per user
 #site=local
+
 location=$1
+stagingmethod=$2
+
 script=$SWIFTRBIN/rserver.swift
 
 trundir=$(mktemp -d $rundir.XXXX)
@@ -38,6 +43,8 @@
 fork   bash       /bin/bash null null null
 END
 
+if [ $stagingmethod = local-staging ]; then
+
 cat >sites.xml <<END
 <config>
 
@@ -56,7 +63,85 @@
     <profile namespace="globus" key="workersPerNode">4</profile> <!-- FIXME: make these settable -->
     <profile key="jobThrottle" namespace="karajan">.03</profile>
     <profile namespace="karajan" key="initialScore">10000</profile>
+#    <filesystem provider="local" url="none" />
+    <profile namespace="swift" key="stagingMethod">file</profile>
+    <workdirectory>/tmp/$USER/SwiftR/swiftserver</workdirectory>
+  </pool>
+
+  <pool handle="fork">
+    <execution provider="local" url="none" />
+    <profile namespace="karajan" key="initialScore">10000</profile>
+    <profile namespace="karajan" key="jobThrottle">.03</profile>
+    <filesystem provider="local"/>
+    <workdirectory>/tmp/$USER/SwiftR/swiftserver</workdirectory>
+  </pool>
+
+  <pool handle="passive">
+    <execution provider="coaster" url="none" jobmanager="local:local"/>
+    <profile namespace="globus" key="workerManager">passive</profile> 
+    <profile namespace="globus" key="workersPerNode">8</profile>
+    <profile key="jobThrottle" namespace="karajan">.07</profile>
+    <profile namespace="karajan" key="initialScore">10000</profile>
     <filesystem provider="local" url="none" />
+    <workdirectory>/home/wilde/swiftwork</workdirectory>
+  </pool>
+
+  <pool handle="service">
+    <execution provider="coaster-persistent" url="http://$(hostname -f):${serviceport}" jobmanager=""/>
+    <profile namespace="globus" key="workerManager">passive</profile>
+    <profile namespace="globus" key="workersPerNode">4</profile> <!-- FIXME: make these settable -->
+    <profile key="jobThrottle" namespace="karajan">.03</profile>
+    <profile namespace="karajan" key="initialScore">10000</profile>
+    <filesystem provider="local" url="none" />
+    <workdirectory>/home/wilde/swiftwork</workdirectory>
+  </pool>
+
+  <pool handle="pbs">
+    <profile namespace="globus" key="maxwalltime">00:00:10</profile>
+    <profile namespace="globus" key="maxtime">1800</profile>
+    <execution provider="coaster" url="none" jobManager="local:pbs"/>
+    <profile namespace="globus" key="workersPerNode">1</profile>
+    <profile namespace="karajan" key="initialScore">10000</profile>
+    <profile namespace="karajan" key="jobThrottle">5.99</profile>
+    <filesystem provider="local"/>
+    <workdirectory>$(pwd)</workdirectory>
+  </pool>
+
+</config>
+END
+
+cat >cf <<END
+wrapperlog.always.transfer=true
+sitedir.keep=true
+execution.retries=0
+lazy.errors=false
+status.mode=provider
+use.provider.staging=false
+#provider.staging.pin.swiftfiles=false
+END
+
+elif [ $stagingmethod = provider-staging ]; then
+
+cat >sites.xml <<END
+<config>
+
+  <pool handle="default">
+    <execution provider="coaster-persistent" url="http://localhost" jobmanager=""/>
+    <profile namespace="globus" key="workerManager">passive</profile>
+    <profile namespace="globus" key="workersPerNode">4</profile> <!-- FIXME: make these settable -->
+    <profile key="jobThrottle" namespace="karajan">.03</profile>
+    <profile namespace="karajan" key="initialScore">10000</profile>
+    <filesystem provider="local" url="none" />
+    <workdirectory>$rundir/swiftwork</workdirectory>
+  </pool>
+
+  <pool handle="local">
+    <execution provider="coaster" url="http://localhost" jobmanager="local:local"/>
+    <profile namespace="globus" key="workersPerNode">4</profile> <!-- FIXME: make these settable -->
+    <profile key="jobThrottle" namespace="karajan">.03</profile>
+    <profile namespace="karajan" key="initialScore">10000</profile>
+#    <filesystem provider="local" url="none" />
+    <profile namespace="swift" key="stagingMethod">file</profile>
     <workdirectory>/tmp/$USER/SwiftR/swiftserver</workdirectory>
   </pool>
 
@@ -64,6 +149,14 @@
     <execution provider="local" url="none" />
     <profile namespace="karajan" key="initialScore">10000</profile>
     <profile namespace="karajan" key="jobThrottle">.03</profile>
+    <profile namespace="swift" key="stagingMethod">file</profile>
+    <workdirectory>/tmp/$USER/SwiftR/swiftserver</workdirectory>
+  </pool>
+
+  <pool handle="OLDfork">
+    <execution provider="local" url="none" />
+    <profile namespace="karajan" key="initialScore">10000</profile>
+    <profile namespace="karajan" key="jobThrottle">.03</profile>
     <filesystem provider="local"/>
     <workdirectory>/tmp/$USER/SwiftR/swiftserver</workdirectory>
   </pool>
@@ -79,6 +172,16 @@
   </pool>
 
   <pool handle="service">
+    <execution provider="coaster-persistent" url="http://$(hostname -f):1985" jobmanager=""/>
+    <profile namespace="globus" key="workerManager">passive</profile>
+    <profile namespace="globus" key="workersPerNode">4</profile> <!-- FIXME: make these settable -->
+    <profile key="jobThrottle" namespace="karajan">.03</profile>
+    <profile namespace="karajan" key="initialScore">10000</profile>
+    <profile namespace="swift" key="stagingMethod">proxy</profile>
+    <workdirectory>/tmp/wilde/SwiftR/swiftwork</workdirectory>
+  </pool>
+
+  <pool handle="OLDservice">
     <execution provider="coaster-persistent" url="http://localhost" jobmanager=""/>
     <profile namespace="globus" key="workerManager">passive</profile>
     <profile namespace="globus" key="workersPerNode">4</profile> <!-- FIXME: make these settable -->
@@ -102,12 +205,27 @@
 </config>
 END
 
+cat >cf <<END
+wrapperlog.always.transfer=true
+sitedir.keep=true
+execution.retries=0
+lazy.errors=false
+status.mode=provider
+#use.provider.staging=false
+use.provider.staging=true
+provider.staging.pin.swiftfiles=false
+END
+
+fi
+
 cp $SWIFTRBIN/{EvalRBatchPersistent.sh,SwiftRServer.sh} .
 
 script=$(basename $script)
 
-$SWIFTRBIN/../swift/bin/swift -config swift.properties -tc.file tc -sites.file sites.xml $script -pipedir=$(pwd) -location=$location >& swift.stdouterr </dev/null
+# $SWIFTRBIN/../swift/bin/swift -config swift.properties -tc.file tc -sites.file sites.xml $script -pipedir=$(pwd) -location=$location >& swift.stdouterr </dev/null
 
+$SWIFTRBIN/../swift/bin/swift -config cf -tc.file tc -sites.file sites.xml $script -pipedir=$(pwd) -location=$location >& swift.stdouterr </dev/null
+
 exit
 
 

Modified: SwiftApps/SwiftR/Swift/exec/start-swift-workers
===================================================================
--- SwiftApps/SwiftR/Swift/exec/start-swift-workers	2010-09-20 16:37:35 UTC (rev 3631)
+++ SwiftApps/SwiftR/Swift/exec/start-swift-workers	2010-09-21 04:12:48 UTC (rev 3632)
@@ -1,11 +1,19 @@
 #! /bin/bash
 
-COMPUTEHOSTS=$1
+# Command arguments
+
+STAGINGMETHOD=${1:-local-staging}
+COMPUTEHOSTS=${2:-localhost}
+
 #  COMPUTEHOSTS='crush thwomp stomp crank steamroller grind churn trounce thrash vanquish'
 #  COMPUTEHOSTS='communicado'
 
-shift
+# Parameters # FIXME: Add to command line options
 
+SERVICEPORT=1985
+
+# shift # FIXME: Why? Fossil?
+
 SWIFTRBIN=$(cd $(dirname $0); pwd) # Find our bin dir (to use for running utility scripts)
 SWIFTBIN=$SWIFTRBIN/../swift/bin
 
@@ -22,6 +30,7 @@
       break;
     fi
   done
+  # FIXME: What happened to the error check here: in case server was not set to passive???
   CONTACT=$(echo $uriline | sed -e 's/^.*http:/http:/')
   echo Coaster contact: $CONTACT
 
@@ -37,7 +46,11 @@
     random=$(awk "BEGIN {printf \"%0.5d\", $RANDOM}")
     ID=$timestamp.$random
        # FIXME: make logging an argument; set false by default
-       ssh $host '/bin/sh -c '\'"mkdir -p $LOGDIR; WORKER_LOGGING_ENABLED=true $SWIFTBIN/worker.pl $CONTACT $ID $LOGDIR $IDLETIMEOUT 2>&1 & echo PID=\$!"\'  >remotepid.$host </dev/null &
+       # fixme:send worker.pl to remote host via stdin or scp.
+###### ssh $host '/bin/sh -c '\'"mkdir -p $LOGDIR; WORKER_LOGGING_ENABLED=true $SWIFTBIN/worker.pl $CONTACT $ID $LOGDIR $IDLETIMEOUT 2>&1 & echo PID=\$!"\'  >remotepid.$host </dev/null &
+       ssh $host /bin/sh -c \'"mkdir -p $LOGDIR"\'
+       scp $SWIFTBIN/worker.pl $host:$LOGDIR
+       ssh $host '/bin/sh -c '\'"WORKER_LOGGING_ENABLED=true $LOGDIR/worker.pl $CONTACT $ID $LOGDIR $IDLETIMEOUT 2>&1 & echo PID=\$!"\'  >remotepid.$host </dev/null &
        sshpids="$sshpids $!"
   done
 
@@ -46,18 +59,29 @@
 }
 
 rundir=/tmp/$USER/SwiftR/swiftworkers
-mkdir -p $rundir
+
+trundir=$(mktemp -d $rundir.XXXX)
+mkdir -p $trundir
+rm -rf $rundir
+ln -s $trundir $rundir
+
 servicedir=service
 cd $rundir
-out=`mktemp swift.stdouterr.XXXX`
+out=swift.stdouterr
 
-$SWIFTBIN/coaster-service -nosec >& coaster-service.log &
+$SWIFTBIN/coaster-service -nosec -p $SERVICEPORT >& coaster-service.log &
 coasterservicepid=$!
 
+cat >tc <<END
+localhost sh /bin/sh null null null
+END
+
+if [ $STAGINGMETHOD = local-staging ]; then
+
 cat >sites.xml <<END
 <config>
   <pool handle="localhost">
-    <execution provider="coaster-persistent" url="http://bridled.ci.uchicago.edu" jobmanager="local:local"/>
+    <execution provider="coaster-persistent" url="http://$(hostname -f):${SERVICEPORT}" jobmanager="local:local"/>
     <profile namespace="globus" key="workerManager">passive</profile>
     <profile namespace="globus" key="workersPerNode">4</profile>
     <profile key="jobThrottle" namespace="karajan">.03</profile>
@@ -68,21 +92,44 @@
 </config>
 END
 
-cat >tc <<END
-localhost sh /bin/sh null null null
-END
-
 cat >cf <<END
-
 wrapperlog.always.transfer=true
 sitedir.keep=true
 execution.retries=0
 lazy.errors=false
 status.mode=provider
 use.provider.staging=false
+provider.staging.pin.swiftfiles=false
+END
 
+elif [ $STAGINGMETHOD = provider-staging ]; then
+
+cat >sites.xml <<END
+<config>
+  <pool handle="localhost">
+    <execution provider="coaster-persistent" url="http://$(hostname -f):${SERVICEPORT}" jobmanager="local:local"/>
+    <profile namespace="globus" key="workerManager">passive</profile>
+    <profile namespace="globus" key="workersPerNode">4</profile>
+    <profile key="jobThrottle" namespace="karajan">.03</profile>
+    <profile namespace="karajan" key="initialScore">10000</profile>
+    <profile namespace="swift" key="stagingMethod">proxy</profile>
+    <workdirectory>/tmp/$USER/SwiftR/swiftwork</workdirectory>
+  </pool>
+</config>
 END
 
+cat >cf <<END
+wrapperlog.always.transfer=true
+sitedir.keep=true
+execution.retries=0
+lazy.errors=false
+status.mode=provider
+use.provider.staging=true
+provider.staging.pin.swiftfiles=false
+END
+
+fi
+
 cat >passivate.swift <<END
 
 type file;
@@ -110,8 +157,8 @@
   for rpfile in $(ls -1 remotepid.*); do
     rpid=$(grep PID= $rpfile | sed -e 's/PID=//')
     rhost=$(echo $rpfile | sed -e 's/remotepid.//')
-    echo from $rpfile: doing ssh $rhost kill $rpid
-    ssh $rhost kill -s TERM -- '-$(' ps -p $rpid -o pgid --no-headers ')'
+    echo Based on $rpfile: terminating process group of process $rpid on $rhost
+    ssh $rhost sh -c \'kill -s TERM -- '-$(' ps -p $rpid -o pgid --no-headers ')'\'
   done
   if [ "_$sshpids$starterpid$coasterservicepid" != _ ]; then
     echo kill $sshpids $starterpid $coasterservicepid >& /dev/null
@@ -128,6 +175,6 @@
 
 echo "==> Service started and set to passive mode. Use ^C to terminate all services and workers."
 
-$SWIFTRBIN/start-swift-Rserver service
+$SWIFTRBIN/start-swift-Rserver service $STAGINGMETHOD
 
 wait




More information about the Swift-commit mailing list