[Swift-commit] r3632 - SwiftApps/SwiftR/Swift/exec
noreply at svn.ci.uchicago.edu
noreply at svn.ci.uchicago.edu
Mon Sep 20 23:12:48 CDT 2010
Author: wilde
Date: 2010-09-20 23:12:48 -0500 (Mon, 20 Sep 2010)
New Revision: 3632
Modified:
SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh
SwiftApps/SwiftR/Swift/exec/start-swift-Rserver
SwiftApps/SwiftR/Swift/exec/start-swift-workers
Log:
Permit selection of provider staging vs. local staging. Provider staging works at modest scales but still frequently hangs. Fix some problems in process termination cleanup.
Modified: SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh
===================================================================
--- SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh 2010-09-20 16:37:35 UTC (rev 3631)
+++ SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh 2010-09-21 04:12:48 UTC (rev 3632)
@@ -98,6 +98,8 @@
echo run $(pwd)/$callFile $(pwd)/$resultFile > $SLOTDIR/toR.fifo
touch $SLOTDIR/lastwrite
+echo dummy stderr response 1>&2 # FIXME - testing if this is the provider staging problem (not xfering zero len stderr)
+
head -3 < $SLOTDIR/fromR.fifo # FIXME: Trim this down to 1 line for each call (or same # lines for each, in particular, for "quit")
# Fixme: how to get exceptions and stdout/stderr text from R server ???
\ No newline at end of file
Modified: SwiftApps/SwiftR/Swift/exec/start-swift-Rserver
===================================================================
--- SwiftApps/SwiftR/Swift/exec/start-swift-Rserver 2010-09-20 16:37:35 UTC (rev 3631)
+++ SwiftApps/SwiftR/Swift/exec/start-swift-Rserver 2010-09-21 04:12:48 UTC (rev 3632)
@@ -2,10 +2,15 @@
SWIFTRBIN=$(cd $(dirname $0); pwd)
+serviceport=1985
+
#rundir=/tmp/SwiftR/swiftserver
rundir=/tmp/$USER/SwiftR/swiftserver # FIXME: handle multiple concurent independent swift servers per user
#site=local
+
location=$1
+stagingmethod=$2
+
script=$SWIFTRBIN/rserver.swift
trundir=$(mktemp -d $rundir.XXXX)
@@ -38,6 +43,8 @@
fork bash /bin/bash null null null
END
+if [ $stagingmethod = local-staging ]; then
+
cat >sites.xml <<END
<config>
@@ -56,7 +63,85 @@
<profile namespace="globus" key="workersPerNode">4</profile> <!-- FIXME: make these settable -->
<profile key="jobThrottle" namespace="karajan">.03</profile>
<profile namespace="karajan" key="initialScore">10000</profile>
+# <filesystem provider="local" url="none" />
+ <profile namespace="swift" key="stagingMethod">file</profile>
+ <workdirectory>/tmp/$USER/SwiftR/swiftserver</workdirectory>
+ </pool>
+
+ <pool handle="fork">
+ <execution provider="local" url="none" />
+ <profile namespace="karajan" key="initialScore">10000</profile>
+ <profile namespace="karajan" key="jobThrottle">.03</profile>
+ <filesystem provider="local"/>
+ <workdirectory>/tmp/$USER/SwiftR/swiftserver</workdirectory>
+ </pool>
+
+ <pool handle="passive">
+ <execution provider="coaster" url="none" jobmanager="local:local"/>
+ <profile namespace="globus" key="workerManager">passive</profile>
+ <profile namespace="globus" key="workersPerNode">8</profile>
+ <profile key="jobThrottle" namespace="karajan">.07</profile>
+ <profile namespace="karajan" key="initialScore">10000</profile>
<filesystem provider="local" url="none" />
+ <workdirectory>/home/wilde/swiftwork</workdirectory>
+ </pool>
+
+ <pool handle="service">
+ <execution provider="coaster-persistent" url="http://$(hostname -f):${serviceport}" jobmanager=""/>
+ <profile namespace="globus" key="workerManager">passive</profile>
+ <profile namespace="globus" key="workersPerNode">4</profile> <!-- FIXME: make these settable -->
+ <profile key="jobThrottle" namespace="karajan">.03</profile>
+ <profile namespace="karajan" key="initialScore">10000</profile>
+ <filesystem provider="local" url="none" />
+ <workdirectory>/home/wilde/swiftwork</workdirectory>
+ </pool>
+
+ <pool handle="pbs">
+ <profile namespace="globus" key="maxwalltime">00:00:10</profile>
+ <profile namespace="globus" key="maxtime">1800</profile>
+ <execution provider="coaster" url="none" jobManager="local:pbs"/>
+ <profile namespace="globus" key="workersPerNode">1</profile>
+ <profile namespace="karajan" key="initialScore">10000</profile>
+ <profile namespace="karajan" key="jobThrottle">5.99</profile>
+ <filesystem provider="local"/>
+ <workdirectory>$(pwd)</workdirectory>
+ </pool>
+
+</config>
+END
+
+cat >cf <<END
+wrapperlog.always.transfer=true
+sitedir.keep=true
+execution.retries=0
+lazy.errors=false
+status.mode=provider
+use.provider.staging=false
+#provider.staging.pin.swiftfiles=false
+END
+
+elif [ $stagingmethod = provider-staging ]; then
+
+cat >sites.xml <<END
+<config>
+
+ <pool handle="default">
+ <execution provider="coaster-persistent" url="http://localhost" jobmanager=""/>
+ <profile namespace="globus" key="workerManager">passive</profile>
+ <profile namespace="globus" key="workersPerNode">4</profile> <!-- FIXME: make these settable -->
+ <profile key="jobThrottle" namespace="karajan">.03</profile>
+ <profile namespace="karajan" key="initialScore">10000</profile>
+ <filesystem provider="local" url="none" />
+ <workdirectory>$rundir/swiftwork</workdirectory>
+ </pool>
+
+ <pool handle="local">
+ <execution provider="coaster" url="http://localhost" jobmanager="local:local"/>
+ <profile namespace="globus" key="workersPerNode">4</profile> <!-- FIXME: make these settable -->
+ <profile key="jobThrottle" namespace="karajan">.03</profile>
+ <profile namespace="karajan" key="initialScore">10000</profile>
+# <filesystem provider="local" url="none" />
+ <profile namespace="swift" key="stagingMethod">file</profile>
<workdirectory>/tmp/$USER/SwiftR/swiftserver</workdirectory>
</pool>
@@ -64,6 +149,14 @@
<execution provider="local" url="none" />
<profile namespace="karajan" key="initialScore">10000</profile>
<profile namespace="karajan" key="jobThrottle">.03</profile>
+ <profile namespace="swift" key="stagingMethod">file</profile>
+ <workdirectory>/tmp/$USER/SwiftR/swiftserver</workdirectory>
+ </pool>
+
+ <pool handle="OLDfork">
+ <execution provider="local" url="none" />
+ <profile namespace="karajan" key="initialScore">10000</profile>
+ <profile namespace="karajan" key="jobThrottle">.03</profile>
<filesystem provider="local"/>
<workdirectory>/tmp/$USER/SwiftR/swiftserver</workdirectory>
</pool>
@@ -79,6 +172,16 @@
</pool>
<pool handle="service">
+ <execution provider="coaster-persistent" url="http://$(hostname -f):1985" jobmanager=""/>
+ <profile namespace="globus" key="workerManager">passive</profile>
+ <profile namespace="globus" key="workersPerNode">4</profile> <!-- FIXME: make these settable -->
+ <profile key="jobThrottle" namespace="karajan">.03</profile>
+ <profile namespace="karajan" key="initialScore">10000</profile>
+ <profile namespace="swift" key="stagingMethod">proxy</profile>
+ <workdirectory>/tmp/wilde/SwiftR/swiftwork</workdirectory>
+ </pool>
+
+ <pool handle="OLDservice">
<execution provider="coaster-persistent" url="http://localhost" jobmanager=""/>
<profile namespace="globus" key="workerManager">passive</profile>
<profile namespace="globus" key="workersPerNode">4</profile> <!-- FIXME: make these settable -->
@@ -102,12 +205,27 @@
</config>
END
+cat >cf <<END
+wrapperlog.always.transfer=true
+sitedir.keep=true
+execution.retries=0
+lazy.errors=false
+status.mode=provider
+#use.provider.staging=false
+use.provider.staging=true
+provider.staging.pin.swiftfiles=false
+END
+
+fi
+
cp $SWIFTRBIN/{EvalRBatchPersistent.sh,SwiftRServer.sh} .
script=$(basename $script)
-$SWIFTRBIN/../swift/bin/swift -config swift.properties -tc.file tc -sites.file sites.xml $script -pipedir=$(pwd) -location=$location >& swift.stdouterr </dev/null
+# $SWIFTRBIN/../swift/bin/swift -config swift.properties -tc.file tc -sites.file sites.xml $script -pipedir=$(pwd) -location=$location >& swift.stdouterr </dev/null
+$SWIFTRBIN/../swift/bin/swift -config cf -tc.file tc -sites.file sites.xml $script -pipedir=$(pwd) -location=$location >& swift.stdouterr </dev/null
+
exit
Modified: SwiftApps/SwiftR/Swift/exec/start-swift-workers
===================================================================
--- SwiftApps/SwiftR/Swift/exec/start-swift-workers 2010-09-20 16:37:35 UTC (rev 3631)
+++ SwiftApps/SwiftR/Swift/exec/start-swift-workers 2010-09-21 04:12:48 UTC (rev 3632)
@@ -1,11 +1,19 @@
#! /bin/bash
-COMPUTEHOSTS=$1
+# Command arguments
+
+STAGINGMETHOD=${1:-local-staging}
+COMPUTEHOSTS=${2:-localhost}
+
# COMPUTEHOSTS='crush thwomp stomp crank steamroller grind churn trounce thrash vanquish'
# COMPUTEHOSTS='communicado'
-shift
+# Parameters # FIXME: Add to command line options
+SERVICEPORT=1985
+
+# shift # FIXME: Why? Fossil?
+
SWIFTRBIN=$(cd $(dirname $0); pwd) # Find our bin dir (to use for running utility scripts)
SWIFTBIN=$SWIFTRBIN/../swift/bin
@@ -22,6 +30,7 @@
break;
fi
done
+ # FIXME: What happened to the error check here: in case server was not set to passive???
CONTACT=$(echo $uriline | sed -e 's/^.*http:/http:/')
echo Coaster contact: $CONTACT
@@ -37,7 +46,11 @@
random=$(awk "BEGIN {printf \"%0.5d\", $RANDOM}")
ID=$timestamp.$random
# FIXME: make logging an argument; set false by default
- ssh $host '/bin/sh -c '\'"mkdir -p $LOGDIR; WORKER_LOGGING_ENABLED=true $SWIFTBIN/worker.pl $CONTACT $ID $LOGDIR $IDLETIMEOUT 2>&1 & echo PID=\$!"\' >remotepid.$host </dev/null &
+ # fixme:send worker.pl to remote host via stdin or scp.
+###### ssh $host '/bin/sh -c '\'"mkdir -p $LOGDIR; WORKER_LOGGING_ENABLED=true $SWIFTBIN/worker.pl $CONTACT $ID $LOGDIR $IDLETIMEOUT 2>&1 & echo PID=\$!"\' >remotepid.$host </dev/null &
+ ssh $host /bin/sh -c \'"mkdir -p $LOGDIR"\'
+ scp $SWIFTBIN/worker.pl $host:$LOGDIR
+ ssh $host '/bin/sh -c '\'"WORKER_LOGGING_ENABLED=true $LOGDIR/worker.pl $CONTACT $ID $LOGDIR $IDLETIMEOUT 2>&1 & echo PID=\$!"\' >remotepid.$host </dev/null &
sshpids="$sshpids $!"
done
@@ -46,18 +59,29 @@
}
rundir=/tmp/$USER/SwiftR/swiftworkers
-mkdir -p $rundir
+
+trundir=$(mktemp -d $rundir.XXXX)
+mkdir -p $trundir
+rm -rf $rundir
+ln -s $trundir $rundir
+
servicedir=service
cd $rundir
-out=`mktemp swift.stdouterr.XXXX`
+out=swift.stdouterr
-$SWIFTBIN/coaster-service -nosec >& coaster-service.log &
+$SWIFTBIN/coaster-service -nosec -p $SERVICEPORT >& coaster-service.log &
coasterservicepid=$!
+cat >tc <<END
+localhost sh /bin/sh null null null
+END
+
+if [ $STAGINGMETHOD = local-staging ]; then
+
cat >sites.xml <<END
<config>
<pool handle="localhost">
- <execution provider="coaster-persistent" url="http://bridled.ci.uchicago.edu" jobmanager="local:local"/>
+ <execution provider="coaster-persistent" url="http://$(hostname -f):${SERVICEPORT}" jobmanager="local:local"/>
<profile namespace="globus" key="workerManager">passive</profile>
<profile namespace="globus" key="workersPerNode">4</profile>
<profile key="jobThrottle" namespace="karajan">.03</profile>
@@ -68,21 +92,44 @@
</config>
END
-cat >tc <<END
-localhost sh /bin/sh null null null
-END
-
cat >cf <<END
-
wrapperlog.always.transfer=true
sitedir.keep=true
execution.retries=0
lazy.errors=false
status.mode=provider
use.provider.staging=false
+provider.staging.pin.swiftfiles=false
+END
+elif [ $STAGINGMETHOD = provider-staging ]; then
+
+cat >sites.xml <<END
+<config>
+ <pool handle="localhost">
+ <execution provider="coaster-persistent" url="http://$(hostname -f):${SERVICEPORT}" jobmanager="local:local"/>
+ <profile namespace="globus" key="workerManager">passive</profile>
+ <profile namespace="globus" key="workersPerNode">4</profile>
+ <profile key="jobThrottle" namespace="karajan">.03</profile>
+ <profile namespace="karajan" key="initialScore">10000</profile>
+ <profile namespace="swift" key="stagingMethod">proxy</profile>
+ <workdirectory>/tmp/$USER/SwiftR/swiftwork</workdirectory>
+ </pool>
+</config>
END
+cat >cf <<END
+wrapperlog.always.transfer=true
+sitedir.keep=true
+execution.retries=0
+lazy.errors=false
+status.mode=provider
+use.provider.staging=true
+provider.staging.pin.swiftfiles=false
+END
+
+fi
+
cat >passivate.swift <<END
type file;
@@ -110,8 +157,8 @@
for rpfile in $(ls -1 remotepid.*); do
rpid=$(grep PID= $rpfile | sed -e 's/PID=//')
rhost=$(echo $rpfile | sed -e 's/remotepid.//')
- echo from $rpfile: doing ssh $rhost kill $rpid
- ssh $rhost kill -s TERM -- '-$(' ps -p $rpid -o pgid --no-headers ')'
+ echo Based on $rpfile: terminating process group of process $rpid on $rhost
+ ssh $rhost sh -c \'kill -s TERM -- '-$(' ps -p $rpid -o pgid --no-headers ')'\'
done
if [ "_$sshpids$starterpid$coasterservicepid" != _ ]; then
echo kill $sshpids $starterpid $coasterservicepid >& /dev/null
@@ -128,6 +175,6 @@
echo "==> Service started and set to passive mode. Use ^C to terminate all services and workers."
-$SWIFTRBIN/start-swift-Rserver service
+$SWIFTRBIN/start-swift-Rserver service $STAGINGMETHOD
wait
More information about the Swift-commit mailing list