[Swift-commit] r4922 - trunk/bin/grid
ketan at ci.uchicago.edu
ketan at ci.uchicago.edu
Mon Aug 1 14:29:06 CDT 2011
Author: ketan
Date: 2011-08-01 14:29:06 -0500 (Mon, 01 Aug 2011)
New Revision: 4922
Added:
trunk/bin/grid/start-mcs
Modified:
trunk/bin/grid/start-ranger-service
Log:
Added a passive-coasters start script for the MCS network of machines (crush, stomp, etc.).
Added: trunk/bin/grid/start-mcs
===================================================================
--- trunk/bin/grid/start-mcs (rev 0)
+++ trunk/bin/grid/start-mcs 2011-08-01 19:29:06 UTC (rev 4922)
@@ -0,0 +1,217 @@
+#! /bin/bash
+
+Usage()
+{
+ echo $0: 'Usage: start-mcs [-ls|--local-staging|-ps|--provider-staging|-sp|--service-port portnum] site1 ... siteN'
+}
+
+# Command arguments: optional staging flags first, then the compute hosts.
+STAGINGMETHOD=local-staging
+COMPUTEHOSTS=localhost
+
+# Consume leading options; the first word that does not start with "-" ends them.
+while [[ $# -gt 0 && $1 == -* ]]; do
+  case "$1" in
+    -ls|--local-staging) STAGINGMETHOD=local-staging; shift ;;
+    -ps|--provider-staging) STAGINGMETHOD=provider-staging; shift ;;
+    *) echo "$0: Invalid argument $1"; Usage; exit 1 ;;
+  esac
+done
+
+if [[ $# -gt 0 ]]; then
+  COMPUTEHOSTS=$*
+fi
+echo Staging method: $STAGINGMETHOD
+
+for h in $COMPUTEHOSTS; do
+  # Validate first: a name starting with "-" would be taken by ssh as an option.
+  if [[ $h == -* ]]; then
+    echo "$0: Error: invalid argument or host name: $h"
+    Usage
+    exit 1
+  fi
+  echo Host: $h
+  ssh "$h" rm -rf "/tmp/${USER:?}"   # wipe /tmp/$USER on that host; abort if USER is unset
+done
+
+# COMPUTEHOSTS='crush thwomp stomp crank steamroller grind churn trounce thrash vanquish'
+
+SWIFTBIN=$(dirname $(which swift))
+
+function wait-and-start-workers
+{
+ # Poll $out up to 20 times, one second apart, for the banner shown on the last line of this
+ # comment, then copy worker.pl to every host in $COMPUTEHOSTS and start it there over ssh:
+ # Passive queue processor initialized. Callback URI is http://140.221.8.62:55379
+ for try in $(seq 1 20); do
+ uriline=$(grep "Passive queue processor initialized. Callback URI is" $out 2> /dev/null)
+ if [ "_$uriline" = _ ]; then
+ sleep 1
+ else
+ break;
+ fi
+ done
+ # NOTE(review): uriline is not read after this loop, so a banner that never shows up goes unreported.
+# FIXME: What happened to the error check here: in case server was not set to passive???
+ # SERVICE_URL is assigned at top level before this function is started; it is handed to worker.pl below.
+ echo Coaster contact: $SERVICE_URL
+ # Directory on each remote host that receives worker.pl and is passed to it as an argument.
+ LOGDIR=/tmp/$USER/Swift/workers
+# mkdir -p $LOGDIR # moved this to the ssh command
+
+ IDLETIMEOUT=$((60*60*240)) # 10 days: FIXME: make this a command line arg
+
+ rm -rf remotepid.* # FIXME: should not be needed if we start in a new dir each time
+ # Per host: build an ID from the current time plus $RANDOM zero-padded to five digits.
+ for host in $(echo $COMPUTEHOSTS); do
+ timestamp=$(date "+%Y.%m%d.%H%M%S")
+ random=$(awk "BEGIN {printf \"%0.5d\", $RANDOM}")
+ ID=$timestamp.$random
+ # FIXME: make logging an argument; set false by default
+ # fixme:send worker.pl to remote host via stdin or scp.
+ # ssh $host '/bin/sh -c '\'"mkdir -p $LOGDIR; WORKER_LOGGING_LEVEL=DEBUG $SWIFTBIN/worker.pl $CONTACT $ID $LOGDIR $IDLETIMEOUT 2>&1 & echo PID=\$!"\' >remotepid.$host </dev/null &
+ ssh $host /bin/sh -c \'"mkdir -p $LOGDIR"\'
+ scp $SWIFTBIN/worker.pl $host:$LOGDIR
+ #ssh $host '/bin/sh -c '\'"WORKER_LOGGING_LEVEL=DEBUG $LOGDIR/worker.pl $CONTACT $ID $LOGDIR $IDLETIMEOUT 2>&1 & echo PID=\$!"\' >remotepid.$host </dev/null &
+ ssh $host '/bin/sh -c '\'"WORKER_LOGGING_LEVEL=DEBUG $LOGDIR/worker.pl $SERVICE_URL $ID $LOGDIR $IDLETIMEOUT 2>&1 & echo PID=\$!"\' >remotepid.$host </dev/null &
+ sshpids="$sshpids $!"
+ done
+ # $! above is the local ssh client started in the background; the remote "PID=" line goes to remotepid.$host.
+ echo Started workers from these ssh processes: $sshpids
+ echo $sshpids > $sshpidfile
+}
+
+# make swiftworkers.XXXX temp dir and link swiftworkers/ to it
+rundir=/tmp/$USER/Swift/server
+mkdir -p $(dirname $rundir)
+trundir=$(mktemp -d $rundir.XXXX)
+rm -rf $rundir
+ln -s $trundir $rundir
+
+echo "Logging to $trundir"
+
+cd $rundir
+out=swift.stdouterr
+
+$SWIFTBIN/coaster-service -nosec -passive -portfile service.sport -localportfile service.wport &> service.log &
+coasterservicepid=$!
+
+#wait until the service properly gets started
+sleep 5
+
+SPORT=$(cat service.sport)
+WPORT=$(cat service.wport)
+SERVICE_URL=http://$(hostname -f):$WPORT
+#$SWIFTBIN/coaster-service -nosec -p $SERVICEPORT >& coaster-service.log &
+echo "service host:port is, " $(hostname -f):${SPORT}
+cat >tc <<END
+localhost sh /bin/sh null null null
+END
+
+if [ $STAGINGMETHOD = local-staging ]; then
+
+cat >sites.xml <<ENDS
+<config>
+ <pool handle="localhost">
+ <execution provider="coaster-persistent" url="http://$(hostname -f):${SPORT}" jobmanager="local:local"/>
+ <profile namespace="globus" key="workerManager">passive</profile>
+ <profile namespace="globus" key="workersPerNode">4</profile>
+ <profile key="jobThrottle" namespace="karajan">.03</profile>
+ <profile namespace="karajan" key="initialScore">10000</profile>
+ <filesystem provider="local" url="none" />
+ <workdirectory>$HOME/swiftwork</workdirectory>
+ </pool>
+</config>
+ENDS
+
+cat >cf <<ENDC
+wrapperlog.always.transfer=true
+sitedir.keep=true
+execution.retries=0
+lazy.errors=false
+status.mode=provider
+use.provider.staging=false
+provider.staging.pin.swiftfiles=false
+ENDC
+
+elif [ $STAGINGMETHOD = provider-staging ]; then
+
+cat >sites.xml <<ENDS
+<config>
+ <pool handle="localhost">
+ <execution provider="coaster-persistent" url="http://$(hostname -f):${SPORT}" jobmanager="local:local"/>
+ <profile namespace="globus" key="workerManager">passive</profile>
+ <profile namespace="globus" key="workersPerNode">4</profile>
+ <profile key="jobThrottle" namespace="karajan">.03</profile>
+ <profile namespace="karajan" key="initialScore">10000</profile>
+ <profile namespace="swift" key="stagingMethod">proxy</profile>
+ <workdirectory>/tmp/$USER/SwiftR/swiftwork</workdirectory>
+ </pool>
+</config>
+ENDS
+
+cat >cf <<ENDC
+wrapperlog.always.transfer=true
+sitedir.keep=true
+execution.retries=0
+lazy.errors=false
+status.mode=provider
+use.provider.staging=true
+provider.staging.pin.swiftfiles=false
+ENDC
+
+fi
+
+cat >passivate.swift <<ENDP
+
+type file;
+
+app passivate ()
+{
+ sh "-c" "echo dummy swift job;";
+}
+
+passivate();
+ENDP
+# passivate.swift is referenced only by a commented-out swift command; $out is created here if missing.
+touch $out
+sshpidfile=${out/stdouterr/workerpids}
+# sshpidfile is $out with "stdouterr" replaced by "workerpids"; wait-and-start-workers writes the ssh PIDs to it.
+echo swift output is in: $out, pids in $sshpidfile
+
+TRAPS="EXIT 1 2 3 15" # Signals and conditions to trap
+
+# Cleanup handler: stop the remote worker process groups, then the local ssh clients,
+# the worker starter and the coaster service. Installed for exit and signals 1, 2, 3 and 15.
+function onexit {
+  trap - $TRAPS   # reset the traps so the handler is not re-entered during cleanup
+  sshpids=$(cat "$sshpidfile" 2> /dev/null)   # file is absent until the starter has launched the workers
+  echo "Terminating worker processes $sshpids, starter $starterpid, and coaster-service pid $coasterservicepid"
+
+  for rpfile in remotepid.*; do
+    [ -e "$rpfile" ] || continue   # unmatched glob stays literal: no worker was started
+    rpid=$(grep PID= "$rpfile" | sed -e 's/PID=//')
+    rhost=${rpfile#remotepid.}
+    [ -n "$rpid" ] || continue   # the remote side never reported a PID
+    echo Based on $rpfile: terminating process group of process $rpid on $rhost
+    ssh $rhost sh -c \''PGID=$(ps -p '$rpid' -o pgid --no-headers|sed -e "s/ //g"); kill -s TERM -- -$PGID'\'
+  done
+
+  if [ -n "$sshpids$starterpid$coasterservicepid" ]; then
+    # Send the signal for real (an "echo kill ..." into /dev/null did nothing); stale PIDs fail quietly.
+    kill $sshpids $starterpid $coasterservicepid >& /dev/null
+  fi
+  kill 0 # signal every process still in this script's process group, the script included
+}
+
+trap onexit $TRAPS
+
+wait-and-start-workers &
+starterpid=$!
+
+#$SWIFTBIN/swift -config cf -tc.file tc -sites.file sites.xml passivate.swift 2>&1 </dev/null | tee $out
+
+echo "==> Service started and set to passive mode. Use ^C to terminate all services and workers."
+
+wait
+
Property changes on: trunk/bin/grid/start-mcs
___________________________________________________________________
Added: svn:executable
+ *
Modified: trunk/bin/grid/start-ranger-service
===================================================================
--- trunk/bin/grid/start-ranger-service 2011-08-01 19:04:52 UTC (rev 4921)
+++ trunk/bin/grid/start-ranger-service 2011-08-01 19:29:06 UTC (rev 4922)
@@ -1,6 +1,5 @@
#! /bin/bash
-# FIXME: make these commandline keyword arguments, eg --nodes=
function usage ()
{
echo "Usage:"
More information about the Swift-commit
mailing list