[Swift-commit] r4543 - SwiftApps/SwiftR/Swift/exec
tga at ci.uchicago.edu
tga at ci.uchicago.edu
Wed Jun 1 17:15:27 CDT 2011
Author: tga
Date: 2011-06-01 17:15:27 -0500 (Wed, 01 Jun 2011)
New Revision: 4543
Modified:
SwiftApps/SwiftR/Swift/exec/configure-server-cobalt
SwiftApps/SwiftR/Swift/exec/configure-server-crayxt
SwiftApps/SwiftR/Swift/exec/configure-server-local
SwiftApps/SwiftR/Swift/exec/configure-server-pbs
SwiftApps/SwiftR/Swift/exec/configure-server-pbsauto
SwiftApps/SwiftR/Swift/exec/configure-server-sge
SwiftApps/SwiftR/Swift/exec/configure-server-ssh
SwiftApps/SwiftR/Swift/exec/start-swift
Log:
Cleaning up parameter passing to the configure scripts.
Getting pbsauto working, so that automatic PBS job submission is available from R without custom config files. These changes have not been thoroughly tested yet.
Modified: SwiftApps/SwiftR/Swift/exec/configure-server-cobalt
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-cobalt 2011-06-01 20:01:00 UTC (rev 4542)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-cobalt 2011-06-01 22:15:27 UTC (rev 4543)
@@ -2,9 +2,6 @@
# configuration for cobalt with manually-started Swift workers (passive coasters)
-cores=$1
-throttle=$2
-
cat >tc <<END
cobalt bash /bin/bash null null ENV::PATH="$PATH";GLOBUS::maxwalltime="00:01:00"
END
Modified: SwiftApps/SwiftR/Swift/exec/configure-server-crayxt
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-crayxt 2011-06-01 20:01:00 UTC (rev 4542)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-crayxt 2011-06-01 22:15:27 UTC (rev 4543)
@@ -2,8 +2,6 @@
# configuration for PBS with manually-started Swift workers (passive coasters)
-cores=$1
-throttlePBS=$2
cat >tc <<END
pbs bash /bin/bash null null ENV::PATH="$PATH";GLOBUS::maxwalltime="00:01:00"
@@ -24,7 +22,7 @@
<!--
<profile namespace="globus" key="workersPerNode">1</profile>
-->
- <profile namespace="karajan" key="jobThrottle">$throttlePBS</profile>
+ <profile namespace="karajan" key="jobThrottle">$throttle</profile>
<profile namespace="karajan" key="initialScore">10000</profile>
<filesystem provider="local" url="none"/>
<profile namespace="env" key="SWIFTR_TMP">$LUSTRE_TMP</profile>
Modified: SwiftApps/SwiftR/Swift/exec/configure-server-local
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-local 2011-06-01 20:01:00 UTC (rev 4542)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-local 2011-06-01 22:15:27 UTC (rev 4543)
@@ -2,8 +2,6 @@
throttleOneCore="0.00" # FIXME: test if new swft fix makes zero OK rather than -0.001
-cores=$1
-
if [ -r /proc/cpuinfo ]; then
localcores=$(grep '^processor' /proc/cpuinfo | wc -l)
else
Modified: SwiftApps/SwiftR/Swift/exec/configure-server-pbs
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-pbs 2011-06-01 20:01:00 UTC (rev 4542)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-pbs 2011-06-01 22:15:27 UTC (rev 4543)
@@ -2,10 +2,6 @@
# configuration for PBS with manually-started Swift workers (passive coasters)
-cores=$1
-
-throttlePBS=$2
-
cat >tc <<END
pbs bash /bin/bash null null ENV::PATH="$PATH";GLOBUS::maxwalltime="00:01:00"
END
@@ -19,7 +15,7 @@
<execution provider="coaster" url="none" jobmanager="local:NA"/>
<profile namespace="globus" key="workerManager">passive</profile>
<profile namespace="globus" key="workersPerNode">$cores</profile>
- <profile namespace="karajan" key="jobThrottle">$throttlePBS</profile>
+ <profile namespace="karajan" key="jobThrottle">$throttle</profile>
<profile namespace="karajan" key="initialScore">10000</profile>
<filesystem provider="local" url="none"/>
<profile namespace="env" key="R_LIBS_USER">$R_LIBS_USER</profile>
Modified: SwiftApps/SwiftR/Swift/exec/configure-server-pbsauto
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-pbsauto 2011-06-01 20:01:00 UTC (rev 4542)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-pbsauto 2011-06-01 22:15:27 UTC (rev 4543)
@@ -1,63 +1,74 @@
#! /usr/bin/env bash
-throttlePBS=.31 # FIXME: parameterize thsi and several other variables, below.
-
+#TODO: make configurable
+maxwalltime="00:10:00"
cat >tc <<END
-fork bashlocal /bin/bash null null null
-pbscoast bash /bin/bash null null ENV::PATH="$PATH";GLOBUS::maxwalltime="00:10:00"
+pbscoast bash /bin/bash null null ENV::PATH="$PATH";GLOBUS::maxwalltime="$maxwalltime"
END
cat >sites.xml <<END
<config>
- <pool handle="fork">
- <execution provider="local" url="none" />
- <profile key="jobThrottle" namespace="karajan">0.15</profile>
- <profile namespace="karajan" key="initialScore">10000</profile>
- <filesystem provider="local" url="none" />
- <profile namespace="env" key="R_LIBS_USER">$R_LIBS_USER</profile>
- <workdirectory>$(pwd)/swiftwork</workdirectory>
- </pool>
- <pool handle="pbsdirect">
- <execution provider="pbs" url="none" />
- <profile namespace="globus" key="queue">fast</profile>
- <profile namespace="globus" key="maxwalltime">00:59:00</profile>
- <profile key="jobThrottle" namespace="karajan">$throttlePBS</profile>
- <profile namespace="karajan" key="initialScore">10000</profile>
- <filesystem provider="local" url="none" />
- <profile namespace="env" key="R_LIBS_USER">$R_LIBS_USER</profile>
- <workdirectory>$HOME/swiftwork</workdirectory>
- </pool>
<pool handle="pbscoast">
<execution provider="coaster" url="none" jobmanager="local:pbs"/>
- <profile namespace="globus" key="queue">short</profile>
- <profile namespace="globus" key="maxTime">12000</profile>
- <profile namespace="globus" key="slots">32</profile>
- <profile namespace="globus" key="nodeGranularity">1</profile>
- <profile namespace="globus" key="maxNodes">1</profile>
- <profile namespace="globus" key="workersPerNode">1</profile>
- <profile namespace="karajan" key="jobThrottle">2.55</profile>
+ <filesystem provider="local" url="none"/>
+ <profile namespace="env" key="R_LIBS_USER">$R_LIBS_USER</profile>
+ <workdirectory>$(pwd)/swiftwork</workdirectory>
+ <scratch>$(pwd)/swiftscratch</scratch>
+ <profile namespace="karajan" key="jobThrottle">$throttle</profile>
<profile namespace="karajan" key="initialScore">10000</profile>
- <filesystem provider="local" url="none"/>
- <workdirectory>$HOME/swiftwork</workdirectory>
+ <!-- max number of cores in total -->
+ <profile namespace="globus" key="slots">$nodes</profile>
+ <profile namespace="globus" key="workersPerNode">$cores</profile>
+
+ <!-- these settings control the size of the request blocks
+ put through the batch system -->
+ <profile namespace="globus" key="maxNodes">1</profile>
+ <profile namespace="globus" key="nodeGranularity">1</profile>
+
+
+END
+if [ "$queue" != NONE ]; then
+ #TODO: error handling
+ # assume time in H:M:S format
+ t
+ cat >> sites.xml <<END
+ <profile namespace="globus" key="queue">$queue</profile>
+END
+fi
+
+if [ "$time" != NONE ]; then
+ cat >> sites.xml <<END
+ <profile namespace="globus" key="maxTime">$time_mins</profile>
+END
+fi
+
+cat >> sites.xml <<END
</pool>
</config>
END
-# <profile namespace="globus" key="maxWallTime">00:00:01</profile>
-# <profile namespace="globus" key="queue">fast</profile>
-
cat >cf <<END
-wrapperlog.always.transfer=true
+wrapperlog.always.transfer=false
sitedir.keep=false
execution.retries=0
lazy.errors=false
status.mode=provider
-use.provider.staging=false
-provider.staging.pin.swiftfiles=false
+use.provider.staging=true
+provider.staging.pin.swiftfiles=true
#throttle.host.submit=1
END
+# <pool handle="pbsdirect">
+# <execution provider="pbs" url="none" />
+# <profile namespace="globus" key="queue">fast</profile>
+# <profile namespace="globus" key="maxwalltime">00:59:00</profile>
+# <profile key="jobThrottle" namespace="karajan">$throttlePBS</profile>
+# <profile namespace="karajan" key="initialScore">10000</profile>
+# <filesystem provider="local" url="none" />
+# <profile namespace="env" key="R_LIBS_USER">$R_LIBS_USER</profile>
+# <workdirectory>$HOME/swiftwork</workdirectory>
+# </pool>
Modified: SwiftApps/SwiftR/Swift/exec/configure-server-sge
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-sge 2011-06-01 20:01:00 UTC (rev 4542)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-sge 2011-06-01 22:15:27 UTC (rev 4543)
@@ -2,9 +2,6 @@
# Generate Swift configuration files for SGE with manually-started Swift workers (passive coasters)
-cores=$1
-throttle=$2
-
cat >tc <<END
sge bash /bin/bash null null ENV::PATH="$PATH";GLOBUS::maxwalltime="00:01:00"
END
Modified: SwiftApps/SwiftR/Swift/exec/configure-server-ssh
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-ssh 2011-06-01 20:01:00 UTC (rev 4542)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-ssh 2011-06-01 22:15:27 UTC (rev 4543)
@@ -1,8 +1,5 @@
#! /usr/bin/env bash
-cores=$1
-throttle=$2
-time=$3
cat >tc <<END
@@ -15,7 +12,7 @@
<execution provider="coaster" url="none" jobmanager="local:NA"/>
<profile namespace="globus" key="workerManager">passive</profile>
<profile namespace="globus" key="workersPerNode">$cores</profile>
- <profile namespace="karajan" key="jobThrottle">2.55</profile>
+ <profile namespace="karajan" key="jobThrottle">$throttle</profile>
<profile namespace="karajan" key="initialScore">10000</profile>
<filesystem provider="local" url="none"/>
<workdirectory>$(pwd)/swiftwork</workdirectory>
Modified: SwiftApps/SwiftR/Swift/exec/start-swift
===================================================================
--- SwiftApps/SwiftR/Swift/exec/start-swift 2011-06-01 20:01:00 UTC (rev 4542)
+++ SwiftApps/SwiftR/Swift/exec/start-swift 2011-06-01 22:15:27 UTC (rev 4543)
@@ -425,6 +425,34 @@
fi
}
+verify-is-time()
+{
+ argname=$1; shift
+ #check if positive integer
+ if [ "$1" -gt 0 ] 2> /dev/null; then
+ time_mins=$time
+ # echo time_mins: $time_mins
+ return 0
+ fi
+
+ d=${1%%:*}
+ hm=${1#*:}
+ h=${hm%%:*}
+ m=${hm#*:}
+ # check than they are integers >= 0
+ if [ \( "$h" -ge 0 \) -a \( "$m" -ge 0 \) -a \( "$d" -ge 0 \) \
+ -a \( \( "$h" -ge 0 \) -o \( "$m" -ge 0 \) -o \( "$d" -ge 0 \) \) ]; then
+ time_mins=$(($m + 60 * ($h + $d * 24)))
+# echo time_mins: $time_mins
+ return 0
+ else
+ echo $0: "value for $argname was neither valid d:m:s time, or positive
+ integer number of minutes"
+ usage
+ exit 1
+ fi
+}
+
verify-not-null()
{
argname=$1; shift
@@ -459,7 +487,7 @@
-r rcmd ssh site specific, SGE only, typically ssh.
qrsh for siraf cluster
-s server local local, pbs, sge, ssh, pbsf (for firewalled workers)
- ,cobalt,crayxt,custom
+ ,cobalt,crayxt,custom, pbsauto
-t time 00:30:00 hh:mm:ss, for PBS, Cobalt and SGE only
-w wkloglvl NONE NONE, ERROR, WARN, INFO, DEBUG, TRACE
-k keepdir No argument, if flag is set, will keep working
@@ -485,6 +513,7 @@
server=local
time="00:30:00"
+time_mins=30
nodes=1
queue=short
cores=0
@@ -504,6 +533,7 @@
sites_file=
tc_file=
cf_file=
+warmupjob=true
rcmd=ssh # rcmd: ssh (typical) or qrsh (eg for siraf with node login restrictions)
workmode=slot # slot: start one worker on each slot; node: start one worker for all slots on a node
@@ -521,8 +551,8 @@
-p) throttle=$2; verify-is-numeric throttle $throttle; shift ;;
-q) queue=$2; verify-not-null queue $queue; shift ;;
-r) rcmd=$2; verify-is-one-of rcmd $rcmd ssh qrsh; shift ;;
- -s) server=$2; verify-is-one-of server $server local ssh pbs pbsf sge cobalt crayxt custom; shift ;;
- -t) time=$2; verify-not-null time $time; shift ;;
+ -s) server=$2; verify-is-one-of server $server local ssh pbs pbsf sge cobalt crayxt custom pbsauto; shift ;;
+ -t) time=$2; verify-is-time time $time; shift ;;
-w) workerLogging=$2; verify-is-one-of workerLoggingLevel $workerLogging NONE ERROR WARN INFO DEBUG TRACE; shift ;;
-L) swiftLoggingFlag="" ;; # swift default is lots of logging
-k) keepdir=TRUE ;;
@@ -626,6 +656,7 @@
exitcmd=""
if [ $server = custom ]; then
+ warmupjob=false
# have already set up tc.data and sites.xml files, just set
#onexit
function onexit {
@@ -640,13 +671,13 @@
echo ok > ackfifo
fi
elif [ $server = local ]; then
-
+ warmupjob=true
if [ $cores -eq 0 ]; then
cores=$defaultLocalCores
fi
- echo project=$project cores=$cores nodes=$nodes queue=$queue server=$server throttle=$throttle
+ echo project=$project cores=$cores nodes=$nodes queue=$queue server=$server
- source $SWIFTRBIN/configure-server-local $cores
+ source $SWIFTRBIN/configure-server-local
function onexit {
stdcleanup_start
# Find and terminate R workers: they should register their PiD
@@ -671,13 +702,13 @@
echo ok > ackfifo
fi
elif [ $server = ssh ]; then
-
+ warmupjob=true
if [ $cores -eq 0 ]; then
cores=$defaultSshCores
fi
- echo project=$project cores=$cores nodes=$nodes queue=$queue server=$server throttle=$throttle
+ echo project=$project cores=$cores nodes=$nodes queue=$queue server=$server
- source $SWIFTRBIN/configure-server-ssh $cores $throttle $time
+ source $SWIFTRBIN/configure-server-ssh
sshpidfile=${out/stdouterr/workerpids}
@@ -711,18 +742,44 @@
wait-and-start-ssh-workers &
starterpid=$!
+elif [ \( $server = pbsauto \) ]; then
+ warmupjob=false
+ # Systems where Swift manages workers
+ if [ $cores -le 0 ]; then
+ cores=$defaultClusterCores
+ fi
+ echo server=$server project=$project cores=$cores nodes=$nodes queue=$queue
+ source $SWIFTRBIN/configure-server-pbsauto
+
+ function onexit {
+ stdcleanup_start
+ trap - $TRAPEVENTS
+ # exit cleanly
+ stdcleanup_end
+ exit 0
+}
+
+ trap onexit $TRAPEVENTS
+ exitcmd=onexit
+
+
+ if [ "$doack" = TRUE ]; then
+ echo ok > ackfifo
+ fi
+
elif [ \( $server = pbs \) -o \( $server = pbsf \) -o \( $server = sge \) \
-o \( $server = cobalt \) -o \( $server = crayxt \) ]; then
-
- if [ $cores -eq 0 ]; then
+ warmupjob=true
+ # Batch systems where we need to launch workers
+ if [ $cores -le 0 ]; then
cores=$defaultClusterCores
fi
- echo project=$project cores=$cores nodes=$nodes queue=$queue server=$server throttle=$throttle
+ echo server=$server project=$project cores=$cores nodes=$nodes queue=$queue
if [ $server = pbsf ]
then
- source $SWIFTRBIN/configure-server-pbs $cores $throttle
+ source $SWIFTRBIN/configure-server-pbs
else
- source $SWIFTRBIN/configure-server-${server} $cores $throttle
+ source $SWIFTRBIN/configure-server-${server}
fi
jobidfile=${out/stdouterr/jobid}
@@ -756,7 +813,7 @@
$SWIFTRBIN/../swift/bin/swift $swiftLoggingFlag \
-config cf -tc.file tc -sites.file sites.xml $script -pipedir=$(pwd) \
- -warmup=true \
+ -warmup=$warmupjob \
>& $out </dev/null
exitcode=$?
# Do any cleanup if swift exits in this manner
More information about the Swift-commit
mailing list