[Swift-commit] r5197 - in SwiftApps/SwiftR/Swift: exec man
tga at ci.uchicago.edu
tga at ci.uchicago.edu
Thu Sep 29 19:16:08 CDT 2011
Author: tga
Date: 2011-09-29 19:16:08 -0500 (Thu, 29 Sep 2011)
New Revision: 5197
Added:
SwiftApps/SwiftR/Swift/exec/configure-server-crayxtauto
Modified:
SwiftApps/SwiftR/Swift/exec/configure-server-pbsauto
SwiftApps/SwiftR/Swift/exec/start-swift
SwiftApps/SwiftR/Swift/man/swiftInit.Rd
Log:
Added crayxtauto server
Added: SwiftApps/SwiftR/Swift/exec/configure-server-crayxtauto
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-crayxtauto (rev 0)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-crayxtauto 2011-09-30 00:16:08 UTC (rev 5197)
@@ -0,0 +1,87 @@
+#! /usr/bin/env bash
+
+# TODO: is this the best way to do this?
+
+# we assume that $time_secs specifies the lengths of jobs
+# that should be submitted to pbs.
+# We don't really know the expected duration of R jobs, so we will come up
+# with a sensible value based on $time_secs
+#
+#Swift kills jobs that run for double maxwalltime, so maxwalltime should be
+# > 0.5 $time_secs
+# Ideally we want to run multiple jobs in each batch allocation, so we don't
+# want coasters to mistakenly decide that it can't schedule a job when we
+# have most of the batch allocation left. So we really want
+# maxwalltime to be just over 0.5 $time_secs, once we round both to minutes
+maxwalltime=$(( ($time_secs + 120) / 120 ))
+
+
+cat >tc <<END
+pbscoast bash /bin/bash null null ENV::PATH="$PATH";GLOBUS::maxwalltime="$maxwalltime"
+END
+
+cat >sites.xml <<END
+<config>
+
+
+ <pool handle="pbscoast">
+ <execution provider="coaster" url="none" jobmanager="local:pbs"/>
+ <filesystem provider="local" url="none"/>
+ <profile namespace="env" key="R_LIBS_USER">$R_LIBS_USER</profile>
+ <profile namespace="globus" key="providerAttributes">pbs.aprun;pbs.mpp;depth=$cores</profile>
+
+ <profile namespace="karajan" key="jobThrottle">$throttle</profile>
+ <profile namespace="karajan" key="initialScore">10000</profile>
+
+ <!-- max number of cores in total -->
+ <profile namespace="globus" key="slots">$nodes</profile>
+ <profile namespace="globus" key="workersPerNode">$cores</profile>
+ <profile namespace="globus" key="ppn">$cores:cray:pack</profile>
+
+ <!-- these settings control the size of the request blocks
+ put through the batch system -->
+ <profile namespace="globus" key="maxNodes">1</profile>
+ <profile namespace="globus" key="nodeGranularity">1</profile>
+ <profile namespace="globus" key="lowOverallocation">2</profile>
+ <profile namespace="globus" key="highOverallocation">2</profile>
+ <profile namespace="env" key="SWIFTR_TMP">/dev/shm/$USER/</profile>
+ <profile namespace="env" key="TMPDIR">/dev/shm</profile>
+ <workdirectory>/dev/shm/$USER/swiftwork</workdirectory>
+ <scratch>/dev/shm/$USER/swiftscratch</scratch>
+
+END
+if [ "$project" != NONE]; then
+ cat >> sites.xml <<END
+ <profile namespace="globus" key="project">$project</profile>
+END
+fi
+if [ "$queue" != NONE ]; then
+ cat >> sites.xml <<END
+ <profile namespace="globus" key="queue">$queue</profile>
+END
+fi
+
+if [ "$time" != NONE ]; then
+    # Hack: add 60 seconds to the time to convince Swift to request
+    # blocks for the full time.
+ cat >> sites.xml <<END
+ <profile namespace="globus" key="maxTime">$((time_secs + 60))</profile>
+END
+fi
+
+cat >> sites.xml <<END
+ </pool>
+</config>
+END
+
+cat >cf <<END
+wrapperlog.always.transfer=false
+sitedir.keep=false
+execution.retries=$num_retries
+lazy.errors=false
+status.mode=provider
+use.provider.staging=true
+provider.staging.pin.swiftfiles=true
+#throttle.host.submit=1
+END
+
Modified: SwiftApps/SwiftR/Swift/exec/configure-server-pbsauto
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-pbsauto 2011-09-29 20:05:16 UTC (rev 5196)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-pbsauto 2011-09-30 00:16:08 UTC (rev 5197)
@@ -49,7 +49,6 @@
if [ "$queue" != NONE ]; then
#TODO: error handling
# assume time in H:M:S format
- t
cat >> sites.xml <<END
<profile namespace="globus" key="queue">$queue</profile>
END
Modified: SwiftApps/SwiftR/Swift/exec/start-swift
===================================================================
--- SwiftApps/SwiftR/Swift/exec/start-swift 2011-09-29 20:05:16 UTC (rev 5196)
+++ SwiftApps/SwiftR/Swift/exec/start-swift 2011-09-30 00:16:08 UTC (rev 5197)
@@ -88,7 +88,34 @@
fi
}
+function setup-crayxt-lustre {
+ # Set up working directories on lustre file system
+ #FIXME: beagle-specific code
+ LUSTRE_TMPROOT=/lustre/beagle/$USER/swiftRtmp
+ if mkdir -p $LUSTRE_TMPROOT; then
+ :
+ else
+ echo "Could not create temporary directory $LUSTRE_TMPROOT"
+ stdcleanup_start
+ stdcleanup_end
+ exit 1
+ fi
+
+ while true
+ do
+ LUSTRE_TMPSESSION=$LUSTRE_TMPROOT/$RANDOM
+ if mkdir $LUSTRE_TMPSESSION; then
+ echo "Temporary files will be stored in $LUSTRE_TMPSESSION" 1>&2
+ break
+ fi
+ done
+ # Cray XT cluster nodes don't have local writable tmp storage
+ export LUSTRE_TMP=$LUSTRE_TMPSESSION
+ export LUSTRE_RTMP=$LUSTRE_TMPSESSION/Rtmp
+ mkdir -p $LUSTRE_RTMP
+}
+
make-pbs-submit-file()
{
SUBMIT_FILE=$1
@@ -512,7 +539,7 @@
-r rcmd ssh site specific, SGE only, typically ssh.
qrsh for siraf cluster
-s server local local, pbs, sge, ssh, pbsf (for firewalled workers)
- ,cobalt,crayxt,custom, pbsauto
+ ,cobalt,crayxt,custom, pbsauto, crayxtauto
-t time 00:30:00 hh:mm:ss, for PBS, Cobalt and SGE only
-w wkloglvl NONE NONE, ERROR, WARN, INFO, DEBUG, TRACE
-k keepdir No argument, if flag is set, will keep working
@@ -577,7 +604,7 @@
-p) throttle=$2; verify-is-numeric throttle $throttle; shift ;;
-q) queue=$2; verify-not-null queue $queue; shift ;;
-r) rcmd=$2; verify-is-one-of rcmd $rcmd ssh qrsh; shift ;;
- -s) server=$2; verify-is-one-of server $server local ssh pbs pbsf sge cobalt crayxt custom pbsauto; shift ;;
+ -s) server=$2; verify-is-one-of server $server local ssh pbs pbsf sge cobalt crayxt custom pbsauto crayxtauto; shift ;;
-t) time=$2; verify-is-time time $time; shift ;;
-w) workerLogging=$2; verify-is-one-of workerLoggingLevel $workerLogging NONE ERROR WARN INFO DEBUG TRACE; shift ;;
-L) swiftLoggingFlag="" ;; # swift default is lots of logging
@@ -699,6 +726,12 @@
# Function to run on termination of swift
exitcmd=""
+# Set up working directories on crayxt
+if [ $server = crayxt -o $server = crayxtauto ] ; then
+ setup-crayxt-lustre
+fi
+
+
if [ $server = custom ]; then
warmupjob=false
# have already set up tc.data and sites.xml files, just set
@@ -775,14 +808,14 @@
wait-and-start-ssh-workers &
starterpid=$!
-elif [ \( $server = pbsauto \) ]; then
+elif [ \( $server = pbsauto \) -o \( $server = crayxtauto \) ]; then
warmupjob=false
# Systems where Swift manages workers
if [ $cores -le 0 ]; then
cores=$defaultClusterCores
fi
echo server=$server project=$project cores=$cores nodes=$nodes queue=$queue
- source $SWIFTRBIN/configure-server-pbsauto
+ source $SWIFTRBIN/configure-server-$server
function onexit {
stdcleanup_start
@@ -811,30 +844,6 @@
DIRS_TO_DELETE=
if [ $server = crayxt ]; then
- #FIXME: beagle-specific code
- LUSTRE_TMPROOT=/lustre/beagle/$USER/swiftRtmp
- if mkdir -p $LUSTRE_TMPROOT; then
- :
- else
- echo "Could not create temporary directory $LUSTRE_TMPROOT"
- stdcleanup_start
- stdcleanup_end
- exit 1
- fi
-
-
- while true
- do
- LUSTRE_TMPSESSION=$LUSTRE_TMPROOT/$RANDOM
- if mkdir $LUSTRE_TMPSESSION; then
- echo "Temporary files will be stored in $LUSTRE_TMPSESSION" 1>&2
- break
- fi
- done
- # Cray XT cluster nodes don't have local writable tmp storage
- export LUSTRE_TMP=$LUSTRE_TMPSESSION
- export LUSTRE_RTMP=$LUSTRE_TMPSESSION/Rtmp
- mkdir -p $LUSTRE_RTMP
source $SWIFTRBIN/configure-server-crayxt
elif [ $server = pbsf ]; then
source $SWIFTRBIN/configure-server-pbs
Modified: SwiftApps/SwiftR/Swift/man/swiftInit.Rd
===================================================================
--- SwiftApps/SwiftR/Swift/man/swiftInit.Rd 2011-09-29 20:05:16 UTC (rev 5196)
+++ SwiftApps/SwiftR/Swift/man/swiftInit.Rd 2011-09-30 00:16:08 UTC (rev 5197)
@@ -25,7 +25,8 @@
The number of cores per host. The default values vary from 2 to 8 depending on the server type.
}
\item{server}{
- One of: "local", "ssh", "pbs", "sge", "pbsf", "cobalt".
+ One of: "local", "ssh", "pbs", "sge", "pbsf", "cobalt", "crayxt",
+ "pbsauto", "crayxtauto".
How Swift will run the jobs: for example, if "local" is chosen, they
will be run on the local machine, or if "pbs" is chosen, they will be
run through the pbs scheduler.
More information about the Swift-commit
mailing list