[Swift-commit] r4543 - SwiftApps/SwiftR/Swift/exec

tga at ci.uchicago.edu tga at ci.uchicago.edu
Wed Jun 1 17:15:27 CDT 2011


Author: tga
Date: 2011-06-01 17:15:27 -0500 (Wed, 01 Jun 2011)
New Revision: 4543

Modified:
   SwiftApps/SwiftR/Swift/exec/configure-server-cobalt
   SwiftApps/SwiftR/Swift/exec/configure-server-crayxt
   SwiftApps/SwiftR/Swift/exec/configure-server-local
   SwiftApps/SwiftR/Swift/exec/configure-server-pbs
   SwiftApps/SwiftR/Swift/exec/configure-server-pbsauto
   SwiftApps/SwiftR/Swift/exec/configure-server-sge
   SwiftApps/SwiftR/Swift/exec/configure-server-ssh
   SwiftApps/SwiftR/Swift/exec/start-swift
Log:
cleaning up parameter passing to configure scripts.

Getting pbsauto working, so that automatic pbs job submission is available from R without custom config files.  These changes have not been thoroughly tested yet.



Modified: SwiftApps/SwiftR/Swift/exec/configure-server-cobalt
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-cobalt	2011-06-01 20:01:00 UTC (rev 4542)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-cobalt	2011-06-01 22:15:27 UTC (rev 4543)
@@ -2,9 +2,6 @@
 
 # configuration for cobalt with manually-started Swift workers (passive coasters)
 
-cores=$1
-throttle=$2
-
 cat >tc <<END
 cobalt       bash      /bin/bash null null ENV::PATH="$PATH";GLOBUS::maxwalltime="00:01:00"
 END

Modified: SwiftApps/SwiftR/Swift/exec/configure-server-crayxt
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-crayxt	2011-06-01 20:01:00 UTC (rev 4542)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-crayxt	2011-06-01 22:15:27 UTC (rev 4543)
@@ -2,8 +2,6 @@
 
 # configuration for PBS with manually-started Swift workers (passive coasters)
 
-cores=$1
-throttlePBS=$2
 
 cat >tc <<END
 pbs       bash      /bin/bash null null ENV::PATH="$PATH";GLOBUS::maxwalltime="00:01:00"
@@ -24,7 +22,7 @@
     <!--
     <profile namespace="globus" key="workersPerNode">1</profile>
     -->
-    <profile namespace="karajan" key="jobThrottle">$throttlePBS</profile>
+    <profile namespace="karajan" key="jobThrottle">$throttle</profile>
     <profile namespace="karajan" key="initialScore">10000</profile>
     <filesystem provider="local" url="none"/>
     <profile namespace="env" key="SWIFTR_TMP">$LUSTRE_TMP</profile>

Modified: SwiftApps/SwiftR/Swift/exec/configure-server-local
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-local	2011-06-01 20:01:00 UTC (rev 4542)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-local	2011-06-01 22:15:27 UTC (rev 4543)
@@ -2,8 +2,6 @@
 
 throttleOneCore="0.00"  # FIXME: test if new swft fix makes zero OK rather than -0.001
 
-cores=$1
-
 if [ -r /proc/cpuinfo ]; then
   localcores=$(grep '^processor' /proc/cpuinfo | wc -l)
 else

Modified: SwiftApps/SwiftR/Swift/exec/configure-server-pbs
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-pbs	2011-06-01 20:01:00 UTC (rev 4542)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-pbs	2011-06-01 22:15:27 UTC (rev 4543)
@@ -2,10 +2,6 @@
 
 # configuration for PBS with manually-started Swift workers (passive coasters)
 
-cores=$1
-
-throttlePBS=$2
-
 cat >tc <<END
 pbs       bash      /bin/bash null null ENV::PATH="$PATH";GLOBUS::maxwalltime="00:01:00"
 END
@@ -19,7 +15,7 @@
     <execution provider="coaster" url="none" jobmanager="local:NA"/>
     <profile namespace="globus" key="workerManager">passive</profile>
     <profile namespace="globus" key="workersPerNode">$cores</profile>
-    <profile namespace="karajan" key="jobThrottle">$throttlePBS</profile>
+    <profile namespace="karajan" key="jobThrottle">$throttle</profile>
     <profile namespace="karajan" key="initialScore">10000</profile>
     <filesystem provider="local" url="none"/>
     <profile namespace="env" key="R_LIBS_USER">$R_LIBS_USER</profile>

Modified: SwiftApps/SwiftR/Swift/exec/configure-server-pbsauto
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-pbsauto	2011-06-01 20:01:00 UTC (rev 4542)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-pbsauto	2011-06-01 22:15:27 UTC (rev 4543)
@@ -1,63 +1,74 @@
 #! /usr/bin/env bash
 
-throttlePBS=.31 # FIXME: parameterize thsi and several other variables, below.
-
+#TODO: make configurable
+maxwalltime="00:10:00"
 cat >tc <<END
-fork      bashlocal /bin/bash null null null
-pbscoast  bash      /bin/bash null null ENV::PATH="$PATH";GLOBUS::maxwalltime="00:10:00"
+pbscoast  bash      /bin/bash null null ENV::PATH="$PATH";GLOBUS::maxwalltime="$maxwalltime"
 END
 
 cat >sites.xml <<END
 <config>
-  <pool handle="fork">
-    <execution provider="local" url="none" />
-    <profile key="jobThrottle" namespace="karajan">0.15</profile>
-    <profile namespace="karajan" key="initialScore">10000</profile>
-    <filesystem provider="local" url="none" />
-    <profile namespace="env" key="R_LIBS_USER">$R_LIBS_USER</profile>
-    <workdirectory>$(pwd)/swiftwork</workdirectory>
-  </pool>
 
-  <pool handle="pbsdirect">
-    <execution provider="pbs" url="none" />
-    <profile namespace="globus" key="queue">fast</profile>
-    <profile namespace="globus" key="maxwalltime">00:59:00</profile>
-    <profile key="jobThrottle" namespace="karajan">$throttlePBS</profile>
-    <profile namespace="karajan" key="initialScore">10000</profile>
-    <filesystem provider="local" url="none" />
-    <profile namespace="env" key="R_LIBS_USER">$R_LIBS_USER</profile>
-    <workdirectory>$HOME/swiftwork</workdirectory>
-  </pool>
 
   <pool handle="pbscoast">
     <execution provider="coaster" url="none" jobmanager="local:pbs"/>
-    <profile namespace="globus" key="queue">short</profile>
-    <profile namespace="globus" key="maxTime">12000</profile>
-    <profile namespace="globus" key="slots">32</profile>
-    <profile namespace="globus" key="nodeGranularity">1</profile>
-    <profile namespace="globus" key="maxNodes">1</profile>
-    <profile namespace="globus" key="workersPerNode">1</profile>
-    <profile namespace="karajan" key="jobThrottle">2.55</profile>
+    <filesystem provider="local" url="none"/>
+    <profile namespace="env" key="R_LIBS_USER">$R_LIBS_USER</profile>
+    <workdirectory>$(pwd)/swiftwork</workdirectory>
+    <scratch>$(pwd)/swiftscratch</scratch>
+    <profile namespace="karajan" key="jobThrottle">$throttle</profile>
     <profile namespace="karajan" key="initialScore">10000</profile>
 
-    <filesystem provider="local" url="none"/>
-    <workdirectory>$HOME/swiftwork</workdirectory>
+    <!-- max number of cores in total -->
+    <profile namespace="globus" key="slots">$nodes</profile>
+    <profile namespace="globus" key="workersPerNode">$cores</profile>
+    
+    <!-- these settings control the size of the request blocks
+        put through the batch system -->
+    <profile namespace="globus" key="maxNodes">1</profile>
+    <profile namespace="globus" key="nodeGranularity">1</profile>
+
+
+END
+if [ "$queue" != NONE ]; then
+    #TODO: error handling
+    # assume time in H:M:S format
+    t
+    cat >> sites.xml <<END
+    <profile namespace="globus" key="queue">$queue</profile>
+END
+fi
+
+if [ "$time" != NONE ]; then
+    cat >> sites.xml <<END
+    <profile namespace="globus" key="maxTime">$time_mins</profile>
+END
+fi
+
+cat >> sites.xml <<END
   </pool>
 </config>
 END
 
-#     <profile namespace="globus" key="maxWallTime">00:00:01</profile>
-#     <profile namespace="globus" key="queue">fast</profile>
-
 cat >cf <<END
-wrapperlog.always.transfer=true
+wrapperlog.always.transfer=false
 sitedir.keep=false
 execution.retries=0
 lazy.errors=false
 status.mode=provider
-use.provider.staging=false
-provider.staging.pin.swiftfiles=false
+use.provider.staging=true
+provider.staging.pin.swiftfiles=true
 #throttle.host.submit=1
 END
 
 
+#  <pool handle="pbsdirect">
+#    <execution provider="pbs" url="none" />
+#    <profile namespace="globus" key="queue">fast</profile>
+#    <profile namespace="globus" key="maxwalltime">00:59:00</profile>
+#    <profile key="jobThrottle" namespace="karajan">$throttlePBS</profile>
+#    <profile namespace="karajan" key="initialScore">10000</profile>
+#    <filesystem provider="local" url="none" />
+#    <profile namespace="env" key="R_LIBS_USER">$R_LIBS_USER</profile>
+#    <workdirectory>$HOME/swiftwork</workdirectory>
+#  </pool>

Modified: SwiftApps/SwiftR/Swift/exec/configure-server-sge
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-sge	2011-06-01 20:01:00 UTC (rev 4542)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-sge	2011-06-01 22:15:27 UTC (rev 4543)
@@ -2,9 +2,6 @@
 
 # Generate Swift configuration files for SGE with manually-started Swift workers (passive coasters)
 
-cores=$1
-throttle=$2
-
 cat >tc <<END
 sge       bash      /bin/bash null null ENV::PATH="$PATH";GLOBUS::maxwalltime="00:01:00"
 END

Modified: SwiftApps/SwiftR/Swift/exec/configure-server-ssh
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-ssh	2011-06-01 20:01:00 UTC (rev 4542)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-ssh	2011-06-01 22:15:27 UTC (rev 4543)
@@ -1,8 +1,5 @@
 #! /usr/bin/env bash
 
-cores=$1
-throttle=$2
-time=$3
 
 
 cat >tc <<END
@@ -15,7 +12,7 @@
     <execution provider="coaster" url="none" jobmanager="local:NA"/>
     <profile namespace="globus" key="workerManager">passive</profile>
     <profile namespace="globus" key="workersPerNode">$cores</profile>
-    <profile namespace="karajan" key="jobThrottle">2.55</profile>
+    <profile namespace="karajan" key="jobThrottle">$throttle</profile>
     <profile namespace="karajan" key="initialScore">10000</profile>
     <filesystem provider="local" url="none"/>
     <workdirectory>$(pwd)/swiftwork</workdirectory>

Modified: SwiftApps/SwiftR/Swift/exec/start-swift
===================================================================
--- SwiftApps/SwiftR/Swift/exec/start-swift	2011-06-01 20:01:00 UTC (rev 4542)
+++ SwiftApps/SwiftR/Swift/exec/start-swift	2011-06-01 22:15:27 UTC (rev 4543)
@@ -425,6 +425,34 @@
   fi
 }
 
+verify-is-time()
+{
+    argname=$1; shift
+    #check if positive integer
+    if [ "$1" -gt 0 ] 2> /dev/null; then
+        time_mins=$time
+ #       echo time_mins: $time_mins
+        return 0
+    fi
+
+    d=${1%%:*} 
+    hm=${1#*:}
+    h=${hm%%:*} 
+    m=${hm#*:}
+    # check than they are integers >= 0
+    if [ \( "$h" -ge 0 \) -a \( "$m" -ge 0 \) -a \( "$d" -ge 0 \) \
+        -a \( \( "$h" -ge 0 \) -o \( "$m" -ge 0 \) -o \( "$d" -ge 0 \) \) ]; then
+        time_mins=$(($m + 60 * ($h + $d * 24)))
+#        echo time_mins: $time_mins
+        return 0
+    else
+        echo $0: "value for $argname was neither valid d:m:s time, or positive
+            integer number of minutes"
+        usage
+        exit 1
+    fi
+}
+
 verify-not-null()
 {
   argname=$1; shift
@@ -459,7 +487,7 @@
    -r rcmd     ssh         site specific, SGE only, typically ssh. 
                                     qrsh for siraf cluster
    -s server   local       local, pbs, sge, ssh, pbsf (for firewalled workers)
-                            ,cobalt,crayxt,custom
+                            ,cobalt,crayxt,custom, pbsauto
    -t time     00:30:00    hh:mm:ss, for PBS, Cobalt and SGE only
    -w wkloglvl NONE        NONE, ERROR, WARN, INFO, DEBUG, TRACE
    -k keepdir              No argument, if flag is set, will keep working 
@@ -485,6 +513,7 @@
 
 server=local
 time="00:30:00"
+time_mins=30
 nodes=1
 queue=short
 cores=0
@@ -504,6 +533,7 @@
 sites_file=
 tc_file=
 cf_file=
+warmupjob=true
 
 rcmd=ssh      # rcmd: ssh (typical) or qrsh (eg for siraf with node login restrictions)
 workmode=slot # slot: start one worker on each slot; node: start one worker for all slots on a node
@@ -521,8 +551,8 @@
     -p) throttle=$2; verify-is-numeric throttle $throttle; shift ;;
     -q) queue=$2; verify-not-null queue $queue; shift ;;
     -r) rcmd=$2; verify-is-one-of rcmd $rcmd ssh qrsh; shift ;;
-    -s) server=$2; verify-is-one-of server $server local ssh pbs pbsf sge cobalt crayxt custom; shift ;;
-    -t) time=$2; verify-not-null time $time; shift ;;
+    -s) server=$2; verify-is-one-of server $server local ssh pbs pbsf sge cobalt crayxt custom pbsauto; shift ;;
+    -t) time=$2; verify-is-time time $time; shift ;;
     -w) workerLogging=$2; verify-is-one-of workerLoggingLevel $workerLogging NONE ERROR WARN INFO DEBUG TRACE; shift ;;
     -L) swiftLoggingFlag="" ;; # swift default is lots of logging
     -k) keepdir=TRUE ;;
@@ -626,6 +656,7 @@
 exitcmd=""
 
 if [ $server = custom ]; then
+    warmupjob=false
     # have already set up tc.data and sites.xml files, just set
     #onexit
     function onexit {
@@ -640,13 +671,13 @@
       echo ok > ackfifo
     fi
 elif [ $server = local ]; then
-
+  warmupjob=true
   if [ $cores -eq 0 ]; then
     cores=$defaultLocalCores
   fi
-  echo project=$project cores=$cores nodes=$nodes queue=$queue server=$server throttle=$throttle
+  echo project=$project cores=$cores nodes=$nodes queue=$queue server=$server 
  
-  source $SWIFTRBIN/configure-server-local $cores
+  source $SWIFTRBIN/configure-server-local
   function onexit {
     stdcleanup_start
     # Find and terminate R workers: they should register their PiD
@@ -671,13 +702,13 @@
     echo ok > ackfifo
   fi
 elif [ $server = ssh ]; then
-  
+  warmupjob=true
   if [ $cores -eq 0 ]; then
     cores=$defaultSshCores
   fi
-  echo project=$project cores=$cores nodes=$nodes queue=$queue server=$server throttle=$throttle
+  echo project=$project cores=$cores nodes=$nodes queue=$queue server=$server 
 
-  source $SWIFTRBIN/configure-server-ssh $cores $throttle $time
+  source $SWIFTRBIN/configure-server-ssh
 
   sshpidfile=${out/stdouterr/workerpids}
 
@@ -711,18 +742,44 @@
   wait-and-start-ssh-workers &
   starterpid=$!
 
+elif [ \( $server = pbsauto \) ]; then
+    warmupjob=false
+    # Systems where Swift manages workers
+  if [ $cores -le 0 ]; then
+    cores=$defaultClusterCores
+  fi
+  echo server=$server project=$project cores=$cores nodes=$nodes queue=$queue 
+  source $SWIFTRBIN/configure-server-pbsauto
+
+  function onexit {
+    stdcleanup_start
+    trap - $TRAPEVENTS
+    # exit cleanly
+    stdcleanup_end
+    exit 0   
+}
+
+  trap onexit $TRAPEVENTS
+  exitcmd=onexit
+
+
+  if [ "$doack" = TRUE ]; then
+    echo ok > ackfifo
+  fi
+
 elif [ \( $server = pbs \) -o \( $server = pbsf \) -o \( $server = sge \) \
         -o \( $server = cobalt \) -o \( $server = crayxt \) ]; then
-
-  if [ $cores -eq 0 ]; then
+  warmupjob=true
+  # Batch systems where we need to launch workers
+  if [ $cores -le 0 ]; then
     cores=$defaultClusterCores
   fi
-  echo project=$project cores=$cores nodes=$nodes queue=$queue server=$server throttle=$throttle
+  echo server=$server project=$project cores=$cores nodes=$nodes queue=$queue 
   if [ $server = pbsf ]
   then
-    source $SWIFTRBIN/configure-server-pbs $cores $throttle
+    source $SWIFTRBIN/configure-server-pbs
   else
-    source $SWIFTRBIN/configure-server-${server} $cores $throttle
+    source $SWIFTRBIN/configure-server-${server}
   fi
 
   jobidfile=${out/stdouterr/jobid}
@@ -756,7 +813,7 @@
 
 $SWIFTRBIN/../swift/bin/swift $swiftLoggingFlag \
         -config cf -tc.file tc -sites.file sites.xml $script -pipedir=$(pwd) \
-        -warmup=true \
+        -warmup=$warmupjob \
         >& $out </dev/null
 exitcode=$?
 # Do any cleanup if swift exits in this manner




More information about the Swift-commit mailing list