[Swift-commit] r3967 - SwiftApps/SwiftR/Swift/exec

noreply at svn.ci.uchicago.edu noreply at svn.ci.uchicago.edu
Fri Jan 14 21:08:22 CST 2011


Author: wilde
Date: 2011-01-14 21:08:22 -0600 (Fri, 14 Jan 2011)
New Revision: 3967

Added:
   SwiftApps/SwiftR/Swift/exec/configure-server-sge
Modified:
   SwiftApps/SwiftR/Swift/exec/start-swift
Log:
Changes for SGE on siraf. Needs further testing.

Added: SwiftApps/SwiftR/Swift/exec/configure-server-sge
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-sge	                        (rev 0)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-sge	2011-01-15 03:08:22 UTC (rev 3967)
@@ -0,0 +1,85 @@
+#! /bin/bash
+
+# Generate Swift configuration files for SGE with manually-started Swift workers (passive coasters)
+
+cores=$1
+throttle=5.0 # allow approximately 500 concurrent jobs
+
+cat >tc <<END
+fork      bashlocal /bin/bash null null GLOBUS::maxwalltime="00:00:10"
+sge       bash      /bin/bash null null ENV::PATH="$PATH";GLOBUS::maxwalltime="00:01:00"
+END
+
+# FIXME: examine effect of 1-min default maxwalltime above
+# FIXME: determine best value for throttle below
+
+cat >sites.xml <<END
+<config>
+
+  <pool handle="fork">
+    <execution provider="local" url="none"/>
+    <profile key="jobThrottle" namespace="karajan">0.15</profile>
+    <profile namespace="karajan" key="initialScore">10000</profile>
+    <filesystem provider="local" url="none" />
+    <workdirectory>$(pwd)/swiftwork</workdirectory>
+    <profile namespace="swift" key="stagingMethod">file</profile>
+  </pool>
+
+  <pool handle="sge">
+    <execution provider="coaster" url="none" jobmanager="local:NA"/>
+    <profile namespace="globus" key="workerManager">passive</profile>
+    <!-- <profile namespace="globus" key="workersPerNode">$cores</profile> -->
+    <profile namespace="globus" key="workersPerNode">1</profile>
+    <profile namespace="karajan" key="jobThrottle">$throttle</profile>
+    <profile namespace="karajan" key="initialScore">10000</profile>
+    <filesystem provider="local" url="none"/>
+    <workdirectory>$HOME/swiftwork</workdirectory>
+    <scratch>/tmp/$USER/swiftwork</scratch>
+    <profile namespace="swift" key="stagingMethod">proxy</profile>
+  </pool>
+
+</config>
+END
+
+# Note above: workdirectory for cluster must be on shared filesystem
+
+cat >cf <<END
+wrapperlog.always.transfer=false
+sitedir.keep=true
+execution.retries=0
+lazy.errors=false
+status.mode=provider
+use.provider.staging=true
+provider.staging.pin.swiftfiles=true
+#throttle.host.submit=1
+END
+
+: SAVE FOR REFERENCE <<END
+
+  <pool handle="fork">
+    <execution provider="coaster" url="none" jobmanager="local:local"/>
+    <profile namespace="globus" key="maxtime">300000</profile>
+    <profile namespace="globus" key="workersPerNode">4</profile>
+    <profile namespace="globus" key="slots">1</profile>
+    <profile namespace="globus" key="maxnodes">1</profile>
+    <profile key="jobThrottle" namespace="karajan">0.15</profile>
+    <profile namespace="karajan" key="initialScore">10000</profile>
+    <filesystem provider="local" url="none" />
+    <workdirectory>$(pwd)/swiftwork</workdirectory>
+    <profile namespace="swift" key="stagingMethod">proxy</profile>
+  </pool>
+
+  <pool handle="localhost">
+    <!-- <execution provider="coaster-persistent" url="http://bridled.ci.uchicago.edu:" jobmanager="local:local"/> -->
+    <execution provider="coaster" url="none" jobmanager="local:local"/>
+    <!-- <profile namespace="globus" key="workerManager">passive</profile> -->
+    <profile namespace="globus" key="workersPerNode">8</profile>
+    <profile namespace="globus" key="slots">1</profile>
+    <profile namespace="globus" key="maxnodes">1</profile>
+    <profile key="jobThrottle" namespace="karajan">.15</profile>
+    <profile namespace="karajan" key="initialScore">10000</profile>
+    <profile namespace="swift" key="stagingMethod">proxy</profile>
+    <workdirectory>/scratch/local/wilde/pstest/swiftwork</workdirectory>
+  </pool>
+
+END


Property changes on: SwiftApps/SwiftR/Swift/exec/configure-server-sge
___________________________________________________________________
Name: svn:executable
   + *

Modified: SwiftApps/SwiftR/Swift/exec/start-swift
===================================================================
--- SwiftApps/SwiftR/Swift/exec/start-swift	2011-01-14 23:27:10 UTC (rev 3966)
+++ SwiftApps/SwiftR/Swift/exec/start-swift	2011-01-15 03:08:22 UTC (rev 3967)
@@ -60,7 +60,7 @@
 
 make-pbs-submit-file()
 {
-  if [ $queue != default ]; then
+  if [ $queue != NONE ]; then
     queueDirective="#PBS -q $queue"
   else
     queueDirective=""
@@ -98,7 +98,7 @@
     usage
     exit 1
   fi
-  if [ $queue != default ]; then # FIXME: this will interfere if user really wants to use "-q default"
+  if [ $queue != NONE ]; then
     queueDirective="#PBS -q $queue"
   else
     queueDirective=""
@@ -148,17 +148,23 @@
 make-sge-submit-file()
 {
 echo in $0
-  if [ $queue != default ]; then
+  if [ $queue != NONE ]; then
     queueDirective="#$ -q $queue"
   else
     queueDirective=""
   fi
-  if [ $project != default ]; then
+  if [ $project != NONE ]; then
     projectDirective="#$ -A $project"
   else
     projectDirective=""
   fi
-  rcmd="qrsh" # FIXME - need to set on system basis; qrsh works for siraf
+  if [ $parEnv != NONE ]; then
+    parEnvDirective="#$ -pe $parEnv $(($nodes*$cores))"
+  else
+    parEnvDirective=""
+  fi
+
+#  rcmd="qrsh" # FIXME - need to set on system basis; qrsh works for siraf
   
 cat >batch.sub <<END
 #!/bin/bash
@@ -170,29 +176,29 @@
 # #$ -v WORKER_LOGGING_LEVEL=NONE
 #$ -V
 
-# Siraf Site-specific:
-#$ -pe openmpi $(($nodes*$cores))
+$parEnvDirective
 $queueDirective
-#  -A ???
-#$projectDirective
+$projectDirective
 
 # Ranger Site-specific:
 # $ -pe 16way 256
 # $ -q development
 # $ -A TG-DBS080004N
 
-  cd / && NODES=\`cat \$PE_HOSTFILE | awk '{ for(i=0;i<\$2;i++){print \$1} }'\`
+  cd /
+  if [ $workmode = slot ]; then
+    NODES=\`cat \$PE_HOSTFILE | awk '{ for(i=0;i<\$2;i++){print \$1} }'\`
+  else 
+    NODES=\`cat \$PE_HOSTFILE | awk '{print \$1}'\` # Better for Ranger, Eddie, ...
+  fi
 
-  # -or-  cd / && NODES=`cat $PE_HOSTFILE | awk '{print $1}'` # Better for Ranger
-
-  # WORKER_LOGGING_ENABLED=true # FIXME: parameterize; fix w PBS -v
-  #cd / && /usr/bin/perl $SWIFTBIN/worker.pl $CONTACT SwiftR-workers $HOME/.globus/coasters $IDLETIMEOUT
-  HOST=\$(echo $CONTACT | sed -e 's,^http://,,' -e 's/:.*//')
-  PORT=\$(echo $CONTACT | sed -e 's,^.*:,,')
+  HOST=$(echo $CONTACT | sed -e 's,^http://,,' -e 's/:.*//')
+  PORT=$(echo $CONTACT | sed -e 's,^.*:,,')
   echo '***' PE_HOSTFILE file: \$PE_HOSTFILE CONTACT:$CONTACT
+  cat \$PE_HOSTFILE
 
   for h in \$NODES; do
-    workerCmd="echo Swift R startup running on host; hostname; cd /; WORKER_LOGGING_LEVEL=NONE /usr/bin/perl $SWIFTBIN/worker.pl $CONTACT SwiftR-\$h $HOME/.globus/coasters $IDLETIMEOUT"
+    workerCmd="echo Swift R startup running on host; hostname; cd /; WORKER_LOGGING_LEVEL=$workerLogging /usr/bin/perl $SWIFTBIN/worker.pl $CONTACT SwiftR-\$h $HOME/.globus/coasters $IDLETIMEOUT"
     if [ $rcmd = ssh ]; then
       ssh \$h "\$workerCmd" &
     else
@@ -283,18 +289,25 @@
 defaultCores=4
 throttle=10
 hosts=no-hosts-specified
-queue=default
-project=default
+queue=NONE
+project=NONE
+parEnv=NONE
 
+rcmd=ssh      # rcmd: ssh (typical) or qrsh (eg for siraf with node login restrictions)
+workmode=slot # slot: start one worker on each slot; node: start one worker for all slots on a node
+
 while [ $# -gt 0 ]
 do
   case "$1" in
+    -A) project=$2; verify-not-null project $project; shift ;;
     -c) cores=$2; verify-is-numeric cores $cores; shift ;;
+    -e) parEnv=$2; verify-not-null parEnv $parEnv; shift ;; 
     -h) hosts=$2; verify-not-null hosts $hosts; shift ;; 
+    -m) workmode=$2; verify-is-one-of workmode $workmode slot node; shift ;; 
     -n) nodes=$2; verify-is-numeric nodes $nodes; shift ;;
     -p) throttle=$2; verify-is-numeric throttle $throttle; shift ;;
     -q) queue=$2; verify-not-null queue $queue; shift ;;
-    -A) project=$2; verify-not-null project $project; shift ;;
+    -r) rcmd=$2; verify-is-one-of rcmd $rcmd ssh qrsh; shift ;;
     -s) server=$2; verify-is-one-of server $server local ssh pbs pbsf sge; shift ;;
     -t) time=$2; verify-not-null time $time; shift ;;
     *)  usage; exit 1 ;;
@@ -302,7 +315,7 @@
   shift
 done
 
-echo cores=$cores nodes=$nodes queue=$queue server=$server throttle=$throttle
+echo project=$project cores=$cores nodes=$nodes queue=$queue server=$server throttle=$throttle
 
 SWIFTRBIN=$(cd $(dirname $0); pwd)
 SWIFTBIN=$SWIFTRBIN/../swift/bin  # This depends on ~/SwiftR/Swift/swift being a symlink to swift in RLibrary/Swift




More information about the Swift-commit mailing list