[Swift-commit] r4156 - SwiftApps/SwiftR/Swift/exec

noreply at svn.ci.uchicago.edu noreply at svn.ci.uchicago.edu
Mon Feb 28 17:34:04 CST 2011


Author: tga
Date: 2011-02-28 17:34:04 -0600 (Mon, 28 Feb 2011)
New Revision: 4156

Added:
   SwiftApps/SwiftR/Swift/exec/configure-server-crayxt
Modified:
   SwiftApps/SwiftR/Swift/exec/start-swift
Log:
Got SwiftR to the point where it can launch coaster workers on beagle compute nodes, but haven't yet succeeded in getting those workers to register with the coaster server.


Copied: SwiftApps/SwiftR/Swift/exec/configure-server-crayxt (from rev 4154, SwiftApps/SwiftR/Swift/exec/configure-server-pbs)
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-crayxt	                        (rev 0)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-crayxt	2011-02-28 23:34:04 UTC (rev 4156)
@@ -0,0 +1,52 @@
+#! /bin/bash
+
+# configuration for PBS with manually-started Swift workers (passive coasters)
+
+cores=$1
+
+throttlePBS=$2
+
+cat >tc <<END
+fork      bashlocal /bin/bash null null null
+pbs       bash      /bin/bash null null ENV::PATH="$PATH";GLOBUS::maxwalltime="00:01:00"
+END
+
+# FIXME: examine effect of 1-min default maxwalltime above
+# FIXME: determine best value for throttle below
+
+cat >sites.xml <<END
+<config>
+
+  <pool handle="fork">
+    <execution provider="local" url="none" />
+    <profile key="jobThrottle" namespace="karajan">0.15</profile>
+    <profile namespace="karajan" key="initialScore">10000</profile>
+    <filesystem provider="local" url="none" />
+    <workdirectory>$(pwd)/swiftwork</workdirectory>
+  </pool>
+
+  <pool handle="pbs">
+    <execution provider="coaster" url="none" jobmanager="local:NA"/>
+    <profile namespace="globus" key="workerManager">passive</profile>
+    <profile namespace="globus" key="workersPerNode">$cores</profile>
+    <profile namespace="karajan" key="jobThrottle">$throttlePBS</profile>
+    <profile namespace="karajan" key="initialScore">10000</profile>
+    <filesystem provider="local" url="none"/>
+    <workdirectory>$(pwd)/swiftwork</workdirectory>
+    <workdirectory>$(pwd)/swiftscratch</workdirectory>
+  </pool>
+
+</config>
+END
+
+cat >cf <<END
+wrapperlog.always.transfer=false
+sitedir.keep=false
+execution.retries=0
+lazy.errors=false
+status.mode=provider
+use.provider.staging=true
+provider.staging.pin.swiftfiles=true
+#throttle.host.submit=1
+END
+

Modified: SwiftApps/SwiftR/Swift/exec/start-swift
===================================================================
--- SwiftApps/SwiftR/Swift/exec/start-swift	2011-02-28 22:35:41 UTC (rev 4155)
+++ SwiftApps/SwiftR/Swift/exec/start-swift	2011-02-28 23:34:04 UTC (rev 4156)
@@ -189,6 +189,37 @@
 chmod +x $SUBMIT_FILE 
 }
 
+make-crayxt-submit-file()
+{
+  SUBMIT_FILE=$1
+  if [ $queue != NONE ]; then
+    queueDirective="#PBS -q $queue"
+  else
+    queueDirective=""
+  fi
+cat > $SUBMIT_FILE <<END
+#PBS -S /bin/sh
+#PBS -N SwiftR-workers
+#PBS -l mppwidth=$(($nodes*$cores))
+#PBS -l mppnppn=1
+#PBS -l mppdepth=$cores
+#PBS -l walltime=$time
+#PBS -m n
+#PBS -o $HOME
+#PBS -e $HOME
+$queueDirective
+#export WORKER_LOGGING_LEVEL=$workerLogging # FIXME: parameterize; fix w PBS -v
+#cd / && /usr/bin/perl $SWIFTBIN/worker.pl $CONTACT SwiftR-workers $HOME/.globus/coasters $IDLETIMEOUT
+
+#TODO: lustre working directory?
+
+aprun -N 1 -d $cores -n $(($cores*$nodes)) \
+    /usr/bin/perl $SWIFTBIN/worker.pl $CONTACT SwiftR-\$h $HOME/.globus/coasters $IDLETIMEOUT
+
+END
+}
+
+
 # FIXME: for big systems like Ranger, need to use ssh_tree to avoid socket FD exhastion?
 
 make-sge-submit-file()
@@ -406,7 +437,7 @@
    -p throttle 10          >= 1 integer
    -q queue                site speific (PBS, SGE, Cobalt)
    -r rcmd     ssh         site specific, SGE only, typically ssh. qrsh for siraf cluster
-   -s server   local       local, pbs, sge, ssh, pbsf,cobalt (for firewalled worker nodes)
+   -s server   local       local, pbs, sge, ssh, pbsf,cobalt,crayxt (for firewalled worker nodes)
    -t time     00:30:00    hh:mm:ss, for PBS, Cobalt and SGE only
    -w wkloglvl NONE        NONE, ERROR, WARN, INFO, DEBUG, TRACE
    -k keepdir              No argument, if flag is set, will keep working directory
@@ -464,7 +495,7 @@
     -p) throttle=$2; verify-is-numeric throttle $throttle; shift ;;
     -q) queue=$2; verify-not-null queue $queue; shift ;;
     -r) rcmd=$2; verify-is-one-of rcmd $rcmd ssh qrsh; shift ;;
-    -s) server=$2; verify-is-one-of server $server local ssh pbs pbsf sge cobalt; shift ;;
+    -s) server=$2; verify-is-one-of server $server local ssh pbs pbsf sge cobalt crayxt; shift ;;
     -t) time=$2; verify-not-null time $time; shift ;;
     -w) workerLogging=$2; verify-is-one-of workerLoggingLevel $workerLogging NONE ERROR WARN INFO DEBUG TRACE; shift ;;
     -L) swiftLoggingFlag="" ;; # swift default is lots of logging
@@ -616,7 +647,7 @@
   starterpid=$!
 
 elif [ \( $server = pbs \) -o \( $server = pbsf \) -o \( $server = sge \) \
-        -o \( $server = cobalt \) ]; then
+        -o \( $server = cobalt \) -o \( $server = crayxt \) ]; then
 
   if [ $cores -eq 0 ]; then
     cores=$defaultClusterCores




More information about the Swift-commit mailing list