[Swift-commit] r4435 - in SwiftApps/SwiftR/Swift: R exec
tga at ci.uchicago.edu
Thu Apr 28 17:38:26 CDT 2011
Author: tga
Date: 2011-04-28 17:38:26 -0500 (Thu, 28 Apr 2011)
New Revision: 4435
Modified:
SwiftApps/SwiftR/Swift/R/Workers.R
SwiftApps/SwiftR/Swift/exec/start-swift
Log:
Started work on allowing users to provide their own sites file, etc.
Added options so users can supply a custom sites.xml, tc.data, and cf (Swift configuration) file.
Needs additional work to determine the best way to handle this.
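
A rough sketch of the intended call pattern from R, assuming the package loads as library(Swift); the file paths below are illustrative placeholders, not part of this commit:

    # Hypothetical usage of the new arguments; paths are placeholders only.
    library(Swift)
    swiftInit(server     = "custom",
              sites.file = "~/swiftr/sites.xml",  # site/pool definitions
              tc.file    = "~/swiftr/tc.data",    # transformation catalog
              cf.file    = "~/swiftr/cf")         # Swift configuration file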
Modified: SwiftApps/SwiftR/Swift/R/Workers.R
===================================================================
--- SwiftApps/SwiftR/Swift/R/Workers.R 2011-04-28 16:14:21 UTC (rev 4434)
+++ SwiftApps/SwiftR/Swift/R/Workers.R 2011-04-28 22:38:26 UTC (rev 4435)
@@ -43,13 +43,23 @@
}
-swiftInit <- function( cores=NULL, server=NULL,
- hosts=NULL, nodes=NULL, project=NULL,
- parEnv=NULL, kernel=NULL, workmode=NULL,
- throttle=NULL, queue=NULL,
- rcmd=NULL, time=NULL,
- workerLogging=NULL,swiftLogging=NULL,
- keepworkdir=NULL)
+swiftInit <- function( cores=getOption("swift.cores"),
+ server=getOption("swift.server"),
+ hosts=getOption("swift.hosts"),
+ nodes=getOption("swift.nodes"),
+ project=getOption("swift.project"),
+ parEnv=getOption("swift.parenv"),
+ kernel=getOption("swift.kernel"),
+ workmode=getOption("swift.workmode"),
+ throttle=getOption("swift.throttle"),
+ queue=getOption("swift.queue"),
+ rcmd=getOption("swift.rcmd"), time=getOption("swift.time"),
+ workerLogging=getOption("swift.workerLogging"),
+ swiftLogging=getOption("swift.swiftLogging"),
+ keepworkdir=getOption("swift.keepworkdir"),
+ tc.file=getOption("swift.tc.file"),
+ cf.file=getOption("swift.cf.file"),
+ sites.file=getOption("swift.sites.file"))
{
# server: which server backend to use to acquire workers
# for example, local runs tasks on the local machine
@@ -82,34 +92,25 @@
if(is.null(server))
- server <- getOption("swift.server")
- if(is.null(server))
- server <- "local"
- if(! is.null(server) ) {
- cmdString <- paste(cmdString, "-s", shQuote(server))
- }
+ server <- "local"
+ cmdString <- paste(cmdString, "-s", shQuote(server))
+
- if(is.null(cores))
- cores <- getOption("swift.cores")
- if (is.null(cores)) {
- if (server == "local") {
- cores <- getOption("swift.system.cores")
- if (is.null(cores))
- cores <- 2
- }
- else if (server == "ssh") {
- cores <- 4
- }
- else {
- cores <- 8
- }
+ if (is.null(cores)) {
+ if (server == "local") {
+ cores <- getOption("swift.system.cores")
+ if (is.null(cores))
+ cores <- 2
}
+ else if (server == "ssh")
+ cores <- 4
+ else
+ cores <- 8
+ }
if(! is.null(cores) ) {
cmdString <- paste(cmdString, "-c", shQuote(cores))
}
- if(is.null(hosts))
- hosts <- getOption("swift.hosts")
if( is.null(hosts) ) {
if (server == "ssh") {
error(paste("Need to provide hosts list for ssh server."))
@@ -126,21 +127,14 @@
cmdString <- paste(cmdString, "-h", shQuote(hosts) )
}
- if(is.null(parEnv))
- parEnv <- getOption("swift.parenv")
if(! is.null(parEnv) ) {
cmdString <- paste(cmdString, "-e", shQuote(parEnv))
}
-
- if(is.null(kernel))
- kernel <- getOption("swift.kernel")
if(! is.null(kernel) ) {
cmdString <- paste(cmdString, "-kernel", shQuote(kernel))
}
- if(is.null(workmode))
- workmode <- getOption("swift.workmode")
if(! is.null(workmode) ) {
cmdString <- paste(cmdString, "-m", shQuote(workmode))
}
@@ -149,75 +143,64 @@
nodes <- 1
else {
if(is.null(nodes))
- nodes <- getOption("swift.nodes")
- if(is.null(nodes))
if (server == "ssh") {
nodes <- length(strsplit(hosts, " ", fixed=T)[[1]])
# cat("Node count for hosts '", hosts, "' is ", nodes)
}
else
nodes <- 1 # Default value
- if(! is.null(nodes) ) {
+ if(! is.null(nodes) )
cmdString <- paste(cmdString, "-n", shQuote(nodes))
- }
}
- if(is.null(throttle)) {
- throttle <- getOption("swift.throttle")
- if (is.null(throttle)) {
- # number of simultaneous jobs / 10:
- # Round up to nearest integer
- throttle <- as.integer((cores * nodes / 10.0) + 1)
- }
+ if (is.null(throttle)) {
+ # number of simultaneous jobs / 10:
+ # Round up to nearest integer
+ throttle <- as.integer((cores * nodes / 10.0) + 1)
}
- if(! is.null(throttle) ) {
+ if(! is.null(throttle))
cmdString <- paste(cmdString, "-p", shQuote(throttle))
- }
- if(is.null(queue))
- queue <- getOption("swift.queue")
- if(! is.null(queue) ) {
+ if(! is.null(queue) )
cmdString <- paste(cmdString, "-q", shQuote(queue))
- }
- if(is.null(project))
- project <- getOption("swift.project")
- if(! is.null(project) ) {
+ if(! is.null(project) )
cmdString <- paste(cmdString, "-A", shQuote(project))
- }
- if(is.null(rcmd))
- rcmd <- getOption("swift.rcmd")
- if(! is.null(rcmd) ) {
+ if(! is.null(rcmd) )
cmdString <- paste(cmdString, "-r", shQuote(rcmd))
- }
-
- if(is.null(time))
- time <- getOption("swift.time")
- if(! is.null(time) ) {
+ if(! is.null(time) )
cmdString <- paste(cmdString, "-t", shQuote(time))
- }
- if(is.null(workerLogging))
- workerLogging <- getOption("swift.workerLogging")
- if(! is.null(workerLogging) ) {
+ if(! is.null(workerLogging) )
cmdString <- paste(cmdString, "-w", shQuote(workerLogging))
- }
- if(is.null(swiftLogging))
- swiftLogging <- getOption("swift.swiftLogging")
- if( (! is.null(swiftLogging)) && swiftLogging ) {
+ if( (! is.null(swiftLogging)) && swiftLogging )
cmdString <- paste(cmdString, "-L")
- }
- if(is.null(keepworkdir))
- keepworkdir <- getOption("swift.keepworkdir")
if(!is.null(keepworkdir) && keepworkdir)
cmdString <- paste(cmdString, "-k")
if (!is.null(getOption("swift.trace")) && getOption("swift.trace"))
cmdString <- paste(cmdString, "--trace")
+
+ if (server == "custom") {
+ if (is.null(sites.file)) {
+ stop(paste("sites file must be specified by argument or",
+ "option for custom server"))
+ }
+ if (is.null(tc.file)) {
+ stop(paste("tc file must be specified by argument or",
+ "option for custom server"))
+ }
+ if (is.null(cf.file)) {
+ stop(paste("cf file must be specified by argument or",
+ "option for custom server"))
+ }
+ cmdString <- paste(cmdString, "--cf.file", shQuote(cf.file),
+ "--tc.file", shQuote(tc.file), "--sites.file", shQuote(sites.file))
+ }
# launch asynchronously
# for now, we will rely on the shell script's output to inform
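
Because the getOption() lookups now live in the formal argument list, session-wide defaults can be set once with options() and picked up by later swiftInit() calls. A minimal sketch (option values here are illustrative, not from the commit):

    # Illustrative option values only; swiftInit() reads these defaults
    # via getOption() unless explicit arguments override them.
    options(swift.server = "ssh",
            swift.hosts  = "node01 node02",  # hypothetical host list
            swift.cores  = 4)
    swiftInit()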
Modified: SwiftApps/SwiftR/Swift/exec/start-swift
===================================================================
--- SwiftApps/SwiftR/Swift/exec/start-swift 2011-04-28 16:14:21 UTC (rev 4434)
+++ SwiftApps/SwiftR/Swift/exec/start-swift 2011-04-28 22:38:26 UTC (rev 4435)
@@ -45,7 +45,6 @@
IDLETIMEOUT=$((60*60*240)) # 10 days: FIXME: make this a command line arg
- rm -rf remotepid.* # FIXME: should not be needed if we start in a new dir each time
echo "Starting to launch ssh workers on hosts: $hosts"
for host in $(echo $hosts); do
timestamp=$(date "+%Y.%m%d.%H%M%S")
@@ -78,7 +77,6 @@
}
-# FIXME: does PBS need same workers-per-node logic as SGE?
make-pbs-submit-file()
{
SUBMIT_FILE=$1
@@ -432,15 +430,19 @@
-c cores 2,4,5 >= 1 (default is: local 2; ssh 4; cluster 8)
-e parEnv site specific, SGE only
-h hosts 1 list of hosts, quoted as one argument, space separated
- -m workmode node node: start one worker for all slots on a node; slot (one worker on each slot) (Currently ignored)
+ -m workmode node node: start one worker for all slots on a node;
+ slot: one worker on each slot (Currently ignored)
-n nodes 1
-p throttle 10 >= 1 integer
-q queue site specific (PBS, SGE, Cobalt)
- -r rcmd ssh site specific, SGE only, typically ssh. qrsh for siraf cluster
- -s server local local, pbs, sge, ssh, pbsf,cobalt,crayxt (for firewalled worker nodes)
+ -r rcmd ssh site specific, SGE only, typically ssh.
+ qrsh for siraf cluster
+ -s server local local, pbs, sge, ssh, pbsf (for firewalled workers)
+ ,cobalt,crayxt,custom
-t time 00:30:00 hh:mm:ss, for PBS, Cobalt and SGE only
-w wkloglvl NONE NONE, ERROR, WARN, INFO, DEBUG, TRACE
- -k keepdir No argument, if flag is set, will keep working directory
+ -k keepdir No argument, if flag is set, will keep working
+ directory
--trace If provided, echo bash commands
Examples:
@@ -478,6 +480,9 @@
workerLogging=ERROR
swiftLoggingFlag="-minimal.logging"
keepdir=FALSE
+sites_file=
+tc_file=
+cf_file=
rcmd=ssh # rcmd: ssh (typical) or qrsh (eg for siraf with node login restrictions)
workmode=slot # slot: start one worker on each slot; node: start one worker for all slots on a node
@@ -495,13 +500,16 @@
-p) throttle=$2; verify-is-numeric throttle $throttle; shift ;;
-q) queue=$2; verify-not-null queue $queue; shift ;;
-r) rcmd=$2; verify-is-one-of rcmd $rcmd ssh qrsh; shift ;;
- -s) server=$2; verify-is-one-of server $server local ssh pbs pbsf sge cobalt crayxt; shift ;;
+ -s) server=$2; verify-is-one-of server $server local ssh pbs pbsf sge cobalt crayxt custom; shift ;;
-t) time=$2; verify-not-null time $time; shift ;;
-w) workerLogging=$2; verify-is-one-of workerLoggingLevel $workerLogging NONE ERROR WARN INFO DEBUG TRACE; shift ;;
-L) swiftLoggingFlag="" ;; # swift default is lots of logging
-k) keepdir=TRUE ;;
--trace) set -x ;;
-d) workdir=$2; verify-not-null workdir $workdir; shift ;;
+ --tc.file) tc_file=$2; shift ;;
+ --sites.file) sites_file=$2; shift ;;
+ --cf.file) cf_file=$2; shift ;;
*) usage; exit 1 ;;
esac
shift
@@ -531,9 +539,28 @@
rm -f $rundir
ln -s $trundir $rundir
-cd $trundir
+# Before we cd into dir, copy across
+if [ $server = custom ]; then
+ if [ ! -f "$tc_file" ]; then
+ echo --tc.file not provided or does not exist
+ cd /; rm -rf $trundir
+ exit 1
+ elif [ ! -f "$sites_file" ]; then
+ echo --sites.file not provided or does not exist
+ cd /; rm -rf $trundir
+ exit 1
+ elif [ ! -f "$cf_file" ]; then
+ echo --cf.file not provided or does not exist
+ cd /; rm -rf $trundir
+ exit 1
+ fi
+ cp $tc_file $trundir/tc
+ cp $cf_file $trundir/cf
+ cp $sites_file $trundir/sites.xml
+fi
+cd $trundir
# Standard cleanup actions
function stdcleanup_start {
@@ -576,7 +603,16 @@
# Function to run on termination of swift
exitcmd=""
-if [ $server = local ]; then
+if [ $server = custom ]; then
+ # have already set up tc.data and sites.xml files, just set
+ #onexit
+ function onexit {
+ stdcleanup_start
+ stdcleanup_end
+ }
+ trap onexit $TRAPEVENTS
+ exitcmd=onexit
+elif [ $server = local ]; then
if [ $cores -eq 0 ]; then
cores=$defaultLocalCores
@@ -602,7 +638,6 @@
exit 0
}
- TRAPEVENTS="EXIT 1 2 3 15" # Signals and conditions to trap
trap onexit $TRAPEVENTS
exitcmd=onexit
@@ -617,7 +652,6 @@
sshpidfile=${out/stdouterr/workerpids}
- TRAPEVENTS="EXIT 1 2 3 15" # Signals and conditions to trap
function onexit {
stdcleanup_start
@@ -662,7 +696,6 @@
jobidfile=${out/stdouterr/jobid}
- TRAPEVENTS="EXIT 1 2 3 15" # Signals and conditions to trap
function onexit {
stdcleanup_start
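
On the shell side, a hedged sketch of how the start-swift script might be invoked directly with the new flags (file paths are placeholders; swiftInit() normally assembles this command line itself):

    # Illustrative invocation only; paths are not part of the commit.
    ./start-swift -s custom \
        --sites.file ~/swiftr/sites.xml \
        --tc.file    ~/swiftr/tc.data \
        --cf.file    ~/swiftr/cf

Per the diff, the script copies the three files into the run directory as tc, cf, and sites.xml before changing into it, so the user's originals are left untouched.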