[Swift-commit] r4435 - in SwiftApps/SwiftR/Swift: R exec

tga at ci.uchicago.edu tga at ci.uchicago.edu
Thu Apr 28 17:38:26 CDT 2011


Author: tga
Date: 2011-04-28 17:38:26 -0500 (Thu, 28 Apr 2011)
New Revision: 4435

Modified:
   SwiftApps/SwiftR/Swift/R/Workers.R
   SwiftApps/SwiftR/Swift/exec/start-swift
Log:
Started work on allowing users to provide their own sites file, etc.
Added options to allow users to provide custom sites.xml, tc.data, and cf files.
Needs additional work to determine the best way to handle this.


Modified: SwiftApps/SwiftR/Swift/R/Workers.R
===================================================================
--- SwiftApps/SwiftR/Swift/R/Workers.R	2011-04-28 16:14:21 UTC (rev 4434)
+++ SwiftApps/SwiftR/Swift/R/Workers.R	2011-04-28 22:38:26 UTC (rev 4435)
@@ -43,13 +43,23 @@
 }
 
 
-swiftInit <- function( cores=NULL, server=NULL, 
-                    hosts=NULL, nodes=NULL, project=NULL, 
-                    parEnv=NULL, kernel=NULL, workmode=NULL,
-                    throttle=NULL, queue=NULL,
-                    rcmd=NULL, time=NULL,
-                    workerLogging=NULL,swiftLogging=NULL, 
-                    keepworkdir=NULL)
+swiftInit <- function( cores=getOption("swift.cores"), 
+                    server=getOption("swift.server"), 
+                    hosts=getOption("swift.hosts"), 
+                    nodes=getOption("swift.nodes"), 
+                    project=getOption("swift.project"), 
+                    parEnv=getOption("swift.parenv"), 
+                    kernel=getOption("swift.kernel"), 
+                    workmode=getOption("swift.workmode"),
+                    throttle=getOption("swift.throttle"), 
+                    queue=getOption("swift.queue"),
+                    rcmd=getOption("swift.rcmd"), time=getOption("swift.time"),
+                    workerLogging=getOption("swift.workerLogging"),
+                    swiftLogging=getOption("swift.swiftLogging"), 
+                    keepworkdir=getOption("swift.keepworkdir"), 
+                    tc.file=getOption("swift.tc.file"), 
+                    cf.file=getOption("swift.cf.file"), 
+                    sites.file=getOption("swift.sites.file"))
 {
     # server: which server backend to use to acquire workers
     #           for example, local runs tasks on the local machine
@@ -82,34 +92,25 @@
 
     
     if(is.null(server))
-        server <- getOption("swift.server")
-        if(is.null(server))
-            server <- "local"
-    if(! is.null(server) )  {
-        cmdString <- paste(cmdString, "-s", shQuote(server)) 
-    }
+        server <- "local"
+    cmdString <- paste(cmdString, "-s", shQuote(server)) 
+
     
-    if(is.null(cores))
-        cores <- getOption("swift.cores")
-        if (is.null(cores)) {
-            if (server == "local") {
-                cores <- getOption("swift.system.cores")
-                if (is.null(cores)) 
-                    cores <- 2
-            }
-            else if (server == "ssh") {
-                cores <- 4
-            }
-            else {
-                cores <- 8
-            }
+    if (is.null(cores)) {
+        if (server == "local") {
+            cores <- getOption("swift.system.cores")
+            if (is.null(cores)) 
+                cores <- 2
         }
+        else if (server == "ssh") 
+            cores <- 4
+        else 
+            cores <- 8
+    }
     if(! is.null(cores) )  {
         cmdString <- paste(cmdString, "-c", shQuote(cores))
     }
 
-    if(is.null(hosts))
-        hosts <- getOption("swift.hosts")
     if( is.null(hosts) )  {
         if (server == "ssh") {
             error(paste("Need to provide hosts list for ssh server."))
@@ -126,21 +127,14 @@
         cmdString <- paste(cmdString, "-h", shQuote(hosts) )
     }
     
-    if(is.null(parEnv))
-        parEnv <- getOption("swift.parenv")
     if(! is.null(parEnv) )  {
         cmdString <- paste(cmdString, "-e", shQuote(parEnv)) 
     }
-    
-    if(is.null(kernel))
-        kernel <- getOption("swift.kernel")
 
     if(! is.null(kernel) )  {
         cmdString <- paste(cmdString, "-kernel", shQuote(kernel)) 
     }
    
-    if(is.null(workmode))
-        workmode <- getOption("swift.workmode")
     if(! is.null(workmode) )  {
         cmdString <- paste(cmdString, "-m", shQuote(workmode)) 
     }
@@ -149,75 +143,64 @@
         nodes <- 1
     else {
         if(is.null(nodes))
-            nodes <- getOption("swift.nodes")
-        if(is.null(nodes))
             if (server == "ssh") {
                 nodes <- length(strsplit(hosts, " ", fixed=T)[[1]])
 #                cat("Node count for hosts '", hosts, "' is ", nodes)
             }
             else 
                 nodes <- 1 # Default value
-        if(! is.null(nodes) )  {
+        if(! is.null(nodes) )  
             cmdString <- paste(cmdString, "-n", shQuote(nodes)) 
-        }
     }
     
-    if(is.null(throttle)) {
-        throttle <- getOption("swift.throttle")
-        if (is.null(throttle)) {
-            # number of simultaneous jobs / 10:
-            #   Round up to nearest integer
-            throttle <- as.integer((cores * nodes / 10.0) + 1)
-        }
+    if (is.null(throttle)) {
+        # number of simultaneous jobs / 10:
+        #   Round up to nearest integer
+        throttle <- as.integer((cores * nodes / 10.0) + 1)
     }
-    if(! is.null(throttle) )  {
+    if(! is.null(throttle)) 
         cmdString <- paste(cmdString, "-p", shQuote(throttle)) 
-    }
 
-    if(is.null(queue))
-        queue <- getOption("swift.queue")
-    if(! is.null(queue) )  {
+    if(! is.null(queue) )  
         cmdString <- paste(cmdString, "-q", shQuote(queue)) 
-    }
     
-    if(is.null(project))
-        project <- getOption("swift.project")
-    if(! is.null(project) )  {
+    if(! is.null(project) )  
         cmdString <- paste(cmdString, "-A", shQuote(project)) 
-    }
     
-    if(is.null(rcmd))
-        rcmd <- getOption("swift.rcmd")
-    if(! is.null(rcmd) )  {
+    if(! is.null(rcmd) )  
         cmdString <- paste(cmdString, "-r", shQuote(rcmd)) 
-    }
 
-
-    if(is.null(time))
-        time <- getOption("swift.time")
-    if(! is.null(time) )  {
+    if(! is.null(time) )  
         cmdString <- paste(cmdString, "-t", shQuote(time)) 
-    }
     
-    if(is.null(workerLogging))
-        workerLogging <- getOption("swift.workerLogging")
-    if(! is.null(workerLogging) )  {
+    if(! is.null(workerLogging) )  
         cmdString <- paste(cmdString, "-w", shQuote(workerLogging)) 
-    }
     
-    if(is.null(swiftLogging))
-        swiftLogging <- getOption("swift.swiftLogging")
-    if( (! is.null(swiftLogging)) && swiftLogging )  {
+    if( (! is.null(swiftLogging)) && swiftLogging )  
         cmdString <- paste(cmdString, "-L")
-    }
     
-    if(is.null(keepworkdir))
-        keepworkdir <- getOption("swift.keepworkdir")
     if(!is.null(keepworkdir) && keepworkdir)
         cmdString <- paste(cmdString, "-k")
 
     if (!is.null(getOption("swift.trace")) && getOption("swift.trace"))
         cmdString <- paste(cmdString, "--trace")
+    
+    if (server == "custom") {
+        if (is.null(sites.file)) {
+            stop(paste("sites file must be specified by argument or",
+                    "option for custom server"))
+        }
+        if (is.null(tc.file)) {
+            stop(paste("tc file must be specified by argument or",
+                    "option for custom server"))
+        }
+        if (is.null(cf.file)) {
+            stop(paste("cf file must be specified by argument or",
+                    "option for custom server"))
+        }
+        cmdString <- paste(cmdString, "--cf.file", shQuote(cf.file), 
+                "--tc.file", shQuote(tc.file), "--sites.file", shQuote(sites.file))
+    }
 
     # launch asynchronously
     # for now, we will rely on the shell script's output to inform

Modified: SwiftApps/SwiftR/Swift/exec/start-swift
===================================================================
--- SwiftApps/SwiftR/Swift/exec/start-swift	2011-04-28 16:14:21 UTC (rev 4434)
+++ SwiftApps/SwiftR/Swift/exec/start-swift	2011-04-28 22:38:26 UTC (rev 4435)
@@ -45,7 +45,6 @@
 
   IDLETIMEOUT=$((60*60*240)) # 10 days: FIXME: make this a command line arg
 
-  rm -rf remotepid.* # FIXME: should not be needed if we start in a new dir each time
   echo "Starting to launch ssh workers on hosts: $hosts"
   for host in $(echo $hosts); do
     timestamp=$(date "+%Y.%m%d.%H%M%S")
@@ -78,7 +77,6 @@
 }
 
 
-# FIXME: does PBS need same workers-per-node logic as SGE?
 make-pbs-submit-file()
 {
   SUBMIT_FILE=$1
@@ -432,15 +430,19 @@
    -c cores    2,4,5       >= 1 (default is: local 2; ssh 4; cluster 8)
    -e parEnv               site specific, SGE only
    -h hosts    1           list of hosts, quoted as one argument, space separated
-   -m workmode node        node: start one worker for all slots on a node; slot (one worker on each slot) (Currently ignored)
+   -m workmode node        node: start one worker for all slots on a node; 
+                            slot: one worker on each slot (Currently ignored)
    -n nodes    1
    -p throttle 10          >= 1 integer
    -q queue                site speific (PBS, SGE, Cobalt)
-   -r rcmd     ssh         site specific, SGE only, typically ssh. qrsh for siraf cluster
-   -s server   local       local, pbs, sge, ssh, pbsf,cobalt,crayxt (for firewalled worker nodes)
+   -r rcmd     ssh         site specific, SGE only, typically ssh. 
+                                    qrsh for siraf cluster
+   -s server   local       local, pbs, sge, ssh, pbsf (for firewalled workers)
+                            ,cobalt,crayxt,custom
    -t time     00:30:00    hh:mm:ss, for PBS, Cobalt and SGE only
    -w wkloglvl NONE        NONE, ERROR, WARN, INFO, DEBUG, TRACE
-   -k keepdir              No argument, if flag is set, will keep working directory
+   -k keepdir              No argument, if flag is set, will keep working 
+                                                                directory
    --trace                 If provided, echo bash commands
 
     Examples:
@@ -478,6 +480,9 @@
 workerLogging=ERROR
 swiftLoggingFlag="-minimal.logging"
 keepdir=FALSE
+sites_file=
+tc_file=
+cf_file=
 
 rcmd=ssh      # rcmd: ssh (typical) or qrsh (eg for siraf with node login restrictions)
 workmode=slot # slot: start one worker on each slot; node: start one worker for all slots on a node
@@ -495,13 +500,16 @@
     -p) throttle=$2; verify-is-numeric throttle $throttle; shift ;;
     -q) queue=$2; verify-not-null queue $queue; shift ;;
     -r) rcmd=$2; verify-is-one-of rcmd $rcmd ssh qrsh; shift ;;
-    -s) server=$2; verify-is-one-of server $server local ssh pbs pbsf sge cobalt crayxt; shift ;;
+    -s) server=$2; verify-is-one-of server $server local ssh pbs pbsf sge cobalt crayxt custom; shift ;;
     -t) time=$2; verify-not-null time $time; shift ;;
     -w) workerLogging=$2; verify-is-one-of workerLoggingLevel $workerLogging NONE ERROR WARN INFO DEBUG TRACE; shift ;;
     -L) swiftLoggingFlag="" ;; # swift default is lots of logging
     -k) keepdir=TRUE ;;
     --trace) set -x ;;
     -d) workdir=$2; verify-not-null workdir $workdir; shift ;;
+    --tc.file) tc_file=$2; shift ;;
+    --sites.file) sites_file=$2; shift ;;
+    --cf.file) cf_file=$2; shift ;;
     *)  usage; exit 1 ;;
   esac
   shift
@@ -531,9 +539,28 @@
 
 rm -f $rundir
 ln -s $trundir $rundir
-cd $trundir
 
+# Before we cd into dir, copy across
+if [ $server = custom ]; then
+    if [ ! -f "$tc_file" ]; then
+        echo --tc.file not provided or does not exist
+        cd /; rm -rf $trundir
+        exit 1
+    elif [ ! -f "$sites_file" ]; then
+        echo --sites.file not provided or does not exist
+        cd /; rm -rf $trundir
+        exit 1
+    elif [ ! -f "$cf_file" ]; then
+        echo --cf.file not provided or does not exist
+        cd /; rm -rf $trundir
+        exit 1
+    fi
+    cp $tc_file $trundir/tc
+    cp $cf_file $trundir/cf
+    cp $sites_file $trundir/sites.xml
+fi
 
+cd $trundir
 
 # Standard clenuup actions
 function stdcleanup_start {
@@ -576,7 +603,16 @@
 # Function to run on termination of swift
 exitcmd=""
 
-if [ $server = local ]; then
+if [ $server = custom ]; then
+    # have already set up tc.data and sites.xml files, just set
+    #onexit
+    function onexit {
+        stdcleanup_start
+        stdcleanup_end
+    }
+    trap onexit $TRAPEVENTS
+    exitcmd=onexit
+elif [ $server = local ]; then
 
   if [ $cores -eq 0 ]; then
     cores=$defaultLocalCores
@@ -602,7 +638,6 @@
     exit 0
   }
 
-  TRAPEVENTS="EXIT 1 2 3 15"  # Signals and conditions to trap
   trap onexit $TRAPEVENTS
   exitcmd=onexit
 
@@ -617,7 +652,6 @@
 
   sshpidfile=${out/stdouterr/workerpids}
 
-  TRAPEVENTS="EXIT 1 2 3 15"  # Signals and conditions to trap
 
   function onexit {
     stdcleanup_start
@@ -662,7 +696,6 @@
 
   jobidfile=${out/stdouterr/jobid}
 
-  TRAPEVENTS="EXIT 1 2 3 15"  # Signals and conditions to trap
 
   function onexit {
     stdcleanup_start




More information about the Swift-commit mailing list