[Swift-commit] r4158 - in SwiftApps/SwiftR: . Swift/R

noreply at svn.ci.uchicago.edu noreply at svn.ci.uchicago.edu
Wed Mar 2 14:48:44 CST 2011


Author: tga
Date: 2011-03-02 14:48:44 -0600 (Wed, 02 Mar 2011)
New Revision: 4158

Modified:
   SwiftApps/SwiftR/IMMEDIATE-TODO
   SwiftApps/SwiftR/Swift/R/Workers.R
Log:
Added some initial logic to support determining host list from an environment variable.


Modified: SwiftApps/SwiftR/IMMEDIATE-TODO
===================================================================
--- SwiftApps/SwiftR/IMMEDIATE-TODO	2011-03-01 22:17:23 UTC (rev 4157)
+++ SwiftApps/SwiftR/IMMEDIATE-TODO	2011-03-02 20:48:44 UTC (rev 4158)
@@ -40,12 +40,6 @@
 - test on Ranger
 
 HIGH:
-
-Coaster timeout problem:
-    He (Mihael) will also look at a better fix to the coaster timeout problem, but for now, you should integrate the timeout change from my trunk/cog/modules/provider-coaster/src/* into your test trunk/
-    Otherwise, you'll find that your coaster workers quit after a few minutes of inactivity and then start-swift needs to be killed, workers cleanup up, and start-swift restarted.
-
-HIGH:
 See email about pbs breaking on UVa cluster
 
 MID:
@@ -138,3 +132,10 @@
 HIGH:
  Look at all OmxNNN parallel calls - see if any are used that we dont yet handle.
  - It turns out that they are not currently needed
+
+HIGH:
+
+Coaster timeout problem:
+    He (Mihael) will also look at a better fix to the coaster timeout problem, but for now, you should integrate the timeout change from my trunk/cog/modules/provider-coaster/src/* into your test trunk/
+    Otherwise, you'll find that your coaster workers quit after a few minutes of inactivity and then start-swift needs to be killed, workers cleaned up, and start-swift restarted.
+

Modified: SwiftApps/SwiftR/Swift/R/Workers.R
===================================================================
--- SwiftApps/SwiftR/Swift/R/Workers.R	2011-03-01 22:17:23 UTC (rev 4157)
+++ SwiftApps/SwiftR/Swift/R/Workers.R	2011-03-02 20:48:44 UTC (rev 4158)
@@ -1,5 +1,46 @@
 
+getNodeList <- function (server=getOption("swift.server")) {
+    # Work out the nodes allocated to the current batch job by reading the
+    # host file named in an environment variable set by the scheduler.
+    # server: NULL, pbs, pbsf, cobalt or sge; any other value is an error.
+    #   When NULL, the variable of every supported scheduler is checked.
+    # Returns a data frame whose hosts column holds the unique host names and
+    # whose count column holds the number of processes for each host.
+    hostinfo <- NULL  # stays NULL until some host file has been read
+    if (!is.null(server) && !server %in% c("pbs", "sge", "cobalt", "pbsf")) {
+        stop(paste("Invalid server setting for getNodeList:", server))
+    }
+    if (is.null(server) || server == "pbs" || server == "pbsf") {  # NULL: try PBS too
+        hostfile <- Sys.getenv("PBS_NODEFILE")
+        if (hostfile[[1]] != "") {  # empty string: variable not set
+            hostnames <- read.table(hostfile, stringsAsFactors=FALSE)[[1]]  # first column only
+            summ <- rle(sort(hostnames))  # sort first so each host forms a single run
+            hostinfo <- data.frame(hosts=summ[[2]], count=summ[[1]])  # [[2]] = values, [[1]] = run lengths
+        }
+    }
 
+    if (is.null(server) || server == "cobalt") {  # with NULL server a hit here replaces any PBS result
+        hostfile <- Sys.getenv(c("COBALT_NODEFILE"))
+        if (hostfile[[1]] != "") {  # empty string: variable not set
+            hostnames <- read.table(hostfile, stringsAsFactors=FALSE)[[1]]  # first column only
+            summ <- rle(sort(hostnames))  # same per-host line count as the PBS branch
+            hostinfo <- data.frame(hosts=summ[[2]], count=summ[[1]])
+        }
+    }
+    if (is.null(server) || server == "sge") {  # checked last, so with NULL server it wins over the others
+        hostfile <- Sys.getenv(c("PE_HOSTFILE"))
+        if (hostfile[[1]] != "") {  # empty string: variable not set
+            hostinfo <- read.table(hostfile, stringsAsFactors=FALSE,  # file is used as the table directly, no counting
+                    col.names=c("hosts", "count"))  # NOTE(review): SGE hostfiles may have more than 2 fields per line; read.table could reject that - confirm
+        }
+    }
+    if (is.null(hostinfo))  # no branch found a host file variable
+        stop("Could not find environment variable pointing to node list.")
+    else
+        return (hostinfo)
+}
+
+
 swiftInit <- function( cores=NULL, server=NULL, 
                     hosts=NULL, nodes=NULL, project=NULL, 
                     parEnv=NULL, kernel=NULL, workmode=NULL,




More information about the Swift-commit mailing list