[Swift-commit] r4158 - in SwiftApps/SwiftR: . Swift/R
noreply at svn.ci.uchicago.edu
noreply at svn.ci.uchicago.edu
Wed Mar 2 14:48:44 CST 2011
Author: tga
Date: 2011-03-02 14:48:44 -0600 (Wed, 02 Mar 2011)
New Revision: 4158
Modified:
SwiftApps/SwiftR/IMMEDIATE-TODO
SwiftApps/SwiftR/Swift/R/Workers.R
Log:
Added some initial logic to support determining host list from an environment variable.
Modified: SwiftApps/SwiftR/IMMEDIATE-TODO
===================================================================
--- SwiftApps/SwiftR/IMMEDIATE-TODO 2011-03-01 22:17:23 UTC (rev 4157)
+++ SwiftApps/SwiftR/IMMEDIATE-TODO 2011-03-02 20:48:44 UTC (rev 4158)
@@ -40,12 +40,6 @@
- test on Ranger
HIGH:
-
-Coaster timeout problem:
- He (Mihael) will also look at a better fix to the coaster timeout problem, but for now, you should integrate the timeout change from my trunk/cog/modules/provider-coaster/src/* into your test trunk/
- Otherwise, you'll find that your coaster workers quit after a few minutes of inactivity and then start-swift needs to be killed, workers cleaned up, and start-swift restarted.
-
-HIGH:
See email about pbs breaking on UVa cluster
MID:
@@ -138,3 +132,10 @@
HIGH:
Look at all OmxNNN parallel calls - see if any are used that we don't yet handle.
- It turns out that they are not currently needed
+
+HIGH:
+
+Coaster timeout problem:
+ He (Mihael) will also look at a better fix to the coaster timeout problem, but for now, you should integrate the timeout change from my trunk/cog/modules/provider-coaster/src/* into your test trunk/
+ Otherwise, you'll find that your coaster workers quit after a few minutes of inactivity and then start-swift needs to be killed, workers cleaned up, and start-swift restarted.
+
Modified: SwiftApps/SwiftR/Swift/R/Workers.R
===================================================================
--- SwiftApps/SwiftR/Swift/R/Workers.R 2011-03-01 22:17:23 UTC (rev 4157)
+++ SwiftApps/SwiftR/Swift/R/Workers.R 2011-03-02 20:48:44 UTC (rev 4158)
@@ -1,5 +1,46 @@
+getNodeList <- function (server=getOption("swift.server")) {
+  # Work out the nodes allocated to the current batch job.
+  #
+  # Intended to be run within a job script submitted to a batch scheduler.
+  # The node list is located through a scheduler-specific environment
+  # variable that names a host file:
+  #   "pbs" / "pbsf" -> PBS_NODEFILE
+  #   "cobalt"       -> COBALT_NODEFILE
+  #   "sge"          -> PE_HOSTFILE
+  #
+  # Args:
+  #   server: one of "pbs", "pbsf", "cobalt", "sge", or NULL. Defaults to
+  #           the swift.server option. When NULL, every scheduler is
+  #           tried in the order above; if more than one environment
+  #           variable is set, the last one tried supplies the result.
+  #
+  # Returns:
+  #   A data frame whose first column (hosts) holds the unique host names
+  #   and whose second column (count) holds the number of processes for
+  #   that host.
+  #
+  # Stops with an error if server is not a recognised value, or if none
+  # of the relevant environment variables is set.
+  if (!is.null(server) && !server %in% c("pbs", "sge", "cobalt", "pbsf")) {
+    stop(paste("Invalid server setting for getNodeList:", server))
+  }
+
+  # PBS and Cobalt host files are read the same way: the first column is
+  # taken as host names, and each host's number of occurrences is its
+  # process count.
+  countHosts <- function (hostfile) {
+    hostnames <- read.table(hostfile, stringsAsFactors=FALSE)[[1]]
+    summ <- rle(sort(hostnames))
+    data.frame(hosts=summ$values, count=summ$lengths)
+  }
+
+  hostinfo <- NULL
+  if (is.null(server) || server == "pbs" || server == "pbsf") {
+    hostfile <- Sys.getenv("PBS_NODEFILE")
+    if (nzchar(hostfile)) {
+      hostinfo <- countHosts(hostfile)
+    }
+  }
+  if (is.null(server) || server == "cobalt") {
+    hostfile <- Sys.getenv("COBALT_NODEFILE")
+    if (nzchar(hostfile)) {
+      hostinfo <- countHosts(hostfile)
+    }
+  }
+  if (is.null(server) || server == "sge") {
+    hostfile <- Sys.getenv("PE_HOSTFILE")
+    if (nzchar(hostfile)) {
+      # A Grid Engine PE_HOSTFILE normally has four columns per row
+      # (host, slots, queue, processor range). Forcing two column names
+      # onto it makes read.table fail with "more columns than column
+      # names", so read every column and keep only the first two.
+      peTable <- read.table(hostfile, stringsAsFactors=FALSE)
+      if (ncol(peTable) < 2) {
+        stop(paste("Expected at least two columns (host, slots) in",
+                   hostfile))
+      }
+      hostinfo <- peTable[, 1:2]
+      names(hostinfo) <- c("hosts", "count")
+    }
+  }
+
+  if (is.null(hostinfo)) {
+    stop("Could not find environment variable pointing to node list.")
+  }
+  hostinfo
+}
+
+
swiftInit <- function( cores=NULL, server=NULL,
hosts=NULL, nodes=NULL, project=NULL,
parEnv=NULL, kernel=NULL, workmode=NULL,
More information about the Swift-commit
mailing list