[Swift-commit] r3248 - SwiftApps

Mon Feb 22 20:26:49 CST 2010

Author: wilde
Date: 2010-02-22 20:26:49 -0600 (Mon, 22 Feb 2010)
New Revision: 3248

Added:
   SwiftApps/RunR.sh
   SwiftApps/RunSwiftScript.sh
   SwiftApps/Swift.R
   SwiftApps/TestSwift.R
   SwiftApps/bootstrapdemo.R
   SwiftApps/pboot.R
   SwiftApps/swiftapply.swift
Log:
Initial revision of Swift R interface.


Added: SwiftApps/RunR.sh
===================================================================

--- SwiftApps/RunR.sh	                        (rev 0)
+++ SwiftApps/RunR.sh	2010-02-23 02:26:49 UTC (rev 3248)
@@ -0,0 +1,13 @@
+#! /usr/bin/env Rscript
+# Swift worker: load the call batch 'rcall' (func + arglistbatch) from argv[1],
+# call func once per argument list, and save the list 'result' to argv[2].
+argv <- commandArgs(TRUE)
+if (length(argv) < 2) stop("usage: RunR.sh <call-batch.Rdata> <result.Rdata>")
+load(argv[1])
+
+# lapply() keeps one result slot per call even when func returns NULL (a
+# result[[i]] <- NULL assignment would drop it) and copes with an empty batch.
+# FIXME: run this under try/catch and save error status in results object (need to make it a list: rval + error status)
+result <- lapply(unname(rcall$arglistbatch), function(args) do.call(rcall$func, args))
+
+save(result, file = argv[2])


Property changes on: SwiftApps/RunR.sh
___________________________________________________________________
Name: svn:executable
   + 

Added: SwiftApps/RunSwiftScript.sh
===================================================================
--- SwiftApps/RunSwiftScript.sh	                        (rev 0)
+++ SwiftApps/RunSwiftScript.sh	2010-02-23 02:26:49 UTC (rev 3248)
@@ -0,0 +1,32 @@
+rundir=$1
+site=$2
+
+cd $rundir
+
+cat >tc <<EOF
+$site	RunR	/home/wilde/SwiftR/RunR.sh	null	null	null
+EOF
+
+cat >sites.xml <<EOF
+<config>
+  <pool handle="local">
+    <execution provider="local" url="none" />
+    <profile namespace="karajan" key="initialScore">10000</profile>
+    <profile namespace="karajan" key="jobThrottle">.03</profile>
+    <filesystem provider="local"/>
+    <workdirectory>$(pwd)</workdirectory>
+  </pool>
+  <pool handle="pbs">
+    <profile namespace="globus" key="maxwalltime">00:00:10</profile>
+    <profile namespace="globus" key="maxtime">1800</profile>
+    <execution provider="coaster" url="none" jobManager="local:pbs"/>
+    <profile namespace="globus" key="workersPerNode">8</profile>
+    <profile namespace="karajan" key="initialScore">10000</profile>
+    <profile namespace="karajan" key="jobThrottle">.00</profile>
+    <filesystem provider="local"/>
+    <workdirectory>$(pwd)</workdirectory>
+  </pool>
+</config>
+EOF
+
+swift -tc.file tc -sites.file sites.xml ../swiftapply.swift


Property changes on: SwiftApps/RunSwiftScript.sh
___________________________________________________________________
Name: svn:executable
   + 

Added: SwiftApps/Swift.R
===================================================================
--- SwiftApps/Swift.R	                        (rev 0)
+++ SwiftApps/Swift.R	2010-02-23 02:26:49 UTC (rev 3248)
@@ -0,0 +1,76 @@
+# Apply func to every argument list in arglists by running the calls in batches
+# through Swift (./RunSwiftScript.sh) and return the results as a list, in order.
+#   func          function invoked (via do.call in RunR.sh) once per argument list
+#   arglists      list of argument lists, one per call
+#   site          site name passed to RunSwiftScript.sh
+#   callsperbatch maximum number of calls saved into one cbatch.<n>.Rdata file
+swiftapply <- function( func, arglists, site="local", callsperbatch=1 )
+{
+  callsperbatch <- as.integer(callsperbatch)
+  if (is.na(callsperbatch) || callsperbatch < 1L)
+    stop("callsperbatch must be a positive integer")
+  narglists <- length(arglists)
+  if (narglists == 0L) return(list())  # nothing to run, and 1:0 would count down
+  rundir <- system("mktemp -d SwiftR.run.XXX", intern=TRUE)
+  if (length(rundir) != 1L) stop("mktemp could not create a run directory")
+  cat("Running in ",rundir,"\n")
+
+  # Batch b holds arglists[starts[b]] up to the next start (or the last arglist).
+  starts <- seq(1L, narglists, by=callsperbatch)
+  for (batch in seq_along(starts)) {
+    last <- min(starts[batch] + (callsperbatch - 1L), narglists)
+    rcall <- list(func=func, arglistbatch=unname(arglists[starts[batch]:last]))
+    save(rcall, file=file.path(rundir, sprintf("cbatch.%d.Rdata", batch)))
+  }
+
+  # Hand the caller's site to the script and stop if the Swift run failed.
+  status <- system(paste("./RunSwiftScript.sh", shQuote(rundir), shQuote(site)))
+  if (status != 0) stop("RunSwiftScript.sh failed with exit status ", status)
+
+  # c() keeps NULL results in place; rlist[[k]] <- NULL would silently drop them
+  # and shift every later result away from its argument list.
+  rlist <- list()
+  for (batch in seq_along(starts)) {
+    benv <- new.env()
+    load(file.path(rundir, sprintf("rbatch.%d.Rdata", batch)), envir=benv)
+    rlist <- c(rlist, benv$result)
+  }
+  rlist
+}
+
+TODO="
+
+x n args
+x batch
+  into svn
+  unique dirs
+  select sites and swift args (throttles etc)
+  R docs
+  R package (SwiftR)
+  Swift docs
+  async exec
+  clean up boot: fix all calls to statistics
+  error handling and null and missing values: ensure res#s correspond to arg#s
+  status
+  specify swift scripts
+  run async and grab status (track 'runs' in R)
+  increm result collect
+  pass the func as val
+  pass extra funcs and packages required
+  pass extra vals
+  pass extra files
+  specify unique swift scripts ala Dirk's tools
+  setup the R envs
+  coasters for persistent R Servers
+  test suites
+  use littleR
+  args as alists vs args as list
+  runids, output logging
+  select exec sites and swift  params etc
+  make polymorphic to *apply and snow
+  stream results back to R (so use can inspect as they arrive)
+  (pull them in with a Swift.poll() func)
+  handle discontiguous results
+  return good error messages including messages from R eval and from Swift
+
+END"

Added: SwiftApps/TestSwift.R
===================================================================
--- SwiftApps/TestSwift.R	                        (rev 0)
+++ SwiftApps/TestSwift.R	2010-02-23 02:26:49 UTC (rev 3248)
@@ -0,0 +1,31 @@
+# Smoke test for swiftapply(): sumcrits is evaluated once directly with
+# do.call() and then nine times through Swift for several batch sizes, so
+# every element printed by the swiftapply() runs should equal the first result.
+library(boot)   # presumably supplies 'ducks' and 'dogs'; stop at once if missing
+source("Swift.R")
+
+args <- list(ducks, dogs)
+sumcrits <- function(duckdata, dogdata) {
+  sum(duckdata$plumage, dogdata$mvo)
+}
+
+res <- do.call(sumcrits, args)
+cat("Test of do.call(sumcrits)\n")
+print(res)
+
+# Nine identical argument lists: callsperbatch 2 leaves a short final batch,
+# 3 divides evenly, and 10 or 20 put everything into a single batch.
+arglist <- rep(list(args), 9)
+
+cat("\nTest of swiftapply(sumcrits,arglist)\n")
+res <- swiftapply(sumcrits, arglist)
+print(res)
+
+# Repeat the run for each explicit batch size; afterwards 'res' holds the
+# result of the last one (callsperbatch=20).
+for (cpb in c(10, 2, 3, 20)) {
+  cat("\nTest of swiftapply(sumcrits,arglist,callsperbatch=", cpb, ")\n", sep = "")
+  res <- swiftapply(sumcrits, arglist, callsperbatch = cpb)
+  print(res)
+}
+

Added: SwiftApps/bootstrapdemo.R
===================================================================
--- SwiftApps/bootstrapdemo.R	                        (rev 0)
+++ SwiftApps/bootstrapdemo.R	2010-02-23 02:26:49 UTC (rev 3248)
@@ -0,0 +1,71 @@
+#
+# OpenMx Script to demonstrate use of R's boot package for bootstrapping
+#
+# Author: M.C. Neale 1 September 2009
+#
+
+# Load required libraries
+require(OpenMx)
+require(boot)
+
+# Define a function called mles which will return maximum likelihood estimates
+# It uses the demoOneFactor dataset and one factor model on the OpenMx homepage
+# http://openmx.psyc.virginia.edu
+
+
+# Bootstrap statistic for boot/pboot: fits the "One Factor" RAM model to the
+# covariance from cov.wt(dataset, wt) and returns the estimates as a vector.
+mles <- function(dataset, wt) {
+        cat("in mles=")
+        library(OpenMx)  # loaded here as well: mles may run in a fresh R session
+        manifests <- names(dataset)
+        latents <- c("G")
+        covwt <- cov.wt(dataset, wt)
+        mlevals <- mxRun(mxModel("One Factor", type="RAM",
+            manifestVars = manifests, latentVars = latents,
+            mxPath(from=latents, to=manifests),
+            mxPath(from=manifests, arrows=2),
+            mxPath(from=latents, arrows=2, free=FALSE, values=1.0),
+            mxData(covwt$cov, type="cov", numObs=500)))
+        as.vector(mlevals@output$estimate)
+}
+    
+# Run 100 bootstraps (a smallish number)
+
+boot.out <- list()
+data(demoOneFactor)  # make the data set available before its first use below
+boot.out[[1]] <- pboot(demoOneFactor, mles, R = 100)  # pboot() is defined in pboot.R
+#boot.out[[2]] = boot(demoOneFactor,mles,R=8)
+#boot.out[[3]] = boot(demoOneFactor,mles,R=9)
+
+print("done booting - boot.out is:")
+print(boot.out)
+print("end of boot.out")
+
+# For comparison, take a look at the SE output from running the homepage job
+# once on the covariance matrix of the full data set.
+manifests <- names(demoOneFactor)
+latents <- c("G")
+factorModel <- mxModel("One Factor", type="RAM",
+      manifestVars = manifests,
+      latentVars = latents,
+      mxPath(from=latents, to=manifests),
+      mxPath(from=manifests, arrows=2),
+      mxPath(from=latents, arrows=2,
+            free=FALSE, values=1.0),
+      mxData(cov(demoOneFactor), type="cov",
+            numObs=500))
+facrun <- mxRun(factorModel)
+summary(facrun)
+
+# the estimates and standard errors should match up pretty well, though the number of replicates R above might be increased
+# therefore, only the factorModel estimates are compared:
+
+loadings <- facrun@matrices$A@values[1:5,6]
+errors <- diag(facrun@matrices$S@values[1:5,1:5])
+estimates <- as.vector(c(loadings,errors))
+omxCheckCloseEnough(as.vector(c(0.3971525,0.5036615,0.5772418,0.7027743,0.7962506,0.04081422,0.03802001,0.04082720,0.03938708,0.03628711)),estimates,.001)
+
+# The above should indicate that the results are close enough.
+
+

Added: SwiftApps/pboot.R
===================================================================
--- SwiftApps/pboot.R	                        (rev 0)
+++ SwiftApps/pboot.R	2010-02-23 02:26:49 UTC (rev 3248)
@@ -0,0 +1,118 @@
+# Variant of boot() (this looks like a copy of boot::boot - confirm) in which the
+# plain index-resampling case ("CASE 5") runs its replicates through swiftapply().
+pboot <- function (data, statistic, R, sim = "ordinary", stype = "i",
+          strata = rep(1, n), L = NULL, m = 0, weights = NULL,
+          ran.gen = function(d, p) d, mle = NULL, simple = FALSE, ...)
+{
+    call <- match.call()
+    if (simple && (sim != "ordinary" || stype != "i" || sum(m))) {
+        warning("'simple=TRUE' is only valid for 'sim=\"ordinary\", stype=\"i\", n=0, so ignored")
+        simple <- FALSE
+    }
+    if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE))
+        runif(1)  # drawing one number creates .Random.seed
+    seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE)  # handed to boot.return()
+    if (isMatrix(data))  # NOTE(review): isMatrix, index.array, freq.array, normalize, boot.return are not defined here - confirm they are in scope
+        n <- nrow(data)
+    else n <- length(data)
+    temp.str <- strata
+    strata <- tapply(1L:n, as.numeric(strata))
+    if ((n == 0) || is.null(n))
+        stop("no data in call to boot")
+    if (sim != "parametric") {
+        if ((sim == "antithetic") && is.null(L))
+            L <- empinf(data = data, statistic = statistic, stype = stype,
+                strata = strata, ...)
+        if (sim != "ordinary")
+            m <- 0
+        else if (any(m < 0))
+            stop("negative value of m supplied")
+        if ((length(m) != 1L) && (length(m) != length(table(strata))))
+            stop("length of m incompatible with strata")
+        if ((sim == "ordinary") || (sim == "balanced")) {
+            if (isMatrix(weights) && (nrow(weights) != length(R)))
+                stop("dimensions of R and weights do not match")
+        }
+        else weights <- NULL
+        if (!is.null(weights))
+            weights <- t(apply(matrix(weights, n, length(R),
+                byrow = TRUE), 2, normalize, strata))
+        if (!simple)
+            i <- index.array(n, R, sim, strata, m, L, weights)
+        if (stype == "f")
+            original <- rep(1, n)
+        else if (stype == "w") {
+            ns <- tabulate(strata)[strata]
+            original <- 1/ns
+        }
+        else original <- 1L:n
+        if (sum(m) > 0) {
+            t0 <- statistic(data, original, rep(1, sum(m)), ...)
+            lt0 <- length(t0)
+        }
+        else {
+            t0 <- statistic(data, original, ...)
+            lt0 <- length(t0)
+        }
+    }
+    else {
+        t0 <- statistic(data, ...)
+        lt0 <- length(t0)
+    }
+    t.star <- matrix(NA, sum(R), lt0)  # one row per replicate, one column per statistic value
+    pred.i <- NULL
+    if (sim == "parametric") {
+        for (r in 1L:R) {
+            t.star[r, ] <- statistic(ran.gen(data, mle), ...)
+        }
+    }
+    else {
+        if (!simple && ncol(i) > n) {
+            pred.i <- as.matrix(i[, (n + 1L):ncol(i)])
+            i <- i[, 1L:n]
+        }
+        if (stype == "f") {
+print("CASE 1")
+            f <- freq.array(i)
+            if (sum(m) == 0)
+                for (r in 1L:sum(R)) t.star[r, ] <- statistic(data,
+                  f[r, ], ...)
+            else for (r in 1L:sum(R)) t.star[r, ] <- statistic(data,
+                f[r, ], pred.i[r, ], ...)
+        }
+        else if (stype == "w") {
+print("CASE 2")
+            f <- freq.array(i)
+            if (sum(m) == 0)
+                for (r in 1L:sum(R)) t.star[r, ] <- statistic(data,
+                  f[r, ]/ns, ...)
+            else for (r in 1L:sum(R)) t.star[r, ] <- statistic(data,
+                f[r, ]/ns, pred.i[r, ], ...)
+        }
+        else if (sum(m) > 0) {
+print("CASE 3")
+            for (r in 1L:sum(R)) t.star[r, ] <- statistic(data,
+                i[r, ], pred.i[r, ], ...)
+        }
+        else if (simple) {
+print("CASE 4")
+            for (r in 1L:sum(R)) {
+                inds <- index.array(n, 1, sim, strata, m, L, weights)
+                t.star[r, ] <- statistic(data, inds, ...)
+            }
+        }
+        else {
+cat("CASE 5 - sum(R)=",sum(R))
+            # Parallel form of: for (r in 1L:sum(R)) t.star[r, ] <- statistic(data, i[r, ], ...)
+            alists = list()
+            for (r in 1L:sum(R)) alists[[r]] <- list(data,i[r,],...)  # one argument list per replicate
+            reslist = swiftapply(statistic,alists,callsperbatch=25)  # up to 25 replicates per batch
+            for (r in 1L:sum(R)) t.star[r, ] <- reslist[[r]]
+        }
+    }
+    dimnames(t.star) <- NULL
+    if (is.null(weights))
+        weights <- 1/tabulate(strata)[strata]
+    boot.return(sim, t0, t.star, temp.str, R, data, statistic,
+        stype, call, seed, L, m, pred.i, weights, ran.gen, mle)
+}

Added: SwiftApps/swiftapply.swift
===================================================================
--- SwiftApps/swiftapply.swift	                        (rev 0)
+++ SwiftApps/swiftapply.swift	2010-02-23 02:26:49 UTC (rev 3248)
@@ -0,0 +1,13 @@
+type RFile;
+// RFile stands for the .Rdata files mapped below.
+app (RFile result) RunR (RFile rcall)
+{
+  RunR @rcall @result;
+}
+// Call batches are mapped to cbatch.*.Rdata and result batches to rbatch.*.Rdata.
+RFile rcalls[]  <simple_mapper; prefix="cbatch.", suffix=".Rdata", padding=0>;
+RFile results[] <simple_mapper; prefix="rbatch.", suffix=".Rdata", padding=0>;
+// One RunR invocation per mapped call file; results[i] is produced from rcalls[i].
+foreach c, i in rcalls {
+  results[i] = RunR(c);
+}