[Swift-commit] r3248 - SwiftApps
noreply at svn.ci.uchicago.edu
noreply at svn.ci.uchicago.edu
Mon Feb 22 20:26:49 CST 2010
Author: wilde
Date: 2010-02-22 20:26:49 -0600 (Mon, 22 Feb 2010)
New Revision: 3248
Added:
SwiftApps/RunR.sh
SwiftApps/RunSwiftScript.sh
SwiftApps/Swift.R
SwiftApps/TestSwift.R
SwiftApps/bootstrapdemo.R
SwiftApps/pboot.R
SwiftApps/swiftapply.swift
Log:
Initial revision of Swift R interface.
Added: SwiftApps/RunR.sh
===================================================================
--- SwiftApps/RunR.sh (rev 0)
+++ SwiftApps/RunR.sh 2010-02-23 02:26:49 UTC (rev 3248)
@@ -0,0 +1,13 @@
+#! /usr/bin/env Rscript
+
+argv = commandArgs(TRUE)
+
+load(argv[1]);
+
+result=list()
+for(c in 1:length(rcall$arglistbatch)) {
+ # FIXME: run this under try/catch and save error status in results object (need to make it a list: rval + error status)
+ result[[c]] = do.call( rcall$func, rcall$arglistbatch[[c]] )
+}
+
+save(result,file=argv[2])
Property changes on: SwiftApps/RunR.sh
___________________________________________________________________
Name: svn:executable
+
Added: SwiftApps/RunSwiftScript.sh
===================================================================
--- SwiftApps/RunSwiftScript.sh (rev 0)
+++ SwiftApps/RunSwiftScript.sh 2010-02-23 02:26:49 UTC (rev 3248)
@@ -0,0 +1,32 @@
+rundir=$1
+site=$2
+
+cd $rundir
+
+cat >tc <<EOF
+$site RunR /home/wilde/SwiftR/RunR.sh null null null
+EOF
+
+cat >sites.xml <<EOF
+<config>
+ <pool handle="local">
+ <execution provider="local" url="none" />
+ <profile namespace="karajan" key="initialScore">10000</profile>
+ <profile namespace="karajan" key="jobThrottle">.03</profile>
+ <filesystem provider="local"/>
+ <workdirectory>$(pwd)</workdirectory>
+ </pool>
+ <pool handle="pbs">
+ <profile namespace="globus" key="maxwalltime">00:00:10</profile>
+ <profile namespace="globus" key="maxtime">1800</profile>
+ <execution provider="coaster" url="none" jobManager="local:pbs"/>
+ <profile namespace="globus" key="workersPerNode">8</profile>
+ <profile namespace="karajan" key="initialScore">10000</profile>
+ <profile namespace="karajan" key="jobThrottle">.00</profile>
+ <filesystem provider="local"/>
+ <workdirectory>$(pwd)</workdirectory>
+ </pool>
+</config>
+EOF
+
+swift -tc.file tc -sites.file sites.xml ../swiftapply.swift
Property changes on: SwiftApps/RunSwiftScript.sh
___________________________________________________________________
Name: svn:executable
+
Added: SwiftApps/Swift.R
===================================================================
--- SwiftApps/Swift.R (rev 0)
+++ SwiftApps/Swift.R 2010-02-23 02:26:49 UTC (rev 3248)
@@ -0,0 +1,76 @@
+swiftapply <- function( func, arglists, site="local", callsperbatch=1 )
+{
+ rundir = system("mktemp -d SwiftR.run.XXX",intern=TRUE)
+ cat("Running in ",rundir,"\n")
+ narglists = length(arglists) # number of arglists to process
+ batch=1 # Next arglist batch number to fill
+ arglist=1 # Next arglist number to insert
+ while(arglist <= narglists) {
+ arglistsleft = narglists - arglist + 1
+ if(arglistsleft >= callsperbatch) {
+ batchsize = callsperbatch
+ }
+ else {
+ batchsize = arglistsleft
+ }
+ arglistbatch = list()
+ for(i in 1 : batchsize) {
+ arglistbatch[[i]] = arglists[[arglist]]
+ arglist = arglist +1
+ }
+ rcall = list(func=func,arglistbatch=arglistbatch)
+ save(rcall,file=paste(rundir,"/cbatch.",as.character(batch),".Rdata",sep=""))
+ batch = batch + 1;
+ }
+ nbatches = batch - 1
+ system(paste("./RunSwiftScript.sh",rundir,"local",sep=" "))
+
+ rno = 1
+ rlist = list()
+ for(batch in 1:nbatches) {
+ load(paste(rundir,"/rbatch.",as.character(batch),".Rdata",sep=""))
+ nresults = length(result)
+ for(r in 1:nresults) {
+ rlist[[rno]] = result[[r]]
+ rno = rno + 1
+ }
+ }
+ return(rlist)
+}
+
+TODO="
+
+x n args
+x batch
+ into svn
+ unique dirs
+ select sites and swift args (throttles etc)
+ R docs
+ R package (SwiftR)
+ Swift docs
+ async exec
+ clean up boot: fix all calls to statistics
+ error handling and null and missing values: ensure res#s correspond to arg#s
+ status
+ specify swift scripts
+ run async and grab status (track 'runs' in R)
+ increm result collect
+ pass the func as val
+ pass extra funcs and packages required
+ pass extra vals
+ pass extra files
+ specifiy unique swift scritps ala Dirk's tools
+ setup the R envs
+ coasters for persistent R Servers
+ test suites
+ use littleR
+ args as alists vs args as list
+ runids, output logging
+ select exec sites and swift params etc
+ make polymorphic to *apply and snow
+ stream results back to R (so use can inspect as they arrive)
+ (pull them in with a Swift.poll() func)
+ handle discontiguous results
+ return good error messages including messages from R eval and from Swift
+
+END"
Added: SwiftApps/TestSwift.R
===================================================================
--- SwiftApps/TestSwift.R (rev 0)
+++ SwiftApps/TestSwift.R 2010-02-23 02:26:49 UTC (rev 3248)
@@ -0,0 +1,31 @@
+require(boot)
+source("Swift.R")
+
+args=list(ducks,dogs)
+sumcrits <- function(duckdata,dogdata) { sum( duckdata$plumage, dogdata$mvo ) }
+res = do.call(sumcrits,args)
+cat("Test of do.call(sumcrits)\n")
+print(res)
+
+arglist = rep(list(args),9)
+
+cat("\nTest of swiftapply(sumcrits,arglist)\n")
+res = swiftapply(sumcrits,arglist)
+print(res)
+
+cat("\nTest of swiftapply(sumcrits,arglist,callsperbatch=10)\n")
+res = swiftapply(sumcrits,arglist,callsperbatch=10)
+print(res)
+
+cat("\nTest of swiftapply(sumcrits,arglist,callsperbatch=2)\n")
+res = swiftapply(sumcrits,arglist,callsperbatch=2)
+print(res)
+
+cat("\nTest of swiftapply(sumcrits,arglist,callsperbatch=3)\n")
+res = swiftapply(sumcrits,arglist,callsperbatch=3)
+print(res)
+
+cat("\nTest of swiftapply(sumcrits,arglist,callsperbatch=20)\n")
+res = swiftapply(sumcrits,arglist,callsperbatch=20)
+print(res)
+
Added: SwiftApps/bootstrapdemo.R
===================================================================
--- SwiftApps/bootstrapdemo.R (rev 0)
+++ SwiftApps/bootstrapdemo.R 2010-02-23 02:26:49 UTC (rev 3248)
@@ -0,0 +1,71 @@
+#
+# OpenMx Script to demonstrate use of R's boot package for bootstrapping
+#
+# Author: M.C. Neale 1 September 2009
+#
+
+# Load required libraries
+require(OpenMx)
+require(boot)
+
+# Define a function called mles which will return maximum likelihood estimates
+# It uses the demoOneFactor dataset and one factor model on the OpenMx homepage
+# http://openmx.psyc.virginia.edu
+
+
+mles<-function(dataset,wt){
+cat("in mles=");
+require(OpenMx)
+ manifests <- names(dataset)
+ latents <- c("G")
+ covwt <- cov.wt(dataset,wt)
+ mlevals <- mxRun(mxModel("One Factor", type="RAM",
+ manifestVars = manifests,
+ latentVars = latents,
+ mxPath(from=latents, to=manifests),
+ mxPath(from=manifests, arrows=2),
+ mxPath(from=latents, arrows=2,
+ free=F, values=1.0),
+ mxData(covwt$cov, type="cov",
+ numObs=500)))
+ return(as.vector(mlevals at output$estimate))}
+
+# Run 100 bootstraps (a smallish number)
+
+boot.out=list()
+
+boot.out[[1]] = pboot(demoOneFactor,mles,R=100)
+#boot.out[[2]] = boot(demoOneFactor,mles,R=8)
+#boot.out[[3]] = boot(demoOneFactor,mles,R=9)
+
+print("done booting - boot.out is:")
+print(boot.out)
+print("end of boot.out")
+
+# For comparison, take a look at the SE output from running the homepage job once
+data(demoOneFactor)
+manifests <- names(demoOneFactor)
+latents <- c("G")
+factorModel <- mxModel("One Factor", type="RAM",
+ manifestVars = manifests,
+ latentVars = latents,
+ mxPath(from=latents, to=manifests),
+ mxPath(from=manifests, arrows=2),
+ mxPath(from=latents, arrows=2,
+ free=F, values=1.0),
+ mxData(cov(demoOneFactor), type="cov",
+ numObs=500))
+facrun<-mxRun(factorModel)
+summary(facrun)
+
+# the estimates and standard errors should match up pretty well, though the number of replicates R above might be increased
+# therefore, only the factorModel estimates are compared:
+
+loadings<-facrun at matrices$A at values[1:5,6]
+errors<-diag(facrun at matrices$S at values[1:5,1:5])
+estimates<-as.vector(c(loadings,errors))
+omxCheckCloseEnough(as.vector(c(0.3971525,0.5036615,0.5772418,0.7027743,0.7962506,0.04081422,0.03802001,0.04082720,0.03938708,0.03628711)),estimates,.001)
+
+# The above should indicate that the results are close enough.
+
+
Added: SwiftApps/pboot.R
===================================================================
--- SwiftApps/pboot.R (rev 0)
+++ SwiftApps/pboot.R 2010-02-23 02:26:49 UTC (rev 3248)
@@ -0,0 +1,118 @@
+pboot =
+function (data, statistic, R, sim = "ordinary", stype = "i",
+ strata = rep(1, n), L = NULL, m = 0, weights = NULL,
+ ran.gen = function(d, p) d, mle = NULL, simple = FALSE, ...)
+{
+ call <- match.call()
+ if (simple && (sim != "ordinary" || stype != "i" || sum(m))) {
+ warning("'simple=TRUE' is only valid for 'sim=\"ordinary\", stype=\"i\", n=0, so ignored")
+ simple <- FALSE
+ }
+ if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE))
+ runif(1)
+ seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE)
+ if (isMatrix(data))
+ n <- nrow(data)
+ else n <- length(data)
+ temp.str <- strata
+ strata <- tapply(1L:n, as.numeric(strata))
+ if ((n == 0) || is.null(n))
+ stop("no data in call to boot")
+ if (sim != "parametric") {
+ if ((sim == "antithetic") && is.null(L))
+ L <- empinf(data = data, statistic = statistic, stype = stype,
+ strata = strata, ...)
+ if (sim != "ordinary")
+ m <- 0
+ else if (any(m < 0))
+ stop("negative value of m supplied")
+ if ((length(m) != 1L) && (length(m) != length(table(strata))))
+ stop("length of m incompatible with strata")
+ if ((sim == "ordinary") || (sim == "balanced")) {
+ if (isMatrix(weights) && (nrow(weights) != length(R)))
+ stop("dimensions of R and weights do not match")
+ }
+ else weights <- NULL
+ if (!is.null(weights))
+ weights <- t(apply(matrix(weights, n, length(R),
+ byrow = TRUE), 2, normalize, strata))
+ if (!simple)
+ i <- index.array(n, R, sim, strata, m, L, weights)
+ if (stype == "f")
+ original <- rep(1, n)
+ else if (stype == "w") {
+ ns <- tabulate(strata)[strata]
+ original <- 1/ns
+ }
+ else original <- 1L:n
+ if (sum(m) > 0) {
+ t0 <- statistic(data, original, rep(1, sum(m)), ...)
+ lt0 <- length(t0)
+ }
+ else {
+ t0 <- statistic(data, original, ...)
+ lt0 <- length(t0)
+ }
+ }
+ else {
+ t0 <- statistic(data, ...)
+ lt0 <- length(t0)
+ }
+ t.star <- matrix(NA, sum(R), lt0)
+ pred.i <- NULL
+ if (sim == "parametric") {
+ for (r in 1L:R) {
+ t.star[r, ] <- statistic(ran.gen(data, mle), ...)
+ }
+ }
+ else {
+ if (!simple && ncol(i) > n) {
+ pred.i <- as.matrix(i[, (n + 1L):ncol(i)])
+ i <- i[, 1L:n]
+ }
+ if (stype == "f") {
+print("CASE 1")
+ f <- freq.array(i)
+ if (sum(m) == 0)
+ for (r in 1L:sum(R)) t.star[r, ] <- statistic(data,
+ f[r, ], ...)
+ else for (r in 1L:sum(R)) t.star[r, ] <- statistic(data,
+ f[r, ], pred.i[r, ], ...)
+ }
+ else if (stype == "w") {
+print("CASE 2")
+ f <- freq.array(i)
+ if (sum(m) == 0)
+ for (r in 1L:sum(R)) t.star[r, ] <- statistic(data,
+ f[r, ]/ns, ...)
+ else for (r in 1L:sum(R)) t.star[r, ] <- statistic(data,
+ f[r, ]/ns, pred.i[r, ], ...)
+ }
+ else if (sum(m) > 0) {
+print("CASE 3")
+ for (r in 1L:sum(R)) t.star[r, ] <- statistic(data,
+ i[r, ], pred.i[r, ], ...)
+ }
+ else if (simple) {
+print("CASE 4")
+ for (r in 1L:sum(R)) {
+ inds <- index.array(n, 1, sim, strata, m, L,
+ weights)
+ t.star[r, ] <- statistic(data, inds, ...)
+ }
+ }
+ else {
+cat("CASE 5 - sum(R)=",sum(R))
+ # for (r in 1L:sum(R)) t.star[r, ] <- statistic(data, i[r, ], ...)
+ alists = list()
+ for (r in 1L:sum(R)) alists[[r]] <- list(data,i[r,],...)
+ reslist = swiftapplyb(statistic,alists,callsperbatch=25)
+ for (r in 1L:sum(R)) t.star[r, ] <- reslist[[r]]
+ }
+ }
+ dimnames(t.star) <- NULL
+ if (is.null(weights))
+ weights <- 1/tabulate(strata)[strata]
+ boot.return(sim, t0, t.star, temp.str, R, data, statistic,
+ stype, call, seed, L, m, pred.i, weights, ran.gen, mle)
+}
Added: SwiftApps/swiftapply.swift
===================================================================
--- SwiftApps/swiftapply.swift (rev 0)
+++ SwiftApps/swiftapply.swift 2010-02-23 02:26:49 UTC (rev 3248)
@@ -0,0 +1,13 @@
+type RFile;
+
+app (RFile result) RunR (RFile rcall)
+{
+ RunR @rcall @result;
+}
+
+RFile rcalls[] <simple_mapper; prefix="cbatch.", suffix=".Rdata", padding=0>;
+RFile results[] <simple_mapper; prefix="rbatch.", suffix=".Rdata", padding=0>;
+
+foreach c, i in rcalls {
+ results[i] = RunR(c);
+}
More information about the Swift-commit
mailing list