[Swift-commit] r4036 - in SwiftApps/SwiftR: . Swift/R Swift/exec Swift/man
noreply at svn.ci.uchicago.edu
Tue Jan 25 15:35:30 CST 2011
Author: tga
Date: 2011-01-25 15:35:30 -0600 (Tue, 25 Jan 2011)
New Revision: 4036
Modified:
SwiftApps/SwiftR/JOINT-TODO
SwiftApps/SwiftR/Swift/R/Swift.R
SwiftApps/SwiftR/Swift/R/Workers.R
SwiftApps/SwiftR/Swift/exec/start-swift
SwiftApps/SwiftR/Swift/man/Swift-package.Rd
SwiftApps/SwiftR/Swift/man/swiftInit.Rd
SwiftApps/SwiftR/Swift/man/swiftapply.Rd
Log:
* SGE bugfix: correctly capture jobid
* Updated package help file
* All swiftApply and swiftInit options can now be set through options()
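For example, a minimal sketch of the new behaviour (assuming a local session
with the Swift package installed; see the updated man pages below for the
full list of option names):

    options(swift.server="local", swift.cores=4)  # set global defaults once
    require(Swift)
    swiftInit()                        # picks up swift.server and swift.cores
    r <- swiftLapply(seq(1,10), sqrt)  # runs on the local workers
    swiftShutdown()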
Modified: SwiftApps/SwiftR/JOINT-TODO
===================================================================
--- SwiftApps/SwiftR/JOINT-TODO 2011-01-25 19:23:13 UTC (rev 4035)
+++ SwiftApps/SwiftR/JOINT-TODO 2011-01-25 21:35:30 UTC (rev 4036)
@@ -6,7 +6,6 @@
- Local multi-core
- PBS
- SSH
-* No hanging on FIFO reads/writes
* Working on the following platforms for development use:
- Cobalt
- Slurm
Modified: SwiftApps/SwiftR/Swift/R/Swift.R
===================================================================
--- SwiftApps/SwiftR/Swift/R/Swift.R 2011-01-25 19:23:13 UTC (rev 4035)
+++ SwiftApps/SwiftR/Swift/R/Swift.R 2011-01-25 21:35:30 UTC (rev 4036)
@@ -48,8 +48,19 @@
tmpdir <- getOption("swift.tmpdir")
if(is.null(tmpdir))
tmpdir <- Sys.getenv("SWIFTR_TMP");
- if(tmpdir=="")
+
+ if(tmpdir=="") {
tmpdir <- "/tmp";
+ }
+
+ if(is.null(quiet)) {
+ quiet <- getOption("swift.quiet")
+ if(is.null(quiet))
+ quiet <- FALSE
+ }
+
if (! quiet) {
cat("\nSwift properties:\n")
cat(" server =", server,"\n")
@@ -82,7 +93,7 @@
# basedir = paste("/tmp/",user,"/SwiftR/requests",sep="")
# reqdir <- system(paste("mktemp -d ", basedir, "/SwiftR.run.XXXX",sep=""),intern=TRUE)
reqdir = sprintf("%s/R%.7d",requestdirbase,requestid)
- dir.create(reqdir,showWarnings=FALSE)
+ dir.create(reqdir,recursive=TRUE,showWarnings=FALSE)
#dir.create(reqdir,showWarnings=TRUE)
if (! quiet) {
cat("Swift request is in",reqdir,"\n")
Modified: SwiftApps/SwiftR/Swift/R/Workers.R
===================================================================
--- SwiftApps/SwiftR/Swift/R/Workers.R 2011-01-25 19:23:13 UTC (rev 4035)
+++ SwiftApps/SwiftR/Swift/R/Workers.R 2011-01-25 21:35:30 UTC (rev 4036)
@@ -8,7 +8,6 @@
rcmd=NULL, time=NULL,
workerLogging=NULL )
{
- #TODO: document function
# server: which server backend to use to acquire workers
# for example, local runs tasks on the local machine
# pbs, uses the PBS scheduler to obtain nodes on a cluster,
@@ -37,6 +36,8 @@
# Presume UNIX path names - start-swift script
cmdString <- file.path(.find.package("Swift"), "exec/start-swift-daemon")
+ if(is.null(cores))
+ cores <- getOption("swift.cores")
if(! is.null(cores) ) {
cmdString <- paste(cmdString, "-c", shQuote(cores))
}
@@ -46,30 +47,58 @@
if(! is.null(server) ) {
cmdString <- paste(cmdString, "-s", shQuote(server))
}
+
+ if(is.null(hosts))
+ hosts <- getOption("swift.hosts")
if(! is.null(hosts) ) {
cmdString <- paste(cmdString, "-h", shQuote(hosts) )
}
+
+ if(is.null(parEnv))
+ parEnv <- getOption("swift.parenv")
if(! is.null(parEnv) ) {
cmdString <- paste(cmdString, "-e", shQuote(parEnv))
}
+
+ if(is.null(workmode))
+ workmode <- getOption("swift.workmode")
if(! is.null(workmode) ) {
cmdString <- paste(cmdString, "-m", shQuote(workmode))
}
+
+ if(is.null(nodes))
+ nodes <- getOption("swift.nodes")
if(! is.null(nodes) ) {
cmdString <- paste(cmdString, "-n", shQuote(nodes))
}
+
+ if(is.null(throttle))
+ throttle <- getOption("swift.throttle")
if(! is.null(throttle) ) {
cmdString <- paste(cmdString, "-p", shQuote(throttle))
}
+
+ if(is.null(queue))
+ queue <- getOption("swift.queue")
if(! is.null(queue) ) {
cmdString <- paste(cmdString, "-q", shQuote(queue))
}
+
+ if(is.null(rcmd))
+ rcmd <- getOption("swift.rcmd")
if(! is.null(rcmd) ) {
cmdString <- paste(cmdString, "-r", shQuote(rcmd))
}
+
+
+ if(is.null(time))
+ time <- getOption("swift.time")
if(! is.null(time) ) {
cmdString <- paste(cmdString, "-t", shQuote(time))
}
+
+ if(is.null(workerLogging))
+ workerLogging <- getOption("swift.workerLogging")
if(! is.null(workerLogging) ) {
cmdString <- paste(cmdString, "-w", shQuote(workerLogging))
}
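Every argument in the hunk above follows the same fallback: use the explicit
argument if given, otherwise getOption("swift.<name>"), otherwise omit the
flag. A condensed sketch of the pattern (the helper name argOrOption is
hypothetical, not part of the package):

    # Hypothetical helper showing the fallback applied to each argument
    argOrOption <- function(value, optname) {
      if (is.null(value)) getOption(optname) else value
    }
    cores <- argOrOption(cores, "swift.cores")
    if (!is.null(cores)) {
      cmdString <- paste(cmdString, "-c", shQuote(cores))
    }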
Modified: SwiftApps/SwiftR/Swift/exec/start-swift
===================================================================
--- SwiftApps/SwiftR/Swift/exec/start-swift 2011-01-25 19:23:13 UTC (rev 4035)
+++ SwiftApps/SwiftR/Swift/exec/start-swift 2011-01-25 21:35:30 UTC (rev 4036)
@@ -4,6 +4,15 @@
export TRAPEVENTS="EXIT 1 2 3 15" # Signals and conditions to trap
+
+# Standard cleanup actions
+function stdcleanup {
+ # don't accept any more requests: unlink fifo from filesystem
+ if [ -p requestpipe ]; then
+ rm requestpipe
+ fi
+}
+
# Define internal functions
get-contact()
@@ -173,6 +182,10 @@
if [ $parEnv != NONE ]; then
parEnvDirective="#$ -pe $parEnv $(($nodes*$cores))"
else
+ if [ $nodes -gt 1 ]; then
+ echo "Warning: requested $nodes nodes without parEnv directive"
+ echo "SGE provider is defaulting to a single node and $cores cores"
+ fi
parEnvDirective=""
fi
@@ -198,16 +211,28 @@
# $ -A TG-DBS080004N
cd /
+ HOST=$(echo $CONTACT | sed -e 's,^http://,,' -e 's/:.*//')
+ PORT=$(echo $CONTACT | sed -e 's,^.*:,,')
+ if [ "\$PE_HOSTFILE" = "" ] ; then
+ # Not a parallel environment, just run job on this host
+ # with the specified number of cores
+ echo '***' Single host \$(hostname) CONTACT:$CONTACT
+ # Mimic the first two pe_hostfile columns
+ HOSTS="\$(hostname) $cores"
+ else
+ echo '***' PE_HOSTFILE file: \$PE_HOSTFILE CONTACT:$CONTACT
+ cat \$PE_HOSTFILE
+ HOSTS=\$(cat \$PE_HOSTFILE)
+ fi
+
+
+
if [ $workmode = slot ]; then
- NODES=\`cat \$PE_HOSTFILE | awk '{ for(i=0;i<\$2;i++){print \$1} }'\`
+ NODES=\`echo "\$HOSTS" | awk '{ for(i=0;i<\$2;i++){print \$1} }'\`
else
- NODES=\`cat \$PE_HOSTFILE | awk '{print \$1}'\` # Better for Ranger, Eddie, ...
+ NODES=\`echo "\$HOSTS" | awk '{print \$1}'\` # Better for Ranger, Eddie, ...
fi
- HOST=$(echo $CONTACT | sed -e 's,^http://,,' -e 's/:.*//')
- PORT=$(echo $CONTACT | sed -e 's,^.*:,,')
- echo '***' PE_HOSTFILE file: \$PE_HOSTFILE CONTACT:$CONTACT
- cat \$PE_HOSTFILE
for h in \$NODES; do
workerCmd="echo Swift R startup running on host; hostname; cd /; WORKER_LOGGING_LEVEL=$workerLogging /usr/bin/perl $SWIFTBIN/worker.pl $CONTACT SwiftR-\$h $HOME/.globus/coasters $IDLETIMEOUT"
@@ -233,9 +258,31 @@
# FIXME: set up for capturing batch job id: rm -rf remotepid.* # FIXME: should not be needed if we start in a new dir each time
make-${server}-submit-file
- qsub batch.sub >$jobidfile
+ #FIXME: doesn't work for SGE on IBI cluster as there is additional text
+ # returned by qsub
+
+ if [ "${server}" != "sge" ]
+ then
+ qsub batch.sub >$jobidfile
+ succ=$?
+ else
+        # Sun Grid Engine inconveniently returns a bunch of text surrounding
+        # the job id. There is no documented way to obtain the job number
+        # directly from qsub. We parse out the first number in this text
+        # and assume it is the job ID (this holds for the versions of SGE
+        # this was tested on).
+ qsub batch.sub | sed 's/[^0-9 ]//g' | awk '{ print $1 }' > $jobidfile
+ succ=$?
+ fi
- echo Started workers from batch job $(cat $jobidfile)
+    if [ $succ -eq 0 ]
+ then
+ echo Started workers from batch job $(cat $jobidfile)
+ else
+ echo Batch queue submission failed, exiting.
+ stdcleanup
+ exit 1
+ fi
}
verify-is-one-of()
@@ -385,12 +432,6 @@
out=swift.stdouterr
touch $out
-function stdcleanup {
- # don't accept any more requests: unlink fifo from filesystem
- if [ -p requestpipe ]; then
- rm requestpipe
- fi
-}
if [ $server = local ]; then
@@ -477,7 +518,7 @@
jobid=$(cat $jobidfile)
echo Terminating worker processes starter $starterpid and batch job $jobid
if [ "_$starterpid" != _ ]; then
- kill $starterpid
+ kill $starterpid &> /dev/null
fi
if [ "_$jobid" != _ ]; then
qdel "$jobid"
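For the SGE job-id capture above, the sed/awk pipeline keeps only digits and
spaces and then takes the first field. A rough R equivalent, for illustration
only (the sample qsub message is an assumption, not taken from a specific SGE
release):

    qsub_output <- 'Your job 123456 ("batch.sub") has been submitted'  # assumed wording
    digits_only <- gsub("[^0-9 ]", "", qsub_output)   # mirrors sed 's/[^0-9 ]//g'
    fields <- strsplit(digits_only, " +")[[1]]        # split on runs of spaces
    jobid <- fields[fields != ""][1]                  # first field, mirrors awk '{ print $1 }'
    # jobid is "123456"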
Modified: SwiftApps/SwiftR/Swift/man/Swift-package.Rd
===================================================================
--- SwiftApps/SwiftR/Swift/man/Swift-package.Rd 2011-01-25 19:23:13 UTC (rev 4035)
+++ SwiftApps/SwiftR/Swift/man/Swift-package.Rd 2011-01-25 21:35:30 UTC (rev 4036)
@@ -72,7 +72,7 @@
r = swiftLapply(seq(1,10),sqrt)
}
-Currenty swiftLapply is the only one implemented (i.e. swiftSapply etc are not yet provided but will be soon).
+Currently swiftLapply is the only one implemented (i.e. swiftSapply etc are not yet provided but will be soon).
Arbitrary R objects can be passed. For example:
@@ -87,17 +87,21 @@
res = swiftapply(sumstuff, arglist)
}
-As a preliminary interface, you can set R options() to control the
-operation of the functions in the Swift package:
+\code{swiftapply} and \code{swiftInit} take a range of arguments to
+control the setup of the cluster and how parallel apply calls are handled.
+You can set global default values of these settings through R options().
+
+Some key options are:
+
options(swift.callsperbatch=n) # n = number of R calls to perform in
each Swift job.
options(swift.server="servername") # servername = "local" to run on
-the current host, "pbs" to submit to a local PBS cluster, "pbsf" to
-run on clusters such as Merlot which have firewalls that restrict
-outbound cnnectivity from the worker nodes to the Swift server running
-on the login node.
+the current host, "ssh" to run on remote machines via ssh, "pbs" to submit to a local PBS cluster and "sge" for a local Sun Grid Engine cluster. An additional
+setting, "pbsf", is provided for clusters such as Merlot which have
+firewalls that restrict outbound connectivity from the worker nodes
+to the Swift server running on the login node.
options(swift.keepwork=TRUE) # Retain the temporary files that the
Swift functions use to pass R data from client to remote R
@@ -157,42 +161,68 @@
(b) One or more remote machines, possibly each a multicore, accessed via ssh
-(c) Clusters running PBS, SGE, or Condor schedulers
+(c) Clusters running PBS or SGE schedulers. More cluster schedulers will
+ be supported in the future.
-In configurations (b) and (c) Swift will launch its own workers, and
-then communicate using its own TCP protocol.
+You can select between options (a), (b) and (c) using the "swift.server" option
+or by providing a "server" argument to \code{swiftInit} and \code{swiftapply}.
+Other arguments, documented on the \code{swiftInit} manual page, allow
+you to specify further parameters, such as the number of cores.
-Swift workers must be able to connect back to the Swift server on TCP
+In cases (b) and (c), the Swift workers, running on remote machines or
+cluster nodes, must be able to connect back to the Swift server (running
+on the same machine as R) on TCP
ports in the range of 30000 and higher. (FIXME: determine specifics).
+
+
If this is not available on a cluster (e.g., Merlot), then the pbsf
server will tunnel the Swift port over the standard ssh port, assuming
-that is reachable.
+that is reachable. If pbsf is in use, and
+your Swift server machine has multiple network
+interfaces, you may need to set the GLOBUS_HOSTNAME environment variable
+to specify the network address workers should connect to.
+
+
}
\section{INSTALLATION}{
+Installation is through the standard R CMD INSTALL command.
+
+If you have access to install libraries directly in your R
+installation:
\preformatted{
-mkdir ~/RPackages ~/RLibrary # if not already created
-cd ~/RPackages
-wget http://www.ci.uchicago.edu/~wilde/Swift_0.1.tar.gz
-R CMS INSTALL -l ~/RLibrary Swift_0.1.tar.gz
-export R_LIBS=~/RLibrary
+wget http://www.ci.uchicago.edu/~wilde/Swift_0.2.tar.gz
+R CMD INSTALL Swift_0.2.tar.gz
+}
-export GLOBUS_HOSTNAME=10.0.0.200 # Eg for Merlot: internal address of the login node}
+If you keep your libraries in a separate location in your home directory,
+or you do not have access to modify the R installation:
+\preformatted{
+mkdir ~/RLibrary # if you have not already created a library folder
+wget http://www.ci.uchicago.edu/~wilde/Swift_0.2.tar.gz
+R CMD INSTALL -l ~/RLibrary Swift_0.2.tar.gz
+
+# tell R where to find the library: add this to your .bashrc file or equivalent
+export R_LIBS=~/RLibrary:$R_LIBS
+
}
+}
\section{QUICK_START}{
-
+In a terminal window:
\preformatted{
-In a shell (outside of R) start the local Swift server:
-$HOME/RLibrary/Swift/exec/swift-start local #
+$ export R_LIBS=$HOME/RLibrary
-export R_LIBS=$HOME/RLibrary
-
-R
+$ R
+}
+Now in R:
+\preformatted{
> require(Swift)
+> options(swift.server="local")
+> swiftInit(cores=4) # start up Swift on your local machine
> basicSwiftTest() # should take about 1 second
> runAllSwiftTests() # should take < 60 seconds
@@ -219,40 +249,34 @@
\section{START_SERVERS}{
-To run swiftapply() and any of the swiftXapply() functions, you first
+To run \code{swiftapply()} and any of the \code{swiftXapply()}
+functions, you first
start one or more "Swift servers" on your local host (where you will
-run the R client workspace.
-
-Currently you must do this manually and in your login shell, outside
-of R - BEFORE trying to run R Swift functions. If you run swiftapply()
-without a Swift server running, your R session will hang and you will
-need to kill it. This issue will be resolved shortly.
-
-The start-swift command (and all related shell scripts) are located in the installed package "exec" directry, so its handy to set a shell variable to point there:
-
-SWIFT=<your package install dir>/Swift/
-
+run the R client workspace). You can do this with the \code{swiftInit()}
+function in R.
Examples of starting the Swift server follow.
-To run N parallel R servers on the local host, one for each core:
+To run 4 parallel R servers on the local host:
-\verb{$SWIFT/exec/start-swift}
+\verb{options(swift.server="local")}
+\verb{swiftInit(cores=4)}
-To run 4 R servers:
-
-\verb{$SWIFT/exec/start-swift -c 4}
-
To run 4 R servers on each of two hosts that can be reached by ssh:
-\verb{$SIFT/exec/start-swift -s ssh -c 4 -h "hostname1 hostname2"}
+\verb{options(swift.server="ssh")}
+\verb{swiftInit(cores="4", hosts="hostname1 hostname2")}
To run 8 R servers for 30 minutes on each of 3 nodes of the Merlot cluster, run this on the login host "merlot", using its "serial" queue:
-\verb{$SWIFT/exec/start-swift -s pbsf -c 8 -n 3 -t 00:30:00 -q serial}
+\verb{options(swift.server="pbsf")}
+\verb{swiftInit(cores=8, nodes=3, time="00:30:00",
+ queue="serial")}
-These Swift servers can be started and left running, across R runs
+These Swift servers can be started and left running for multiple
+\code{swiftapply()}
+calls, and will be shut down when you close your R session, when the Swift
+package is unloaded or when \code{swiftShutdown()} is called.
-options(swift.server="local") # or "pbsman" or "ssh"
}
\section{TESTS}{
@@ -263,6 +287,7 @@
# Start swift local server as above
require(Swift)
+swiftInit()
basicSwiftTest()
}
@@ -270,6 +295,7 @@
\preformatted{
require(Swift)
+swiftInit()
runAllSwiftTests()
}
@@ -281,60 +307,6 @@
}
-\section{STOPPING_SWIFT_SERVERS}{
-
-The following ps command is useful for displaying the many background
-swift processes. I keep this aliased as "mp" (my processes):
-
- alias mp='ps -fjH -u $USER'
-
-Local (swift-start local):
-
-$ jobs
-$ kill %1
-
-Remote (swift-start ssh):
-
-$ jobs
-$ kill %1 # This tries to track down the remote processes and kill them
-
-Cluster (swift-start pbs):
-
-$ jobs
-$ kill %1 # Swift should terminate its queued and/or running cluster jobs
-
-
-Occaasionally a killall R and/or killall java is required
-
-}
-
-\section{USAGE}{
-
-Swift returns Error object when remote side fails.
-
-swiftapply( )
-
-
-options:
-
- swift.server: matched server name on start-swift
-
- swift.callsperbatch
-
- initialize:
-
-less likely to touch:
- remove temp reqs (sp???) FIXME
- mode (service, manual, ???)
-
-Other Swift functions (compatible with Snow/Snowfall packages):
-
-swiftLapply
-
-To be developed: swiftSapply, ...
-
-}
-
\section{OPENMX_EXAMPLES}{
This section is specific to users of the OpenMX R package for
@@ -342,54 +314,42 @@
}
-\section{USING_OTHER_PARALLEL_ENVIRONMENTS}{
+\section{SSH-specific issues}{
-3) ssh confiured for password-free login (to run on remote worker nodes)
+For the "ssh" server to work correctly within SwiftR, it is best that
+you have ssh configured for password-free login to the remote worker nodes
+you will be using.
-Ability to ssh to server machines (without password: agents, master
-control channel, etc) (FIXME: Are these limitations necessary?)
-Passwords or ssh key passphrases OK for some scenarios.
-ssh from Mac
-ssh -A when jumping to a new host (to forward the ssh agent)
+%Ability to ssh to server machines (without password: agents, master
+%control channel, etc) (FIXME: Are these limitations necessary?)
+%Passwords or ssh key passphrases OK for some scenarios.
-(or set up ssh agents manually)
+%ssh from Mac
-(document ssh tricks here for pw-less access)
+%ssh -A when jumping to a new host (to forward the ssh agent)
-}
+%(or set up ssh agents manually)
-\section{DIRECTORY STRUCTURE USED FOR SWIFT RUNTIME}{
-tbd
+%(document ssh tricks here for pw-less access)
+
}
-\section{PROCEESS STRUCTURE OF SWIFT RUNTIME}{
+\section{Swift Runtime Directory Structure}{
+SwiftR stores various files in the file system in the course of its operation.
+These are, by default, stored under the directory "$TMP/$USER/SwiftR".
-\preformatted{
+The Swift server stores logs and other information under directories
+with names beginning with "swift.".
+One such directory is created per \code{swiftInit} call.
-vanquish$ mp
-UID PID PPID PGID SID C STIME TTY TIME CMD
-wilde 3621 3553 3553 3553 0 19:17 ? 00:00:00 sshd: wilde at pts/1
-wilde 3622 3621 3622 3622 0 19:17 pts/1 00:00:00 -bash
-wilde 3726 3622 3726 3622 0 19:20 pts/1 00:00:00 /bin/bash ./start-swift local
-wilde 3775 3726 3726 3622 0 19:20 pts/1 00:00:00 /bin/sh /homes/wilde/RLibrary/Swift/exec/../swift/bin/swift -config
-wilde 3835 3775 3726 3622 0 19:20 pts/1 00:00:11 java -Xmx256M -Djava.endorsed.dirs=/homes/wilde/RLibrary/Swift/exe
-wilde 8664 3622 8664 3622 0 20:47 pts/1 00:00:00 ps -fjH -u wilde
-wilde 3441 3366 3366 3366 0 19:16 ? 00:00:00 sshd: wilde at pts/0
-wilde 3442 3441 3442 3442 0 19:16 pts/0 00:00:00 -bash
-wilde 4114 3442 4114 3442 0 19:35 pts/0 00:00:05 /usr/lib64/R/bin/exec/R
-wilde 4667 1 3726 3622 0 19:38 pts/1 00:00:00 /usr/lib64/R/bin/exec/R --slave --no-restore --file=./SwiftRServer.sh --ar
-wilde 4611 1 3726 3622 0 19:38 pts/1 00:00:00 /usr/lib64/R/bin/exec/R --slave --no-restore --file=./SwiftRServer.sh --ar
-wilde 4569 1 3726 3622 0 19:38 pts/1 00:00:00 /usr/lib64/R/bin/exec/R --slave --no-restore --file=./SwiftRServer.sh --ar
-wilde 4562 1 3726 3622 0 19:38 pts/1 00:00:00 /usr/lib64/R/bin/exec/R --slave --no-restore --file=./SwiftRServer.sh --ar
-wilde 4522 1 3726 3622 0 19:38 pts/1 00:00:00 /usr/lib64/R/bin/exec/R --slave --no-restore --file=./SwiftRServer.sh --ar
-wilde 4455 1 3726 3622 0 19:38 pts/1 00:00:00 /usr/lib64/R/bin/exec/R --slave --no-restore --file=./SwiftRServer.sh --ar
-wilde 4270 1 3726 3622 0 19:38 pts/1 00:00:00 /usr/lib64/R/bin/exec/R --slave --no-restore --file=./SwiftRServer.sh --ar
-wilde 4160 1 3726 3622 0 19:36 pts/1 00:00:00 /usr/lib64/R/bin/exec/R --slave --no-restore --file=./SwiftRServer.sh --ar
-vanquish$ }
+Swift workers store data files and logs under "$TMP/$USER/SwiftR" on
+whichever machine they are currently running on.
}
+
\section{DEBUGGING AND TROUBLESHOOTING}{
* manual mode
@@ -415,8 +375,10 @@
You should see periodic status update lines such as the following:
-\preformatted{
-tbd}
+\preformatted{Progress: Selecting site:6 Active:2 Finished successfully:84
+Progress: uninitialized:1 Finished successfully:92
+Progress: Stage in:1 Finished successfully:101
+}
* reporting bugs: what to send (FIXME: need swiftsnapshot script)
@@ -439,7 +401,8 @@
wget http://www.ci.uchicago.edu/~wilde/swift.rNNNN.cog.rNNNN.tar.gz
cd ~/SwiftR
-./install.sh # generates a .gz package in ~/public_html/*.gz}
+make install
+}
}
@@ -476,15 +439,9 @@
The following caveats are high priority on the FIXME list:
-You MUST start the Swift server before running a swiftapply() call
-from R. Otherwise R hangs and must be killed and restarted.
-
-When the FIFOs (named pipes) which are used to communicate from R to
-Swift get hung, you need to use kill or Quit to break out of R.
-
There is no automatic restart yet if swift dies in its server loop. In
particular, parsing errors, eg on the Swift initialexpr text, can
-cause the R and hence the Swift server to exit. The
+cause the R, and hence the Swift, server to exit.
Only lapply is implemented (also SwiftApply) - need to see if we can
cut down arg passing overhead for many of the apply() cases.
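As a sketch of the pbsf/GLOBUS_HOSTNAME note above (the address is the Merlot
example carried over from the old installation text; substitute the internal
address of your own login node):

    Sys.setenv(GLOBUS_HOSTNAME="10.0.0.200")  # example internal address of the login node
    options(swift.server="pbsf")
    swiftInit(cores=8, nodes=3, time="00:30:00", queue="serial")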
Modified: SwiftApps/SwiftR/Swift/man/swiftInit.Rd
===================================================================
--- SwiftApps/SwiftR/Swift/man/swiftInit.Rd 2011-01-25 19:23:13 UTC (rev 4035)
+++ SwiftApps/SwiftR/Swift/man/swiftInit.Rd 2011-01-25 21:35:30 UTC (rev 4036)
@@ -11,6 +11,17 @@
}
%- maybe also 'usage' for other objects documented here.
\arguments{
+  All arguments to swiftInit() are optional.
+  If an argument is not provided to swiftInit, the function
+  tries to obtain a value from R's options mechanism. The option
+  names are the argument names prefixed with "swift.", for example
+  "swift.cores" or "swift.server".
+
+  No arguments are strictly required, either directly or through options,
+  but depending on your server setting and local configuration you may
+  need to provide some.
+
+
\item{cores}{
The number of cores per host. The default values vary from 2 to 8 depending on the server type.
}
@@ -35,23 +46,29 @@
The project name passed to the PBS or SGE batch scheduler. Site-specific.
}
\item{parEnv}{
- SGE only.
+ SGE only. This is the parallel environment setting passed to the
+ Sun Grid Engine scheduler, and is required in order to run
+ multi-node jobs with SwiftR on Sun Grid Engine sites. The "mpi"
+ environment is often a suitable choice.
}
\item{workmode}{
Can be "node" or "slot".
- node: start one worker for all slots on a node. slot: start one worker
- per slot (multiple workers per node).
+ If "node", one worker is started for all slots on a node.
+ If "slot", one worker is started per slot (multiple workers per node).
}
\item{throttle}{
-%% ~~Describe \code{throttle} here~~
+ The throttle setting to be used by Swift: controls the rate of sending jobs
+ to workers.
}
\item{queue}{
The scheduler queue to put jobs in. This is only relevant for PBS
and SGE.
}
\item{rcmd}{
- Specific to SGE: the remote shell command. The default value is
- typically fine, but if you have issues it may need to be changed.
+ Specific to SGE clusters: this is the remote shell command that is
+ used, for example \code{ssh} or \code{qrsh}.
+    For most clusters, the default setting "ssh" works, but
+    if you have problems it may need to be changed.
}
\item{time}{
The duration to request nodes for from the PBS or SGE scheduler.
@@ -61,7 +78,7 @@
}
\item{workerLogging}{
- For testing purposes: the swift worker loggin level.
+ For testing purposes: the swift worker logging level.
}
}
Modified: SwiftApps/SwiftR/Swift/man/swiftapply.Rd
===================================================================
--- SwiftApps/SwiftR/Swift/man/swiftapply.Rd 2011-01-25 19:23:13 UTC (rev 4035)
+++ SwiftApps/SwiftR/Swift/man/swiftapply.Rd 2011-01-25 21:35:30 UTC (rev 4036)
@@ -3,18 +3,37 @@
\alias{swiftapply}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{
- swiftapply: Applying a Function to a List of Arguments
+ Applying a Function to a List of Arguments
}
\description{
- swiftapply takes a function, and list of argument lists,
+ Several variations of a parallel apply call are provided.
+   All variations take a function and a list of argument lists,
and apply the function to each of the argument lists.
This is done in parallel using the Swift engine.
+
+ swiftLapply behaves in the same way as the serial \code{lapply}
+ function.
+
+   swiftapply takes the argument lists explicitly, as shown in the usage below.
}
\usage{
swiftapply(func, arglists, server = NULL, callsperbatch = NULL, runmode = NULL, initialexpr = NULL, workerhosts = NULL, keepwork = NULL, tmpdir = NULL, timeout = NULL, quiet = FALSE)
+swiftLapply(tlist, func, ...)
}
\arguments{
+  The first two arguments are required for all variations of the
+  apply call.
+
+  All other arguments are optional.
+  swiftapply takes the full range of options specified below directly
+  as arguments. If they are not provided as arguments, settings specified
+  through R's options mechanism will be used, for example
+  \verb{options(swift.server="local")}.
+  swiftLapply and the other variations also respect settings
+  made through the R options mechanism.
+
+
\item{func}{
The function to apply.
}
@@ -24,6 +43,8 @@
\item{server}{
The swift server type to use to run. The possible values are
"local", "ssh", "pbs", "sge" and "pbsf", the same as swiftInit.
+ The most recently started server of the specified type will be used
+ to execute the apply call. The default value is "local".
}
\item{callsperbatch}{
The number of function calls to group together into a single batch.
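A short usage sketch of the swiftapply argument/option precedence described in
the hunk above (the function and data here are placeholders):

    sq <- function(x) x * x
    arglists <- lapply(1:20, list)                 # one argument list per call
    options(swift.callsperbatch=5)                 # global default
    res <- swiftapply(sq, arglists)                # uses the option value
    res <- swiftapply(sq, arglists, callsperbatch=10)  # explicit argument takes precedence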