[Swift-commit] r3660 - in SwiftApps/SwiftR: . Swift/exec

noreply at svn.ci.uchicago.edu noreply at svn.ci.uchicago.edu
Wed Oct 6 15:19:47 CDT 2010


Author: wilde
Date: 2010-10-06 15:19:47 -0500 (Wed, 06 Oct 2010)
New Revision: 3660

Modified:
   SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh
   SwiftApps/SwiftR/Swift/exec/configure-site-local
   SwiftApps/SwiftR/Swift/exec/configure-site-pbs
   SwiftApps/SwiftR/TODO
   SwiftApps/SwiftR/UserGuide
Log:
Make local configuration determine number of cores dynamically. Enable EvalRBatchPersistent.sh to log what signal terminated it, and add some temporary logging to debug a mysterious process exit incident. A few cosmetic comment changes.

Modified: SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh
===================================================================
--- SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh	2010-10-05 23:06:00 UTC (rev 3659)
+++ SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh	2010-10-06 20:19:47 UTC (rev 3660)
@@ -37,15 +37,19 @@
 
 TRAPS="EXIT 1 2 3 15"
 
-function onexit { # FIXME: move this logic into worker.pl, or try to use R timeout option if it works on fifos - need to test.
+function onexit() { # FIXME: move this logic into worker.pl, or try to use R timeout option if it works on fifos - need to test.
   trap - $TRAPS
-  echo "Terminating R processes $RPIDS"
+  echo "EvalRBatchPersistent.sh: onexit trap: SIGNAL=$1 RPIDS=$RPIDS"
   if [ "_$RPIDS" != _ ]; then
     kill $RPIDS >& /dev/null
   fi
 }
 
-trap onexit $TRAPS
+trap "onexit 1" 1
+trap "onexit 2" 2
+trap "onexit 3" 3
+trap "onexit 15" 15
+trap "onexit EXIT" EXIT
 
 function idletimer {
   cd $1
@@ -107,14 +111,21 @@
     break;
   fi
 done
+echo DB: Obtained $SLOTDIR/mutex
 
 echo run $(pwd)/$callFile $(pwd)/$resultFile > $SLOTDIR/toR.fifo
 touch $SLOTDIR/lastwrite
 
+echo DB: Sent request
+
 echo dummy stderr response 1>&2 # FIXME - testing if this is the provider staging problem (not xfering zero len stderr)
 
 head -3 < $SLOTDIR/fromR.fifo # FIXME: Trim this down to 1 line for each call (or same # lines for each, in particular, for "quit")
 
+echo DB: Got response
+
 rmdir $SLOTDIR/mutex
 
+echo DB: Freed $SLOTDIR/mutex
+
 # Fixme: how to get exceptions and stdout/stderr text from R server ???
\ No newline at end of file

Modified: SwiftApps/SwiftR/Swift/exec/configure-site-local
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-site-local	2010-10-05 23:06:00 UTC (rev 3659)
+++ SwiftApps/SwiftR/Swift/exec/configure-site-local	2010-10-06 20:19:47 UTC (rev 3660)
@@ -2,8 +2,13 @@
 
 throttleOneCore="-0.001"
 throttleOneCore="0.00"  # FIXME: test if new swft fix makes zero OK rather than -0.001
-localcores=5 # FIXME: parameterize: localthreads=N
 
+if [ -r /proc/cpuinfo ]; then
+  localcores=$(grep '^processor' /proc/cpuinfo | wc -l)
+else
+  localcores=4
+fi
+
 #### DBDBDBDB vvvvvv
 #localcores=1 
 
@@ -29,7 +34,7 @@
 # then add ENV::PATH as an option, e.g., from options(swift.remotepath and swift.remotepath.sitename)
 
 cat >>tc <<END
-local${i} bash /bin/bash null null ENV::PATH="$PATH";ENV::SWIFT_JOB_SLOT="${i}";ENV::SWIFT_WORKER_PID="$$"
+local${i} bash /bin/bash null null ENV::SWIFTR_TMP="$SWIFTR_TMP";ENV::PATH="$PATH";ENV::SWIFT_JOB_SLOT="${i}";ENV::SWIFT_WORKER_PID="$$"
 END
 
 cat >>sites.xml <<END

Modified: SwiftApps/SwiftR/Swift/exec/configure-site-pbs
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-site-pbs	2010-10-05 23:06:00 UTC (rev 3659)
+++ SwiftApps/SwiftR/Swift/exec/configure-site-pbs	2010-10-06 20:19:47 UTC (rev 3660)
@@ -1,6 +1,6 @@
 #! /bin/bash
 
-throttlePBS=.31
+throttlePBS=.31 # FIXME: parameterize this and several other variables, below.
 
 cat >tc <<END
 fork      bashlocal /bin/bash null null null

Modified: SwiftApps/SwiftR/TODO
===================================================================
--- SwiftApps/SwiftR/TODO	2010-10-05 23:06:00 UTC (rev 3659)
+++ SwiftApps/SwiftR/TODO	2010-10-06 20:19:47 UTC (rev 3660)
@@ -2,7 +2,22 @@
 
 MAIN
 
-- complete change for envvars like SWIFTR_TMP
+- still seeing sleep 1 mutex?
+
+- make it easy to adjust ncores (or base it on local host cores from CPUINFO)
+
+- minimal logging in cf
+
+- timing as an option (swift.tracktimes/showtimes); print or update a log array.
+
+- silent vs logging as an option
+
+- make order of R-vs-start-swift startup work either way
+
+- make start-swift restartable within a single R swiftapply()
+
+
+x complete change for envvars like SWIFTR_TMP
 - add sourcing of $HOME/.SwiftR.init: pick up variables for the configure_ scripts from here; maybe one file in this dir for each site supported? maybe configure scripts go here?
 
 - try coasters to communicado-bridled: use simple script for passive coasters: ala swift/lab/mcsswift.sh?
@@ -17,6 +32,9 @@
 - make tmpdir a param; for start-nnn scripts use SWIFTR_TMP, ~/.SwiftR
 - make ports flexible and non-conflicting
 
+- initVar only affects first calls on a server - if you change these you need to start a new server (FIXME)!
+- better handling of initvars: detect if it changes; work correctly if it's empty or missing.
+
 See if we can get Swift to just shut down the channel and start it again?
 - no extended idle timer
 - start new swift worker

Modified: SwiftApps/SwiftR/UserGuide
===================================================================
--- SwiftApps/SwiftR/UserGuide	2010-10-05 23:06:00 UTC (rev 3659)
+++ SwiftApps/SwiftR/UserGuide	2010-10-06 20:19:47 UTC (rev 3660)
@@ -17,20 +17,24 @@
 
 or set up ssh agents manually
 
+(document ssh tricks here for pw-less access)
 
+
 CAVEATS
 
 # fixed: Only one Swift server running per user; can only be used by one R client workspace at a time.
 
 when fifos get hung, need to use kill or Quit to break out of R; will fix.
 
-no retry if swift dies in server loop.
+no auto-restart yet if swift dies in server loop.
 
-Swift returns Error object when remote side fails.
 
 only lapply is implemented (also SwiftApply) - need to see if we can cut down arg passing overhead for many of the apply() cases
 
+log records build up fast; these will be reduced as we get more confidence with the code and shake out bugs
 
+initVar only affects first calls on a server - if you change these you need to start a new server (FIXME)!
+
 INSTALL
 
 cd ???
@@ -41,14 +45,24 @@
 
 svn checkout https://svn.ci.uchicago.edu/svn/vdl2/SwiftApps/SwiftR
 
+CONFIGURE SERVERS
+
+edit configure-site-NAME in exec/
+
+can put local cores into an ssh pool
+
+
 START SERVERS
 
 # do this outside of R
 
-SWIFT=<your package install dir>/Swift
+SWIFT=<your package install dir>/Swift/
 $SWIFT/exec/start-swift-workers hostname
 $SWIFT/exec/start-swift-server 
 
+local and ssh servers can be started and left running, across R runs
+found via:
+
 HELLO WORLD TEST
 
 RUN FULL TEST
@@ -57,11 +71,24 @@
 
   or R CMD TEST etc?
 
+USAGE
+
+Swift returns Error object when remote side fails.
+
+options:
+  swift.server: matched server name on start-swift
+  swift.callsperbatch
+  initialize: 
+
+less likely to touch:
+  remove temp reqs (sp???) FIXME
+  mode (service, manual, ???)
+
 OPENMX EXAMPLES
 
 DEBUGGING
 
-8 manual mode
+* manual mode
 
 * logs to look at
 
@@ -69,3 +96,4 @@
   (FIXME: need swiftsnapshot script)
 
 
+




More information about the Swift-commit mailing list