[Swift-commit] r5052 - usertools/persistent-coasters

wozniak at ci.uchicago.edu wozniak at ci.uchicago.edu
Thu Sep 1 15:07:04 CDT 2011


Author: wozniak
Date: 2011-09-01 15:07:04 -0500 (Thu, 01 Sep 2011)
New Revision: 5052

Added:
   usertools/persistent-coasters/worker-cobalt.m4.zsh
   usertools/persistent-coasters/worker-cobalt.zsh
Modified:
   usertools/persistent-coasters/README.txt
   usertools/persistent-coasters/settings.sh
   usertools/persistent-coasters/start-service.zsh
   usertools/persistent-coasters/workers-cobalt.zsh
   usertools/persistent-coasters/workers-local.zsh
   usertools/persistent-coasters/workers-ssh.zsh
Log:
Various improvements: now works with Cobalt on Eureka


Modified: usertools/persistent-coasters/README.txt
===================================================================
--- usertools/persistent-coasters/README.txt	2011-09-01 20:06:21 UTC (rev 5051)
+++ usertools/persistent-coasters/README.txt	2011-09-01 20:07:04 UTC (rev 5052)
@@ -1,28 +1,43 @@
 
-Overview of persistent CoasterService process
+// This is an asciidoc file but should also be human-readable
 
-0) Source settings.sh or a similar file
-1) Start coaster service
-2) Get URL to which Swift should connect from service output
-3) Run Swift once to send settings to CoasterService,
-   putting CoasterService in passive mode
-4) Get URL to which workers should connect from Swift output
-5) Connect workers to CoasterService
-6) Run Swift for application
+= Usage
 
-Usage:
+. Edit +settings.sh+
+. Run +./start-service.zsh+
+.. +start-service.zsh+ does the steps outlined in the Overview section below
+. Start your application SwiftScripts with the generated sites.xml
 
-Start start-service.zsh
- - start-service.zsh does the above steps
-Start your application SwiftScripts with the generated sites.xml
+= Concepts
 
-Testing:
+* Worker mode: How to launch the worker.pl scripts
 
+= Overview of persistent CoasterService process
+
+. Source +settings.sh+
+. Start coaster service 
+. Get URL to which Swift should connect from service output
+. Run Swift once to send settings to CoasterService,
+  putting CoasterService in passive mode
+. Get URL to which workers should connect from Swift output
+. Connect workers to CoasterService
+. Run Swift for application
+
+= Testing
+
 After the service has started, you will have fresh sites.xml and tc.data
 files.  You can then run:
 
+-----------------
 swift -sites.file sites.xml -tc.file tc.data system-info.swift
+-----------------
 
-which will run system-info.sh on the worker and create system-info.out .
+which will run +system-info.sh+ on the worker and create +system-info.out+.
 
-Read system-info.out to see if it worked.
+Read +system-info.out+ to see if it worked.
+
+= Output files
+
+* Worker logs go into +WORKER_LOGDIR+ ($PWD/logs)
+** These will be named in accordance with the worker mode
+* The coaster service log: +$WORKER_LOGDIR/coaster-service.log+

Modified: usertools/persistent-coasters/settings.sh
===================================================================
--- usertools/persistent-coasters/settings.sh	2011-09-01 20:06:21 UTC (rev 5051)
+++ usertools/persistent-coasters/settings.sh	2011-09-01 20:07:04 UTC (rev 5052)
@@ -6,22 +6,30 @@
 export SWIFT=
 export COASTER_SERVICE=
 
+# Log level for the persistent-coasters scripts: DEBUG or NONE
+export PC_LOGGING=DEBUG
+
 # Where to place/launch worker.pl on the remote machine for sites.xml
 export WORKER_WORK=/home/${USER}/work
 
-# How to launch workers- local or ssh
-export WORKER_MODE=local
-# ssh
+# How to launch workers: local, ssh, or cobalt
+export WORKER_MODE=cobalt # local
 
 # Worker logging setting passed to worker.pl for sites.xml
-export WORKER_LOGGING=INFO
+export WORKER_LOGGING_LEVEL=DEBUG
 
 # Worker host names for ssh
 # WORKER_HOSTS="login1 login2"
 export WORKER_HOSTS="$( print login{1,2}.mcs.anl.gov )"
 
-# Some settings known to gensites
-NODES=64
-QUEUE=prod-devel
+# Allow the user to set the IP address of the service
+#  Necessary on Eureka: use the 10.*** address
+export SERVICE_HOST=10.40.9.151
+
+# Some settings known to gensites, schedulers
+NODES=4
+QUEUE=default
+# minutes
 MAXTIME=$(( 20 ))
 WORK=${HOME}/work
+PROJECT=PTMAP

Modified: usertools/persistent-coasters/start-service.zsh
===================================================================
--- usertools/persistent-coasters/start-service.zsh	2011-09-01 20:06:21 UTC (rev 5051)
+++ usertools/persistent-coasters/start-service.zsh	2011-09-01 20:07:04 UTC (rev 5052)
@@ -9,25 +9,39 @@
 # processes with ps
 
 # Setup paths, get helper functions
+# PC -> "persistent-coasters"
 PC=$( cd $( dirname $0 ) ; /bin/pwd )
 TOOLS=${PC} # In the future this might be ${SWIFT_HOME}/tools
 source ${TOOLS}/helpers.zsh
+source ${TOOLS}/settings.sh
 [[ $? != 0 ]] && print "Could not load helpers.zsh!" && exit 1
 [[ ${SWIFT} == "" ]] && SWIFT=$( which swift )
 SWIFT_BIN=$( dirname ${SWIFT} )
-WORKER=${SWIFT_BIN}/worker.pl
+export WORKER=${SWIFT_BIN}/worker.pl
 [[ ${COASTER_SERVICE} == "" ]] && COASTER_SERVICE=$( which coaster-service )
 
 # This is an arbitrary port number
 export SERVICE_PORT=10985
 
-LOGDIR=logs
-mkdir -p ${LOGDIR}
+export WORKER_LOGDIR=${PWD}/logs
+mkdir -p ${WORKER_LOGDIR}
 exitcode
 
-SWIFT_OUT=${LOGDIR}/swift.out
+SWIFT_OUT=${PWD}/swift.out
 
-# Get the function start-workers() from somewhere
+# Timestamped log messages; may be disabled via PC_LOGGING
+log()
+{
+  MSG=${*}
+  local -R 4 T=${SECONDS}
+  if [[ ${PC_LOGGING} != "NONE" ]] 
+    then
+    print "${T} ${MSG}"
+  fi
+}
+
+# Get the function start-workers() from one of these files:
+log $( declare WORKER_MODE )
 if [[ ${WORKER_MODE} == "local" ]]
 then
   source workers-local.zsh
@@ -104,8 +118,9 @@
 }
 # eval trap cleanup_trap ${SIGNALS}
 
-SERVICE_LOG=${LOGDIR}/coaster-service.log
-$COASTER_SERVICE -nosec -p ${SERVICE_PORT} >& ${SERVICE_LOG} &
+log "Starting the coaster service..."
+SERVICE_LOG=${WORKER_LOGDIR}/coaster-service.log
+${COASTER_SERVICE} -nosec -p ${SERVICE_PORT} >& ${SERVICE_LOG} &
 COASTER_SERVICE_PID=${!}
 
 sleep 1
@@ -113,6 +128,7 @@
 SERVICE_COASTERS=$( get_service_coasters ${SERVICE_LOG} )
 exitcode "Could not get coasters service!"
 export SERVICE_COASTERS
+log "Coaster service on: ${SERVICE_COASTERS}"
 
 WORK=${WORKER_WORK}
 source setup.sh
@@ -120,6 +136,7 @@
 
 sleep 1
 
+log "Passivate..."
 { ${SWIFT} -config swift.properties \
   -sites.file sites.passivate.xml \
   -tc.file tc.passivate.data \
@@ -134,18 +151,19 @@
 SERVICE_LOCAL=$( get_service_local ${SWIFT_OUT} )
 exitcode "get_service_local failed!"
 export SERVICE_LOCAL
+log "Local service on: ${SERVICE_LOCAL}"
 
 sleep 1
 
-print "Starting workers..."
+log "Starting workers..."
 start-workers ${SERVICE_LOCAL} &
 START_WORKERS_PID=${!}
 
 sleep 1
 
 cp sites.passivate.xml sites.xml
-print "Created user sites file: sites.xml"
+log "Created user sites file: sites.xml"
 cp tc.passivate.data tc.data
-print "Created user tc file: tc.data"
+log "Created user tc file: tc.data"
 
 exit 0

Added: usertools/persistent-coasters/worker-cobalt.m4.zsh
===================================================================
--- usertools/persistent-coasters/worker-cobalt.m4.zsh	                        (rev 0)
+++ usertools/persistent-coasters/worker-cobalt.m4.zsh	2011-09-01 20:07:04 UTC (rev 5052)
@@ -0,0 +1,23 @@
+#!/bin/zsh
+
+# This is a template processed by m4
+
+URI=esyscmd(printf $URI)
+ID=esyscmd(printf $ID)
+LOGDIR=esyscmd(printf $WORKER_LOGDIR)
+WORKER=esyscmd(printf $WORKER)
+
+export WORKER_LOGGING_LEVEL=esyscmd(printf $WORKER_LOGGING_LEVEL)
+
+# Launch the worker
+${WORKER} ${URI} ${ID} ${LOGDIR}
+WORKER_RC=${?}
+
+if [[ ${WORKER_RC} != 0 ]] 
+  then 
+  print "Worker failed: ${ID}"
+  date
+  exit 1
+fi
+
+exit 0

Added: usertools/persistent-coasters/worker-cobalt.zsh
===================================================================
--- usertools/persistent-coasters/worker-cobalt.zsh	                        (rev 0)
+++ usertools/persistent-coasters/worker-cobalt.zsh	2011-09-01 20:07:04 UTC (rev 5052)
@@ -0,0 +1,19 @@
+#!/bin/zsh
+
+# This is a template processed by m4
+
+URI=http://140.221.82.124:38199
+ID=2011-09-01-18-29-53.21666
+LOGDIR=
+WORKER=/home/wozniak/import/cog/modules/swift/dist/swift-svn/bin/worker.pl
+
+${WORKER} ${URI} ${ID} ${LOGDIR}
+WORKER_RC=${?}
+
+if [[ ${WORKER_RC} != 0 ]] 
+  then 
+  print "Worker failed: ${ID}"
+  exit 1
+fi
+
+exit 0

Modified: usertools/persistent-coasters/workers-cobalt.zsh
===================================================================
--- usertools/persistent-coasters/workers-cobalt.zsh	2011-09-01 20:06:21 UTC (rev 5051)
+++ usertools/persistent-coasters/workers-cobalt.zsh	2011-09-01 20:07:04 UTC (rev 5052)
@@ -3,26 +3,39 @@
 # This is not complete and will not work because you cannot
 #  pass argument on the cqsub command line
 
-start_workers()
+start-workers()
 {
   local URI=$1
-  local TIMESTAMP=$(date "+%Y.%m%d.%H%M%S")
+  local TIMESTAMP=$(date "+%Y-%m-%d-%H-%M-%S")
   local -Z 5 R=${RANDOM}
   ID="${TIMESTAMP}.${R}"
 
-  # TODO: 1) make worker.sh script that calls worker.pl with args
-  #       2) cqsub that worker.sh
+  checkvars QUEUE PROJECT MAXTIME NODES WORKER_LOGDIR 
 
-  cqsub -q ${QUEUE}   \
-        -k zeptoos    \
-        -t ${MAXTIME} \ # minutes
-        -n ${NODES}   \
-        --cwd ${LOGDIR} \
-        -E ${LOGDIR}/cobalt.${$}.stderr \
-        -o ${LOGDIR}/cobalt.${$}.stdout \
-        -e "WORKER_LOGGING_LEVEL=DEBUG:ZOID_ENABLE_NAT=true" \
-        ${WORKER} ${URI} ${ID} ${LOGDIR}
+  if [[ ${SERVICE_HOST} != "" ]] 
+    then
+    # Override the local service IP address
+    # ZSH sed-like substitution expansion
+    URI=${URI/<->.<->.<->.<->/${SERVICE_HOST}}
+  fi
+  log "Workers connect to: ${URI}"
 
-  START_WORKERS_PID=
+  # Make worker wrapper script that calls worker.pl with args
+  export URI ID LOGDIR  
+  WORKER_WRAPPER=${WORKER_LOGDIR}/worker-cobalt.zsh
+  m4 < ${PC}/worker-cobalt.m4.zsh > ${WORKER_WRAPPER}
+  chmod u+x ${WORKER_WRAPPER}
+
+  # Launch it
+  cqsub -q ${QUEUE}                          \
+        -p ${PROJECT}                        \
+        -t ${MAXTIME}                        \
+        -n ${NODES}                          \
+        -C ${WORKER_LOGDIR}                  \
+        -E ${WORKER_LOGDIR}/worker.${ID}.err \
+        -o ${WORKER_LOGDIR}/worker.${ID}.out \
+        -e "WORKER_LOGGING_LEVEL=DEBUG"      \
+        ${WORKER_WRAPPER} 
+
   return 0
 }

Modified: usertools/persistent-coasters/workers-local.zsh
===================================================================
--- usertools/persistent-coasters/workers-local.zsh	2011-09-01 20:06:21 UTC (rev 5051)
+++ usertools/persistent-coasters/workers-local.zsh	2011-09-01 20:07:04 UTC (rev 5052)
@@ -6,7 +6,7 @@
 {
   local URI=$1
 
-  ${WORKER} ${URI} LOCAL ${LOGDIR} &
+  ${WORKER} ${URI} LOCAL ${WORKER_LOGDIR} &
 
   # TODO: manage these PIDs
   # START_WORKERS_PID=

Modified: usertools/persistent-coasters/workers-ssh.zsh
===================================================================
--- usertools/persistent-coasters/workers-ssh.zsh	2011-09-01 20:06:21 UTC (rev 5051)
+++ usertools/persistent-coasters/workers-ssh.zsh	2011-09-01 20:07:04 UTC (rev 5052)
@@ -14,7 +14,8 @@
   do
     pwd
     scp ${WORKER} ${MACHINE}:${WORKER_WORK}
-    ssh ${MACHINE} ${WORKER_WORK}/worker.pl ${URI} ${MACHINE} ${LOGDIR} &
+    ssh ${MACHINE} \
+      ${WORKER_WORK}/worker.pl ${URI} ${MACHINE} ${WORKER_LOGDIR} &
   done
 
   # TODO: manage these PIDs




More information about the Swift-commit mailing list