[Swift-commit] r5052 - usertools/persistent-coasters
wozniak at ci.uchicago.edu
wozniak at ci.uchicago.edu
Thu Sep 1 15:07:04 CDT 2011
Author: wozniak
Date: 2011-09-01 15:07:04 -0500 (Thu, 01 Sep 2011)
New Revision: 5052
Added:
usertools/persistent-coasters/worker-cobalt.m4.zsh
usertools/persistent-coasters/worker-cobalt.zsh
Modified:
usertools/persistent-coasters/README.txt
usertools/persistent-coasters/settings.sh
usertools/persistent-coasters/start-service.zsh
usertools/persistent-coasters/workers-cobalt.zsh
usertools/persistent-coasters/workers-local.zsh
usertools/persistent-coasters/workers-ssh.zsh
Log:
Various improvements: now works with Cobalt on Eureka
Modified: usertools/persistent-coasters/README.txt
===================================================================
--- usertools/persistent-coasters/README.txt 2011-09-01 20:06:21 UTC (rev 5051)
+++ usertools/persistent-coasters/README.txt 2011-09-01 20:07:04 UTC (rev 5052)
@@ -1,28 +1,43 @@
-Overview of persistent CoasterService process
+// This is an asciidoc file but should also be human-readable
-0) Source settings.sh or a similar file
-1) Start coaster service
-2) Get URL to which Swift should connect from service output
-3) Run Swift once to send settings to CoasterService,
- putting CoasterService in passive mode
-4) Get URL to which workers should connect from Swift output
-5) Connect workers to CoasterService
-6) Run Swift for application
+= Usage
-Usage:
+. Edit +settings.sh+
+. Run +./start-service.zsh+
+.. +start-service.zsh+ does the steps outlined in the overview below
+. Start your application SwiftScripts with the generated sites.xml
-Start start-service.zsh
- - start-service.zsh does the above steps
-Start your application SwiftScripts with the generated sites.xml
+= Concepts
-Testing:
+* Worker mode: How to launch the worker.pl scripts
+= Overview of persistent CoasterService process
+
+. Source +settings.sh+
+. Start coaster service
+. Get URL to which Swift should connect from service output
+. Run Swift once to send settings to CoasterService,
+ putting CoasterService in passive mode
+. Get URL to which workers should connect from Swift output
+. Connect workers to CoasterService
+. Run Swift for application
+
+= Testing
+
After the service has started, you will have fresh sites.xml and tc.data
files. You can then run:
+-----------------
swift -sites.file sites.xml -tc.file tc.data system-info.swift
+-----------------
-which will run system-info.sh on the worker and create system-info.out .
+which will run +system-info.sh+ on the worker and create +system-info.out+.
-Read system-info.out to see if it worked.
+Read +system-info.out+ to see if it worked.
+
+= Output files
+
+* Worker logs go into +WORKER_LOGDIR+ ($PWD/logs)
+** These will be named in accordance with the worker mode
+* The coaster service log: +$WORKER_LOGDIR/coaster-service.log+
Modified: usertools/persistent-coasters/settings.sh
===================================================================
--- usertools/persistent-coasters/settings.sh 2011-09-01 20:06:21 UTC (rev 5051)
+++ usertools/persistent-coasters/settings.sh 2011-09-01 20:07:04 UTC (rev 5052)
@@ -6,22 +6,30 @@
export SWIFT=
export COASTER_SERVICE=
+# Log level for the persistent-coasters scripts: DEBUG or NONE
+export PC_LOGGING=DEBUG
+
# Where to place/launch worker.pl on the remote machine for sites.xml
export WORKER_WORK=/home/${USER}/work
-# How to launch workers- local or ssh
-export WORKER_MODE=local
-# ssh
+# How to launch workers: local, ssh, or cobalt
+export WORKER_MODE=cobalt # local
# Worker logging setting passed to worker.pl for sites.xml
-export WORKER_LOGGING=INFO
+export WORKER_LOGGING_LEVEL=DEBUG
# Worker host names for ssh
# WORKER_HOSTS="login1 login2"
export WORKER_HOSTS="$( print login{1,2}.mcs.anl.gov )"
-# Some settings known to gensites
-NODES=64
-QUEUE=prod-devel
+# Allow the user to set the IP address of the service
+# Necessary on Eureka: use the 10.*** address
+export SERVICE_HOST=10.40.9.151
+
+# Some settings known to gensites, schedulers
+NODES=4
+QUEUE=default
+# minutes
MAXTIME=$(( 20 ))
WORK=${HOME}/work
+PROJECT=PTMAP
Modified: usertools/persistent-coasters/start-service.zsh
===================================================================
--- usertools/persistent-coasters/start-service.zsh 2011-09-01 20:06:21 UTC (rev 5051)
+++ usertools/persistent-coasters/start-service.zsh 2011-09-01 20:07:04 UTC (rev 5052)
@@ -9,25 +9,39 @@
# processes with ps
# Setup paths, get helper functions
+# PC -> "persistent-coasters"
PC=$( cd $( dirname $0 ) ; /bin/pwd )
TOOLS=${PC} # In the future this might be ${SWIFT_HOME}/tools
source ${TOOLS}/helpers.zsh
+source ${TOOLS}/settings.sh
[[ $? != 0 ]] && print "Could not load helpers.zsh!" && exit 1
[[ ${SWIFT} == "" ]] && SWIFT=$( which swift )
SWIFT_BIN=$( dirname ${SWIFT} )
-WORKER=${SWIFT_BIN}/worker.pl
+export WORKER=${SWIFT_BIN}/worker.pl
[[ ${COASTER_SERVICE} == "" ]] && COASTER_SERVICE=$( which coaster-service )
# This is an arbitrary port number
export SERVICE_PORT=10985
-LOGDIR=logs
-mkdir -p ${LOGDIR}
+export WORKER_LOGDIR=${PWD}/logs
+mkdir -p ${WORKER_LOGDIR}
exitcode
-SWIFT_OUT=${LOGDIR}/swift.out
+SWIFT_OUT=${PWD}/swift.out
-# Get the function start-workers() from somewhere
+# Timestamped log messages; may be disabled via PC_LOGGING
+log()
+{
+ MSG=${*}
+ local -R 4 T=${SECONDS}
+ if [[ ${PC_LOGGING} != "NONE" ]]
+ then
+ print "${T} ${MSG}"
+ fi
+}
+
+# Get the function start-workers() from one of these files:
+log $( declare WORKER_MODE )
if [[ ${WORKER_MODE} == "local" ]]
then
source workers-local.zsh
@@ -104,8 +118,9 @@
}
# eval trap cleanup_trap ${SIGNALS}
-SERVICE_LOG=${LOGDIR}/coaster-service.log
-$COASTER_SERVICE -nosec -p ${SERVICE_PORT} >& ${SERVICE_LOG} &
+log "Starting the coaster service..."
+SERVICE_LOG=${WORKER_LOGDIR}/coaster-service.log
+${COASTER_SERVICE} -nosec -p ${SERVICE_PORT} >& ${SERVICE_LOG} &
COASTER_SERVICE_PID=${!}
sleep 1
@@ -113,6 +128,7 @@
SERVICE_COASTERS=$( get_service_coasters ${SERVICE_LOG} )
exitcode "Could not get coasters service!"
export SERVICE_COASTERS
+log "Coaster service on: ${SERVICE_COASTERS}"
WORK=${WORKER_WORK}
source setup.sh
@@ -120,6 +136,7 @@
sleep 1
+log "Passivate..."
{ ${SWIFT} -config swift.properties \
-sites.file sites.passivate.xml \
-tc.file tc.passivate.data \
@@ -134,18 +151,19 @@
SERVICE_LOCAL=$( get_service_local ${SWIFT_OUT} )
exitcode "get_service_local failed!"
export SERVICE_LOCAL
+log "Local service on: ${SERVICE_LOCAL}"
sleep 1
-print "Starting workers..."
+log "Starting workers..."
start-workers ${SERVICE_LOCAL} &
START_WORKERS_PID=${!}
sleep 1
cp sites.passivate.xml sites.xml
-print "Created user sites file: sites.xml"
+log "Created user sites file: sites.xml"
cp tc.passivate.data tc.data
-print "Created user tc file: tc.data"
+log "Created user tc file: tc.data"
exit 0
Added: usertools/persistent-coasters/worker-cobalt.m4.zsh
===================================================================
--- usertools/persistent-coasters/worker-cobalt.m4.zsh (rev 0)
+++ usertools/persistent-coasters/worker-cobalt.m4.zsh 2011-09-01 20:07:04 UTC (rev 5052)
@@ -0,0 +1,23 @@
+#!/bin/zsh
+
+# This is a template processed by m4
+
+URI=esyscmd(printf $URI)
+ID=esyscmd(printf $ID)
+LOGDIR=esyscmd(printf $WORKER_LOGDIR)
+WORKER=esyscmd(printf $WORKER)
+
+export WORKER_LOGGING_LEVEL=esyscmd(printf $WORKER_LOGGING_LEVEL)
+
+# Launch the worker
+${WORKER} ${URI} ${ID} ${LOGDIR}
+WORKER_RC=${?}
+
+if [[ ${WORKER_RC} != 0 ]]
+ then
+ print "Worker failed: ${ID}"
+ date
+ exit 1
+fi
+
+exit 0
Added: usertools/persistent-coasters/worker-cobalt.zsh
===================================================================
--- usertools/persistent-coasters/worker-cobalt.zsh (rev 0)
+++ usertools/persistent-coasters/worker-cobalt.zsh 2011-09-01 20:07:04 UTC (rev 5052)
@@ -0,0 +1,19 @@
+#!/bin/zsh
+
+# This file is generated by m4 from the template worker-cobalt.m4.zsh
+
+URI=http://140.221.82.124:38199
+ID=2011-09-01-18-29-53.21666
+LOGDIR=
+WORKER=/home/wozniak/import/cog/modules/swift/dist/swift-svn/bin/worker.pl
+
+${WORKER} ${URI} ${ID} ${LOGDIR}
+WORKER_RC=${?}
+
+if [[ ${WORKER_RC} != 0 ]]
+ then
+ print "Worker failed: ${ID}"
+ exit 1
+fi
+
+exit 0
Modified: usertools/persistent-coasters/workers-cobalt.zsh
===================================================================
--- usertools/persistent-coasters/workers-cobalt.zsh 2011-09-01 20:06:21 UTC (rev 5051)
+++ usertools/persistent-coasters/workers-cobalt.zsh 2011-09-01 20:07:04 UTC (rev 5052)
@@ -3,26 +3,39 @@
# This is not complete and will not work because you cannot
# pass argument on the cqsub command line
-start_workers()
+start-workers()
{
local URI=$1
- local TIMESTAMP=$(date "+%Y.%m%d.%H%M%S")
+ local TIMESTAMP=$(date "+%Y-%m-%d-%H-%M-%S")
local -Z 5 R=${RANDOM}
ID="${TIMESTAMP}.${R}"
- # TODO: 1) make worker.sh script that calls worker.pl with args
- # 2) cqsub that worker.sh
+ checkvars QUEUE PROJECT MAXTIME NODES WORKER_LOGDIR
- cqsub -q ${QUEUE} \
- -k zeptoos \
- -t ${MAXTIME} \ # minutes
- -n ${NODES} \
- --cwd ${LOGDIR} \
- -E ${LOGDIR}/cobalt.${$}.stderr \
- -o ${LOGDIR}/cobalt.${$}.stdout \
- -e "WORKER_LOGGING_LEVEL=DEBUG:ZOID_ENABLE_NAT=true" \
- ${WORKER} ${URI} ${ID} ${LOGDIR}
+ if [[ ${SERVICE_HOST} != "" ]]
+ then
+ # Override the local service IP address
+ # ZSH sed-like substitution expansion
+ URI=${URI/<->.<->.<->.<->/${SERVICE_HOST}}
+ fi
+ log "Workers connect to: ${URI}"
- START_WORKERS_PID=
+ # Make worker wrapper script that calls worker.pl with args
+ export URI ID LOGDIR
+ WORKER_WRAPPER=${WORKER_LOGDIR}/worker-cobalt.zsh
+ m4 < ${PC}/worker-cobalt.m4.zsh > ${WORKER_WRAPPER}
+ chmod u+x ${WORKER_WRAPPER}
+
+ # Launch it
+ cqsub -q ${QUEUE} \
+ -p ${PROJECT} \
+ -t ${MAXTIME} \
+ -n ${NODES} \
+ -C ${WORKER_LOGDIR} \
+ -E ${WORKER_LOGDIR}/worker.${ID}.err \
+ -o ${WORKER_LOGDIR}/worker.${ID}.out \
+ -e "WORKER_LOGGING_LEVEL=DEBUG" \
+ ${WORKER_WRAPPER}
+
return 0
}
Modified: usertools/persistent-coasters/workers-local.zsh
===================================================================
--- usertools/persistent-coasters/workers-local.zsh 2011-09-01 20:06:21 UTC (rev 5051)
+++ usertools/persistent-coasters/workers-local.zsh 2011-09-01 20:07:04 UTC (rev 5052)
@@ -6,7 +6,7 @@
{
local URI=$1
- ${WORKER} ${URI} LOCAL ${LOGDIR} &
+ ${WORKER} ${URI} LOCAL ${WORKER_LOGDIR} &
# TODO: manage these PIDs
# START_WORKERS_PID=
Modified: usertools/persistent-coasters/workers-ssh.zsh
===================================================================
--- usertools/persistent-coasters/workers-ssh.zsh 2011-09-01 20:06:21 UTC (rev 5051)
+++ usertools/persistent-coasters/workers-ssh.zsh 2011-09-01 20:07:04 UTC (rev 5052)
@@ -14,7 +14,8 @@
do
pwd
scp ${WORKER} ${MACHINE}:${WORKER_WORK}
- ssh ${MACHINE} ${WORKER_WORK}/worker.pl ${URI} ${MACHINE} ${LOGDIR} &
+ ssh ${MACHINE} \
+ ${WORKER_WORK}/worker.pl ${URI} ${MACHINE} ${WORKER_LOGDIR} &
done
# TODO: manage these PIDs
More information about the Swift-commit
mailing list