[Swift-commit] r3768 - in SwiftApps/SwiftR: . Swift/exec
noreply at svn.ci.uchicago.edu
noreply at svn.ci.uchicago.edu
Sun Dec 12 22:45:17 CST 2010
Author: wilde
Date: 2010-12-12 22:45:17 -0600 (Sun, 12 Dec 2010)
New Revision: 3768
Modified:
SwiftApps/SwiftR/Swift/exec/configure-server-pbs
SwiftApps/SwiftR/Swift/exec/start-swift
SwiftApps/SwiftR/install.sh
Log:
initial changes to handle SGE. May need to re-adjust PBS changes.
Modified: SwiftApps/SwiftR/Swift/exec/configure-server-pbs
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-pbs 2010-12-12 23:47:03 UTC (rev 3767)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-pbs 2010-12-13 04:45:17 UTC (rev 3768)
@@ -50,7 +50,7 @@
END
-true to save <<END
+: SAVE FOR REFERENCE <<END
# <profile namespace="globus" key="maxWallTime">00:00:01</profile>
# <profile namespace="globus" key="queue">fast</profile>
Modified: SwiftApps/SwiftR/Swift/exec/start-swift
===================================================================
--- SwiftApps/SwiftR/Swift/exec/start-swift 2010-12-12 23:47:03 UTC (rev 3767)
+++ SwiftApps/SwiftR/Swift/exec/start-swift 2010-12-13 04:45:17 UTC (rev 3768)
@@ -1,5 +1,7 @@
#! /bin/bash
+set -x
+
export TRAPEVENTS="EXIT 1 2 3 15" # Signals and conditions to trap
# Define internal functions
@@ -54,6 +56,8 @@
echo $sshpids > $sshpidfile
}
+# FIXME: does PBS need same workers-per-node logic as SGE?
+
make-pbs-submit-file()
{
if [ $queue != default ]; then
@@ -61,7 +65,7 @@
else
queueDirective=""
fi
-cat >pbs.sub <<END
+cat >batch.sub <<END
#PBS -S /bin/sh
#PBS -N SwiftR-workers
#PBS -m n
@@ -85,6 +89,8 @@
END
}
+# Submit file for PBS systems with firewall restrictions: specifically, Merlot at VCU
+
make-pbsf-submit-file()
{
if [ _$GLOBUS_HOSTNAME = _ ]; then
@@ -97,7 +103,7 @@
else
queueDirective=""
fi
-cat >pbs.sub <<END
+cat >batch.sub <<END
#PBS -S /bin/sh
#PBS -N SwiftR-workers
#PBS -m n
@@ -105,37 +111,99 @@
#PBS -l walltime=$time
#PBS -o $HOME
#PBS -e $HOME
-$queueDirective
-WORKER_LOGGING_ENABLED=true # FIXME: parameterize; fix w PBS -v
-HOST=\$(echo $CONTACT | sed -e 's,^http://,,' -e 's/:.*//')
-PORT=\$(echo $CONTACT | sed -e 's,^.*:,,')
-CONTACT=http://localhost:\$PORT
-echo '***' PBS_NODEFILE file: \$PBS_NODEFILE CONTACT:$CONTACT
-cat \$PBS_NODEFILE
-echo '***' unique nodes are:
-sort < \$PBS_NODEFILE|uniq
-for h in \$(sort < \$PBS_NODEFILE|uniq); do
- ssh \$h "echo Swift R startup running on host; hostname; echo HOST=\$HOST PORT=\$PORT CONTACT=\$CONTACT; cd /; ( ssh -N -L \$PORT:\$HOST:\$PORT \$HOST & sleep 3; /usr/bin/perl $SWIFTBIN/worker.pl \$CONTACT SwiftR-\$h $HOME/.globus/coasters $IDLETIMEOUT ; wait)" &
-done
+ $queueDirective
+ WORKER_LOGGING_ENABLED=true # FIXME: parameterize; fix w PBS -v
+ HOST=\$(echo $CONTACT | sed -e 's,^http://,,' -e 's/:.*//')
+ PORT=\$(echo $CONTACT | sed -e 's,^.*:,,')
+ CONTACT=http://localhost:\$PORT
+ echo '***' PBS_NODEFILE file: \$PBS_NODEFILE CONTACT:$CONTACT
+ cat \$PBS_NODEFILE
+ echo '***' unique nodes are:
+ sort < \$PBS_NODEFILE|uniq
+ for h in \$(sort < \$PBS_NODEFILE|uniq); do
+ ssh \$h "echo Swift R startup running on host; hostname; echo HOST=\$HOST PORT=\$PORT CONTACT=\$CONTACT; cd /; ( ssh -N -L \$PORT:\$HOST:\$PORT \$HOST & sleep 3; /usr/bin/perl $SWIFTBIN/worker.pl \$CONTACT SwiftR-\$h $HOME/.globus/coasters $IDLETIMEOUT ; wait)" &
+ done
+#
+ ontrap()
+ {
+ echo in ontrap
+ # Kill our processes on each node; do first node (on which this is running) last
+ for h in \$(sort < \$PBS_NODEFILE|uniq | sort -r); do
+ echo killing processes on host \$h
+ ssh \$h killall -u \$USER
+ done;
+ killall -u \$USER
+ }
+#
+ trap ontrap $TRAPEVENTS
+ wait
+#
+END
+}
-ontrap()
+# FIXME: for big systems like Ranger, need to use ssh_tree to avoid socket FD exhaustion?
+
+echo about to define make-sge
+
+make-sge-submit-file()
{
- echo in ontrap
- # Kill our processes on each node; do first node (on which this is running) last
- for h in \$(sort < \$PBS_NODEFILE|uniq | sort -r); do
- echo killing processes on host \$h
- ssh \$h killall -u \$USER
- done;
- killall -u \$USER
-}
+echo in $0
+ if [ $queue != default ]; then
+ queueDirective="#$ -q $queue"
+ else
+ queueDirective=""
+ fi
+ if [ $project != default ]; then
+ projectDirective="#$ -A $project"
+ else
+ projectDirective=""
+ fi
+ rcmd="qrsh" # FIXME - need to set on system basis; qrsh works for siraf
+
+cat >batch.sub <<END
+#!/bin/bash
+#$ -S /bin/bash
+#$ -o $HOME
+#$ -e $HOME
+#$ -N SwiftR
+#$ -l h_rt=$time
+# #$ -v WORKER_LOGGING_LEVEL=NONE
+#$ -V
-trap ontrap $TRAPEVENTS
-wait
+# Siraf Site-specific:
+#$ -pe openmpi $(($nodes*$cores))
+$queueDirective
+# -A ???
+#$projectDirective
+# Ranger Site-specific:
+# $ -pe 16way 256
+# $ -q development
+# $ -A TG-DBS080004N
+
+ cd / && NODES=\`cat \$PE_HOSTFILE | awk '{ for(i=0;i<\$2;i++){print \$1} }'\`
+
+ # -or- cd / && NODES=`cat $PE_HOSTFILE | awk '{print $1}'` # Better for Ranger
+
+ # WORKER_LOGGING_ENABLED=true # FIXME: parameterize; fix w PBS -v
+ #cd / && /usr/bin/perl $SWIFTBIN/worker.pl $CONTACT SwiftR-workers $HOME/.globus/coasters $IDLETIMEOUT
+ HOST=\$(echo $CONTACT | sed -e 's,^http://,,' -e 's/:.*//')
+ PORT=\$(echo $CONTACT | sed -e 's,^.*:,,')
+ echo '***' PE_HOSTFILE file: \$PE_HOSTFILE CONTACT:$CONTACT
+
+ for h in \$NODES; do
+ workerCmd="echo Swift R startup running on host; hostname; cd /; WORKER_LOGGING_LEVEL=NONE /usr/bin/perl $SWIFTBIN/worker.pl $CONTACT SwiftR-\$h $HOME/.globus/coasters $IDLETIMEOUT"
+ if [ $rcmd = ssh ]; then
+ ssh \$h "\$workerCmd" &
+ else
+ qrsh -nostdin -l hostname=\$h "\$workerCmd" &
+ fi
+ done
+ wait
END
}
-function wait-and-start-pbs-workers
+function wait-and-start-batch-workers
{
get-contact
LOGDIR=$(pwd)/swiftworkerlogs # full path. FIXME: Generate this with remote-side paths if not shared dir env?
@@ -145,11 +213,11 @@
IDLETIMEOUT=$((60*60*240)) # 10 days: FIXME: make this a command line arg
- # FIXME: set up for capturing pbs job id: rm -rf remotepid.* # FIXME: should not be needed if we start in a new dir each time
+ # FIXME: set up for capturing batch job id: rm -rf remotepid.* # FIXME: should not be needed if we start in a new dir each time
make-${server}-submit-file
- qsub pbs.sub>$pbsjobidfile
+ qsub batch.sub >$jobidfile
- echo Started workers from PBS job $(cat $pbsjobidfile)
+ echo Started workers from batch job $(cat $jobidfile)
}
usage()
@@ -216,6 +284,7 @@
throttle=10
hosts=no-hosts-specified
queue=default
+project=default
while [ $# -gt 0 ]
do
@@ -225,7 +294,8 @@
-n) nodes=$2; verify-is-numeric nodes $nodes; shift ;;
-p) throttle=$2; verify-is-numeric throttle $throttle; shift ;;
-q) queue=$2; verify-not-null queue $queue; shift ;;
- -s) server=$2; verify-is-one-of server $server local ssh pbs pbsf; shift ;;
+ -A) project=$2; verify-not-null project $project; shift ;;
+ -s) server=$2; verify-is-one-of server $server local ssh pbs pbsf sge; shift ;;
-t) time=$2; verify-not-null time $time; shift ;;
*) usage; exit 1 ;;
esac
@@ -300,31 +370,31 @@
wait-and-start-ssh-workers &
starterpid=$!
-elif [ \( $server = pbs \) -o \( $server = pbsf \) ]; then
+elif [ \( $server = pbs \) -o \( $server = pbsf \) -o \( $server = sge \) ]; then
- source $SWIFTRBIN/configure-server-pbs $cores
+ source $SWIFTRBIN/configure-server-${server} $cores
- pbsjobidfile=${out/stdouterr/pbsjobid}
+ jobidfile=${out/stdouterr/jobid}
TRAPEVENTS="EXIT 1 2 3 15" # Signals and conditions to trap
function onexit {
coasterservicepid="" # null: saved in case we go back to using coaster servers
trap - $TRAPEVENTS
- pbsjobid=$(cat $pbsjobidfile)
- echo Terminating worker processes starter $starterpid and PBS job $pbsjobid
+ jobid=$(cat $jobidfile)
+ echo Terminating worker processes starter $starterpid and batch job $jobid
if [ "_$starterpid != _ ]; then
kill $starterpid
fi
- if [ "_$pbsjobid != _ ]; then
- qdel $pbsjobid
+ if [ "_$jobid != _ ]; then
+ qdel $jobid
fi
kill 0 # Kill all procs in current process group # FIXME: what was this for????
}
trap onexit $TRAPEVENTS
- wait-and-start-pbs-workers &
+ wait-and-start-batch-workers &
starterpid=$!
fi
Modified: SwiftApps/SwiftR/install.sh
===================================================================
--- SwiftApps/SwiftR/install.sh 2010-12-12 23:47:03 UTC (rev 3767)
+++ SwiftApps/SwiftR/install.sh 2010-12-13 04:45:17 UTC (rev 3768)
@@ -1,4 +1,8 @@
ver=0.1
+rm -rf Swift/inst/swift/*
+mkdir -p Swift/inst/swift
+SWIFTREL=$(cd $(dirname $(which swift))/..; pwd)
+cp -pr $SWIFTREL/* Swift/inst/swift
R CMD build Swift
R CMD INSTALL Swift_${ver}.tar.gz
cp Swift_${ver}.tar.gz ~/public_html
More information about the Swift-commit mailing list