[Swift-commit] r7342 - in trunk: bin etc/sites
davidk at ci.uchicago.edu
davidk at ci.uchicago.edu
Wed Nov 27 17:16:18 CST 2013
Author: davidk
Date: 2013-11-27 17:16:18 -0600 (Wed, 27 Nov 2013)
New Revision: 7342
Modified:
trunk/bin/swift-service
trunk/bin/swiftrun
trunk/etc/sites/persistent-coasters
Log:
Use coaster-client to display information about the number of nodes and cores being used
Make sure jobspernode gets set correctly in the coaster service (workaround for bug #467)
Fix to kill local workers by pid
Various clean-up
Modified: trunk/bin/swift-service
===================================================================
--- trunk/bin/swift-service 2013-11-27 16:15:10 UTC (rev 7341)
+++ trunk/bin/swift-service 2013-11-27 23:16:18 UTC (rev 7342)
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/bash
#
# Allows a user to start, stop, name, and view the status of coaster services
#
@@ -7,9 +7,12 @@
export SERVICE_DIR="$HOME/.swift/service"
export COASTER_SERVICE="$SWIFT_BIN/coaster-service"
export LOG="swift-service.log"
+export SPID=""
+
export WORKER="$SWIFT_BIN/worker.pl"
export WORKER_LOG="worker"
-export WORKER_LOG_DIR="."
+export WORKER_LOG_DIR="NOLOGGING"
+export WORKER_LOGGING_LEVEL="NONE"
export IPADDR="127.0.0.1"
mkdir -p "$SERVICE_DIR" || crash "Unable to create $SERVICE_DIR"
@@ -65,6 +68,23 @@
done
}
+# Wait for a PID to stop running, up to a given amount of time ($2)
+wait_for_pid()
+{
+ PID=$1
+ TIME=$2
+ count=0
+ while ps -p $PID &>/dev/null
+ do
+ sleep 1
+ (( count++ ))
+ if [ "$count" -ge "$TIME" ]; then
+ nicely_kill_all_children $PID
+ fi
+ done
+}
+
+
# Return current timestamp
get_timestamp()
{
@@ -117,6 +137,7 @@
(( count++ ))
if [ "$count" -ge "$grace" ]; then
run_command kill -9 $pid
+ break
fi
done
}
@@ -136,8 +157,8 @@
nicely_kill_all_children $PID
fi
- if [ -f "$SERVICE_DIR/$SERVICENAME/workers.pid" ]; then
- PID=$( $SERVICE_DIR/$SERVICENAME/workers.pid )
+ if [ -f "$SERVICE_DIR/$SERVICENAME/worker.pid" ]; then
+ PID=$( cat $SERVICE_DIR/$SERVICENAME/worker.pid )
nicely_kill_all_children $PID
fi
@@ -255,20 +276,15 @@
rm $LOCAL_PORT_FILE
fi
- echo Service name: $SERVICE_NAME
- echo Worker script: $WORKER
-
# Generate sites.xml
- export EXECUTIONURL="http://$IPADDR:$SERVICE_PORT"
+ export SERVICEURL="http://$IPADDR:$SERVICE_PORT"
export WORKERURL=$( java -jar $SWIFT_BIN/listcoasterurls.jar $LOCAL_PORT )
- echo Execution URL: $EXECUTIONURL
- echo Worker URL: $WORKERURL
- echo $EXECUTIONURL >> $COASTER_DIR/execution.url
+ echo $SERVICEURL >> $COASTER_DIR/service.url
echo $WORKERURL >> $COASTER_DIR/worker.url
echo $WORKER >> $COASTER_DIR/worker.path
echo $LOCAL_PORT >> $COASTER_DIR/worker.port
echo $SERVICE_PORT >> $COASTER_DIR/service.port
-
+
GENSITES_ARGS=""
if [ -f "$CONFIG_FILE" ]; then
GENSITES_ARGS=" -p $CONFIG_FILE "
@@ -281,17 +297,14 @@
fi
# Give defaults to gensites variables if needed
- if [ -z "$JOBSPERNODE" ]; then
- export JOBSPERNODE=1
- fi
- if [ -z "$JOBTHROTTLE" ]; then
- export JOBTHROTTLE=0
- fi
- if [ -z "$WORK" ]; then
- export WORK=/tmp
- fi
+ export JOBSPERNODE=${JOBSPERNODE:-7}
+ export JOBTHROTTLE=${JOBTHROTTLE:-1}
+ export WORK=${WORK:-/tmp}
+
gensites -n $SERVICE_NAME $GENSITES_ARGS > $COASTER_DIR/sites.xml
- echo
+ # Workaround for bug #467
+ SPID=$( run_command_bg swift -site $SERVICE_NAME $SWIFT_BIN/../examples/swift/misc/hello.swift )
+ sleep 3
}
# Display status of services
@@ -311,7 +324,11 @@
for service in $services
do
- verify_files_exist $service/pid $service/service.port $service/execution.url $service/worker.port $service/worker.url $service/worker.path
+ verify_files_exist $service/pid $service/service.port $service/service.url $service/worker.port $service/worker.url $service/worker.path
+
+ ccoutput=$( mktemp $TMPDIR/XXXXXX )
+ $SWIFT_BIN/coaster-client $( cat $service/service.url ) list workers > $ccoutput 2>/dev/null
+
echo -e "\nName:\t\t$(basename $service )"
PID=$( cat $service/pid )
echo -e "PID:\t\t$PID"
@@ -322,12 +339,23 @@
echo -e "Status:\t\tFailed"
fi
- echo -e "Service URL:\t$( cat $service/execution.url )"
+ echo -e "Service URL:\t$( cat $service/service.url )"
echo -e "Local URL:\t$( cat $service/worker.url )"
-
echo -e "Worker script:\t$( cat $service/worker.path )"
+
+ nodes=0
+ cores=0
+ nodes=$( cat $ccoutput | sed '1d'|wc -l)
+ cores=$( cat $ccoutput | sed 1d | awk '{print $2}'|paste -sd+ | bc )
+ # active=$( cat $ccoutput | sed 1d | awk '{print $3}'|paste -sd+ | bc )
+ rm $ccoutput
+
+ echo -e "Nodes:\t\t$nodes"
+ echo -e "Cores:\t\t$cores"
+ # echo -e "Active jobs:\t$active"
count=$((count+1))
done
+
if [ "$count" -gt 0 ]; then
echo
else
@@ -369,7 +397,6 @@
# Start local workers
start_workers_local()
{
- echo Connecting local workers to service $SERVICE_NAME
service="$SERVICE_DIR/$SERVICE_NAME"
verify_files_exist $service/worker.path $service/worker.url
LOG=$service/workers.log
@@ -410,7 +437,13 @@
scheduler) start_workers_scheduler;;
*) crash "Unknown WORKER_MODE";;
esac
-
+ wait_for_pid $SPID 5
+ latest=$( ls -l1rtd run* | tail -1 )
+ if [ -d "$latest" ]; then
+ rm -rf $latest
+ fi
+ display_status
+
elif [ "$STOP" == 1 ]; then
stop_service
elif [ "$STATUS" == 1 ]; then
Modified: trunk/bin/swiftrun
===================================================================
--- trunk/bin/swiftrun 2013-11-27 16:15:10 UTC (rev 7341)
+++ trunk/bin/swiftrun 2013-11-27 23:16:18 UTC (rev 7342)
@@ -307,6 +307,7 @@
foreach my $key (sort keys %properties) {
print "\t$key=$properties{$key}\n";
}
+ print "\n";
exit;
}
Modified: trunk/etc/sites/persistent-coasters
===================================================================
--- trunk/etc/sites/persistent-coasters 2013-11-27 16:15:10 UTC (rev 7341)
+++ trunk/etc/sites/persistent-coasters 2013-11-27 23:16:18 UTC (rev 7342)
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://www.ci.uchicago.edu/swift/SwiftSites">
<pool handle="persistent-coasters">
- <execution provider="coaster-persistent" url="_EXECUTIONURL_" jobmanager="local:local"/>
+ <execution provider="coaster-persistent" url="_SERVICEURL_" jobmanager="local:local"/>
<profile namespace="globus" key="workerManager">passive</profile>
<profile namespace="globus" key="jobsPerNode">_JOBSPERNODE_</profile>
<profile key="jobThrottle" namespace="karajan">_JOBTHROTTLE_</profile>
More information about the Swift-commit mailing list