[Swift-commit] r5181 - in SwiftApps/SwiftR: . Swift/exec
tga at ci.uchicago.edu
tga at ci.uchicago.edu
Tue Sep 27 14:50:12 CDT 2011
Author: tga
Date: 2011-09-27 14:50:12 -0500 (Tue, 27 Sep 2011)
New Revision: 5181
Modified:
SwiftApps/SwiftR/IMMEDIATE-TODO
SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh
SwiftApps/SwiftR/Swift/exec/configure-server-crayxt
SwiftApps/SwiftR/Swift/exec/start-swift
Log:
Cleanup temporary directory on lustre to avoid problems with stale temporary files
Modified: SwiftApps/SwiftR/IMMEDIATE-TODO
===================================================================
--- SwiftApps/SwiftR/IMMEDIATE-TODO 2011-09-27 19:09:41 UTC (rev 5180)
+++ SwiftApps/SwiftR/IMMEDIATE-TODO 2011-09-27 19:50:12 UTC (rev 5181)
@@ -1,7 +1,8 @@
+HIGH:
+-- Have run-specific temporary directories on lustre which can be cleaned up
-
HIGH:
-- OpenMx Benchmarking on Beagle
Modified: SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh
===================================================================
--- SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh 2011-09-27 19:09:41 UTC (rev 5180)
+++ SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh 2011-09-27 19:50:12 UTC (rev 5181)
@@ -17,7 +17,12 @@
# tmp=/tmp # FIXME: allow this to change eg for sites with main tmp dir elsewhere
# tmp=/scratch/local # FIXME: allow this to change eg for sites with main tmp dir elsewhere
-tmp=${SWIFTR_TMP:-${TMP:-/tmp}}
+if [ -z $SWIFTR_TMP ]; then
+ tmproot=${TMP:-/tmp}
+ tmp=$tmproot/$(id -nu)/SwiftR
+else
+ tmp=${SWIFTR_TMP}
+fi
RServerScript=$1
callFile=$2
@@ -114,7 +119,6 @@
# if mutex has been acquired, know another process active
if mkdir $SLOTDIR/mutex ; then
new_idletimer_id=$(cat $SLOTDIR/idletimer)
- echo new: "$new_idletimer_id" old "$idletimer_id" > $SLOTDIR/idletimer_ids
if [ "$new_idletimer_id" = "$idletimer_id" ]; then
echo killing idle R process $rpid
kill $rpid
@@ -128,10 +132,12 @@
# Ensure that the dir for this slot exists.
-BASEDIR=$tmp/$(id -nu)/SwiftR/Rworkers.$(hostname)
+
+BASEDIR=$tmp/Rworkers.$(hostname)
if mkdir -p $BASEDIR; then
- :
+ :
else
+ # Sometimes the old directory exists with wrong permissions
OLD_BASEDIR=$BASEDIR
BASEDIR=$tmp/SwiftR.$(hostname).${SWIFT_WORKER_PID}.Rworkers
mkdir -p $BASEDIR
Modified: SwiftApps/SwiftR/Swift/exec/configure-server-crayxt
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-crayxt 2011-09-27 19:09:41 UTC (rev 5180)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-crayxt 2011-09-27 19:50:12 UTC (rev 5181)
@@ -10,8 +10,6 @@
# FIXME: examine effect of 1-min default maxwalltime above
# FIXME: determine best value for throttle below
-LUSTRE_TMP=/lustre/beagle/$USER/swiftRtmp
-mkdir -p $LUSTRE_TMP
cat >sites.xml <<END
<config>
@@ -26,7 +24,9 @@
<profile namespace="karajan" key="initialScore">10000</profile>
<filesystem provider="local" url="none"/>
<profile namespace="env" key="SWIFTR_TMP">$LUSTRE_TMP</profile>
- <profile namespace="env" key="TMP">$LUSTRE_TMP</profile>
+ <!-- Add randomness to avoid R tmp directories filling up top directory:
+ R isn't always very good about cleaning up after itself -->
+ <profile namespace="env" key="TMPDIR">$LUSTRE_RTMP</profile>
<profile namespace="env" key="LD_LIBRARY_PATH">$LD_LIBRARY_PATH</profile>
<!-- Longer timeout often needed on Cray configuration-->
<profile namespace="env" key="SWIFTR_TIMEOUT">30</profile>
Modified: SwiftApps/SwiftR/Swift/exec/start-swift
===================================================================
--- SwiftApps/SwiftR/Swift/exec/start-swift 2011-09-27 19:09:41 UTC (rev 5180)
+++ SwiftApps/SwiftR/Swift/exec/start-swift 2011-09-27 19:50:12 UTC (rev 5181)
@@ -819,8 +819,34 @@
cores=$defaultClusterCores
fi
echo server=$server project=$project cores=$cores nodes=$nodes queue=$queue
- if [ $server = pbsf ]
- then
+
+ DIRS_TO_DELETE=
+ if [ $server = crayxt ]; then
+ #FIXME: beagle-specific code
+ LUSTRE_TMPROOT=/lustre/beagle/$USER/swiftRtmp
+ if mkdir -p $LUSTRE_TMPROOT; then
+ :
+ else
+ echo "Could not create temporary directory $LUSTRE_TMPROOT"
+ stdcleanup_start
+ stdcleanup_end
+ exit 1
+ fi
+
+
+ while true
+ do
+ LUSTRE_TMPSESSION=$LUSTRE_TMPROOT/$RANDOM
+ if mkdir $LUSTRE_TMPSESSION; then
+ break
+ fi
+ done
+ # Cray XT cluster nodes don't have local writable tmp storage
+ export LUSTRE_TMP=$LUSTRE_TMPSESSION
+ export LUSTRE_RTMP=$LUSTRE_TMPSESSION/Rtmp
+ mkdir -p $LUSTRE_RTMP
+ source $SWIFTRBIN/configure-server-crayxt
+ elif [ $server = pbsf ]; then
source $SWIFTRBIN/configure-server-pbs
else
source $SWIFTRBIN/configure-server-${server}
@@ -842,8 +868,12 @@
if [ "_$jobid" != _ ]; then
qdel "$jobid" &> /dev/null
fi
+ if [ $server = crayxt -a "$keepdir" = FALSE ]; then
+ # Clean up session working directory
+ rm -rf $LUSTRE_TMPSESSION
+ fi
+ stdcleanup_end
# eit cleanly
- stdcleanup_end
exit 0
}
More information about the Swift-commit mailing list