[Swift-commit] r5181 - in SwiftApps/SwiftR: . Swift/exec

tga at ci.uchicago.edu tga at ci.uchicago.edu
Tue Sep 27 14:50:12 CDT 2011


Author: tga
Date: 2011-09-27 14:50:12 -0500 (Tue, 27 Sep 2011)
New Revision: 5181

Modified:
   SwiftApps/SwiftR/IMMEDIATE-TODO
   SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh
   SwiftApps/SwiftR/Swift/exec/configure-server-crayxt
   SwiftApps/SwiftR/Swift/exec/start-swift
Log:
Clean up the temporary directory on Lustre to avoid problems with stale temporary files


Modified: SwiftApps/SwiftR/IMMEDIATE-TODO
===================================================================
--- SwiftApps/SwiftR/IMMEDIATE-TODO	2011-09-27 19:09:41 UTC (rev 5180)
+++ SwiftApps/SwiftR/IMMEDIATE-TODO	2011-09-27 19:50:12 UTC (rev 5181)
@@ -1,7 +1,8 @@
 
+HIGH:
+-- Have run-specific temporary directories on lustre which can be cleaned up
 
 
-
 HIGH:
 -- OpenMx Benchmarking on Beagle
 

Modified: SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh
===================================================================
--- SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh	2011-09-27 19:09:41 UTC (rev 5180)
+++ SwiftApps/SwiftR/Swift/exec/EvalRBatchPersistent.sh	2011-09-27 19:50:12 UTC (rev 5181)
@@ -17,7 +17,12 @@
 
 # tmp=/tmp # FIXME: allow this to change eg for sites with main tmp dir elsewhere
 # tmp=/scratch/local # FIXME: allow this to change eg for sites with main tmp dir elsewhere
-tmp=${SWIFTR_TMP:-${TMP:-/tmp}}
+if [ -z $SWIFTR_TMP ]; then
+    tmproot=${TMP:-/tmp}
+    tmp=$tmproot/$(id -nu)/SwiftR
+else
+    tmp=${SWIFTR_TMP}
+fi
 
 RServerScript=$1
 callFile=$2
@@ -114,7 +119,6 @@
     # if mutex has been acquired, know another process active
     if mkdir $SLOTDIR/mutex ; then
         new_idletimer_id=$(cat $SLOTDIR/idletimer)
-        echo new: "$new_idletimer_id" old "$idletimer_id" > $SLOTDIR/idletimer_ids
         if [ "$new_idletimer_id" = "$idletimer_id" ]; then
           echo killing idle R process $rpid
           kill $rpid
@@ -128,10 +132,12 @@
 
 # Ensure that the dir for this slot exists. 
 
-BASEDIR=$tmp/$(id -nu)/SwiftR/Rworkers.$(hostname)
+
+BASEDIR=$tmp/Rworkers.$(hostname)
 if mkdir -p $BASEDIR; then
-    :
+  :
 else
+    # Sometimes the old directory exists with wrong permissions
     OLD_BASEDIR=$BASEDIR
     BASEDIR=$tmp/SwiftR.$(hostname).${SWIFT_WORKER_PID}.Rworkers
     mkdir -p $BASEDIR

Modified: SwiftApps/SwiftR/Swift/exec/configure-server-crayxt
===================================================================
--- SwiftApps/SwiftR/Swift/exec/configure-server-crayxt	2011-09-27 19:09:41 UTC (rev 5180)
+++ SwiftApps/SwiftR/Swift/exec/configure-server-crayxt	2011-09-27 19:50:12 UTC (rev 5181)
@@ -10,8 +10,6 @@
 # FIXME: examine effect of 1-min default maxwalltime above
 # FIXME: determine best value for throttle below
 
-LUSTRE_TMP=/lustre/beagle/$USER/swiftRtmp
-mkdir -p $LUSTRE_TMP
 
 cat >sites.xml <<END
 <config>
@@ -26,7 +24,9 @@
     <profile namespace="karajan" key="initialScore">10000</profile>
     <filesystem provider="local" url="none"/>
     <profile namespace="env" key="SWIFTR_TMP">$LUSTRE_TMP</profile>
-    <profile namespace="env" key="TMP">$LUSTRE_TMP</profile>
+    <!-- Add randomness to avoid R tmp directories filling up top directory:
+      R isn't always very good about cleaning up after itself -->
+    <profile namespace="env" key="TMPDIR">$LUSTRE_RTMP</profile>
     <profile namespace="env" key="LD_LIBRARY_PATH">$LD_LIBRARY_PATH</profile>
     <!-- Longer timeout often needed on Cray configuration-->
     <profile namespace="env" key="SWIFTR_TIMEOUT">30</profile>

Modified: SwiftApps/SwiftR/Swift/exec/start-swift
===================================================================
--- SwiftApps/SwiftR/Swift/exec/start-swift	2011-09-27 19:09:41 UTC (rev 5180)
+++ SwiftApps/SwiftR/Swift/exec/start-swift	2011-09-27 19:50:12 UTC (rev 5181)
@@ -819,8 +819,34 @@
     cores=$defaultClusterCores
   fi
   echo server=$server project=$project cores=$cores nodes=$nodes queue=$queue 
-  if [ $server = pbsf ]
-  then
+
+  DIRS_TO_DELETE=
+  if [ $server = crayxt ]; then
+    #FIXME: beagle-specific code
+    LUSTRE_TMPROOT=/lustre/beagle/$USER/swiftRtmp
+    if mkdir -p $LUSTRE_TMPROOT; then
+        :
+    else 
+        echo "Could not create temporary directory $LUSTRE_TMPROOT"
+        stdcleanup_start
+        stdcleanup_end
+        exit 1
+    fi
+
+
+    while true
+    do
+        LUSTRE_TMPSESSION=$LUSTRE_TMPROOT/$RANDOM
+        if mkdir $LUSTRE_TMPSESSION; then
+            break
+        fi
+    done
+    # Cray XT cluster nodes don't have local writable tmp storage
+    export LUSTRE_TMP=$LUSTRE_TMPSESSION
+    export LUSTRE_RTMP=$LUSTRE_TMPSESSION/Rtmp
+    mkdir -p $LUSTRE_RTMP
+    source $SWIFTRBIN/configure-server-crayxt 
+  elif [ $server = pbsf ]; then
     source $SWIFTRBIN/configure-server-pbs
   else
     source $SWIFTRBIN/configure-server-${server}
@@ -842,8 +868,12 @@
     if [ "_$jobid" != _ ]; then
       qdel "$jobid" &> /dev/null
     fi
+    if [ $server = crayxt -a "$keepdir" = FALSE ]; then 
+        # Clean up session working directory
+        rm -rf $LUSTRE_TMPSESSION
+    fi
+    stdcleanup_end
     # eit cleanly
-    stdcleanup_end
     exit 0   
 }
 




More information about the Swift-commit mailing list