[Swift-commit] r7155 - in trunk: bin etc/sites

davidk at ci.uchicago.edu davidk at ci.uchicago.edu
Mon Oct 14 00:37:45 CDT 2013


Author: davidk
Date: 2013-10-14 00:37:26 -0500 (Mon, 14 Oct 2013)
New Revision: 7155

Modified:
   trunk/bin/start-coaster-service
   trunk/etc/sites/persistent-coasters
Log:
Updates to start-coaster-service to make it more flexible and easier to integrate with swiftrun


Modified: trunk/bin/start-coaster-service
===================================================================
--- trunk/bin/start-coaster-service	2013-10-13 05:01:30 UTC (rev 7154)
+++ trunk/bin/start-coaster-service	2013-10-14 05:37:26 UTC (rev 7155)
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-# crash: Report a problem and exit
+# Report a problem and exit
 crash()
 {
    MSG=$1
@@ -8,237 +8,51 @@
    exit 1
 }
 
-# Start futuregrid workers
-start-workers-futuregrid()
+# Wait for a file to be created, up to given amount of time
+wait_for_file()
 {
-   # Setup environment
-   PORT=$1
-   EXECUTIONURL=http://localhost:$PORT
-   export EC2_ACCESS_KEY=$FUTUREGRID_IAAS_ACCESS_KEY
-   export EC2_SECRET_KEY=$FUTUREGRID_IAAS_SECRET_KEY
-
-   # Check that SWIFTVMBOOT_DIR looks ok
-   if [ ! -d "$SWIFTVMBOOT_DIR" ] || [ ! -x "$SWIFTVMBOOT_DIR/futuregrid/bin/bootit.sh" ]; then
-      crash "SWIFTVMBOOT_DIR incorrectly defined in coaster-service.conf"
-   fi
-
-   # Install ve
-   if [ ! -d "$SWIFTVMBOOT_DIR/futuregrid/ve" ]; then
-      echo Setting up environment
-      python $SWIFTVMBOOT_DIR/futuregrid/bin/virtualenv.py $SWIFTVMBOOT_DIR/futuregrid/ve
-      if [ $? -ne 0 ]; then
-         echo "Failed to created the needed python virtual environment"
-         exit 1
+   FILE=$1
+   TIME=$2
+   count=0
+   while [ ! -s "$FILE" ]; do
+      sleep 1
+      (( count += 1 ))
+      if [ $count -ge $TIME ]; then
+         crash "Timed out waiting for coaster port file $FILE"
       fi
-   fi
-
-   # Install cloudinitd
-   source $SWIFTVMBOOT_DIR/futuregrid/ve/bin/activate
-   easy_install cloudinitd
-   if [ $? -ne 0 ]; then
-      echo "Failed to install cloudinitd"
-      exit 1
-   fi
-
-   # Register key
-   echo "Registering the key names in all the clouds"
-   python $SWIFTVMBOOT_DIR/futuregrid/bin/register_key.py $SWIFTVMBOOT_DIR/futuregrid/hosts.txt
-   if [ $? -ne 0 ]; then
-      echo "Failed to register the key names"
-      exit 1
-   fi
-
-   # Start virtual machines
-   echo Starting virtual machines.. please wait
-   $SWIFTVMBOOT_DIR/futuregrid/bin/bootit.sh | tee -a bootit.log
-   SWIFTVMBOOT_OUTPUT=$SWIFTVMBOOT_DIR/futuregrid/output.json
-   if [ ! -f "$SWIFTVMBOOT_OUTPUT" ]; then
-      crash "Error: Swift VM output file $SWIFTVMBOOT_OUTPUT does not exist!"
-   fi
-
-   SWIFTVM_INSTANCE=`grep "Starting up run" bootit.log |awk '{print $4}'`
-   echo $SWIFTVM_INSTANCE >> $HOME/.swift/.swiftvm_instances
-   WORKER_HOSTS=`grep hostname $SWIFTVMBOOT_OUTPUT |awk '{print $2}'|sed 's/\"//g;s/,//g;s/null//g'`
-
-   # Start worker script
-   for MACHINE in $WORKER_HOSTS
-   do
-      echo $MACHINE >> $HOME/.swift/machines
-      scp $SWIFT_BIN/$WORKER $WORKER_USERNAME@$MACHINE:$WORKER_LOCATION > /dev/null 2>&1
-      if [ "$SSH_TUNNELING" == "yes" ]; then
-         ssh -R *:$PORT:localhost:$PORT $WORKER_USERNAME@$MACHINE sleep 999 &
-         echo $! >> $PID_FILE
-         echo "Starting worker on $MACHINE"
-         ssh $WORKER_USERNAME@$MACHINE "$WORKER_LOCATION/$WORKER http://localhost:$PORT $MACHINE $LOG_DIR" &
-         echo $! >> $PID_FILE
-      else
-         echo "Starting worker on $MACHINE"
-         ssh $WORKER_USERNAME@$MACHINE "$WORKER_LOCATION/$WORKER $EXECUTIONURL $MACHINE $LOG_DIR" &
-         echo $! >> $PID_FILE
-      fi
-
    done
 }
 
-# Globus Provision workers
-start-workers-gp()
+run_command()
 {
-   PORT=$1
-   EXECUTIONURL=http://localhost:$PORT
-
-   if [ ! -d "$SWIFTVMBOOT_DIR" ] || [ ! -x "$SWIFTVMBOOT_DIR/bin/gp-instance-create" ]; then
-      crash "SWIFTVMBOOT_DIR incorrectly defined in coaster-service.conf"
-   fi
-
-   SEDFILE=`mktemp`
-   {
-      echo "s@_CLUSTER-NODES_@$EC2_NODES@"
-      echo "s@_INSTANCE-TYPE_@$EC2_INSTANCE_TYPE@"
-      echo "s@_KEYPAIR_@$EC2_KEYPAIR@"
-      echo "s@_KEYFILE_@$EC2_KEYFILE@"
-      echo "s@_AMI_@$EC2_AMI@"
-   } > $SEDFILE
-   sed -f $SEDFILE < "$SWIFTVMBOOT_DIR/ec2.template.conf" > "$SWIFTVMBOOT_DIR/ec2.conf"
-   rm $SEDFILE
-
-   echo Creating instance..
-   "$SWIFTVMBOOT_DIR/bin/gp-instance-create" -c "$SWIFTVMBOOT_DIR/ec2.conf" | sed -r "s/\x1B\[([0-9]{1,2}(;[0-9]{1,2})?)?[m|K]//g" | tee -a gpic.out
-   SWIFTVM_INSTANCE=`awk '{print $4}' gpic.out`
-   rm gpic.out
-   echo $SWIFTVM_INSTANCE >> $HOME/.swift/.swiftvm_instance
-
-   echo Starting instance..
-   "$SWIFTVMBOOT_DIR/bin/gp-instance-start" "$SWIFTVM_INSTANCE" | sed -r "s/\x1B\[([0-9]{1,2}(;[0-9]{1,2})?)?[m|K]//g"
-   WORKER_HOSTS=`$SWIFTVMBOOT_DIR/bin/gp-instance-describe $SWIFTVM_INSTANCE|sed -r "s/\x1B\[([0-9]{1,2}(;[0-9]{1,2})?)?[m|K]//g"|awk '{print $3}'`
-
-   # Start worker script
-   if [ -f "$HOME/.swift/machines" ]; then
-      rm $HOME/.swift/machines
-   fi
-   for MACHINE in $WORKER_HOSTS
-   do
-      # Create a list of machines for other applications, if needed
-      echo $MACHINE >> $HOME/.swift/machines
-
-      # Enable ssh tunneling if needed
-      if [ "$SSH_TUNNELING" == "yes" ]; then
-         ssh -q -o StrictHostKeyChecking=no -R *:$PORT:localhost:$PORT $WORKER_USERNAME@$MACHINE sleep 999 &
-         echo $! >> $PID_FILE
-      fi
-
-      # Copy and start worker script
-      scp -q -o StrictHostKeyChecking=no $SWIFT_BIN/$WORKER $WORKER_USERNAME@$MACHINE:$WORKER_LOCATION > /dev/null 2>&1
-      echo "Starting worker on $MACHINE"
-      ssh -q -o StrictHostKeyChecking=no $WORKER_USERNAME@$MACHINE "$WORKER_LOCATION/$WORKER $EXECUTIONURL $MACHINE $LOG_DIR" &
-      echo $! >> $PID_FILE
-   done
+   command="$@"
+   echo "Running $command" >> $LOG
+   $command >> $LOG 2>&1
 }
 
-# EC2 workers
-start-workers-ec2()
+run_command_bg()
 {
-   PORT=$1
-   EXECUTIONURL=http://localhost:$PORT
-   if [ ! -d "$SWIFTVMBOOT_DIR" ] || [ ! -x "$SWIFTVMBOOT_DIR/ec2/bin/ec2-run-instances" ]; then
-      crash "SWIFTVMBOOT_DIR incorrectly defined in coaster-service.conf"
-   fi
-
-   export EC2_HOME="$SWIFTVMBOOT_DIR/ec2"
-   export EC2_PRIVATE_KEY="$EC2_KEYFILE"
-   export EC2_CERT="$EC2_CERTFILE"
-
-   echo Creating instance..
-   $SWIFTVMBOOT_DIR/ec2/bin/ec2-run-instances "$EC2_AMI" -t "$EC2_INSTANCE_TYPE" -n "$EC2_NODES" -K "$EC2_KEYFILE" -C "$EC2_CERT"
-   SWIFTVM_INSTANCES=$( $SWIFTVMBOOT_DIR/ec2/bin/ec2-describe-instances | grep INSTANCE | grep -v terminated |awk '{print $2}' )
-   echo $SWIFTVM_INSTANCES >> $HOME/.swift/.swiftvm_instances
-   echo Waiting for nodes to boot..
-
-   # Wait until all instances are listed as running
-   while /bin/true
-   do
-      SWIFTVM_INSTANCES_AS_STRING=$( echo $SWIFTVM_INSTANCES | tr "\\n" " ")
-      STATUS_LIST=$( $SWIFTVMBOOT_DIR/ec2/bin/ec2-describe-instances $SWIFTVM_INSTANCES_AS_STRING | grep INSTANCE | grep -v terminated | awk '{print $6}' |sort -u )
-      if [ "$STATUS_LIST" == "running" ]; then
-         break
-      fi
-      sleep 5
-   done
-
-   # There is some delay between when the machines are 'running', and when system utilities like sshd are started
-   sleep 30
-
-   WORKER_HOSTS=$( $SWIFTVMBOOT_DIR/ec2/bin/ec2-describe-instances $SWIFTVM_INSTANCES_AS_STRING | grep INSTANCE | grep -v terminated | awk '{print $4}' )
-
-   if [ -f "$HOME/.swift/machines" ]; then
-      rm $HOME/.swift/machines
-   fi
-
-   # Start worker script
-   SSH_OPTS="-i $EC2_KEYFILE -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"
-   for MACHINE in $WORKER_HOSTS
-   do
-
-      # Create a list of machines for other applications, if needed
-      echo $MACHINE >> $HOME/.swift/machines
-
-      # Copy and start worker script
-      scp $SSH_OPTS $SWIFT_BIN/$WORKER $WORKER_USERNAME@$MACHINE:$WORKER_LOCATION > /dev/null 2>&1
-      echo $! >> $PID_FILE
-
-      # Enable ssh tunneling if needed
-      if [ "$SSH_TUNNELING" == "yes" ]; then
-         ssh $SSH_OPTS -R *:$PORT:localhost:$PORT $WORKER_USERNAME@$MACHINE sleep 999 > /dev/null 2>&1 &
-         sleep 10
-         ssh $SSH_OPTS $WORKER_USERNAME@$MACHINE "$WORKER_LOCATION/$WORKER http://localhost:$PORT $MACHINE $LOG_DIR" 2>&1 &
-         echo $! >> $PID_FILE
-      else
-         echo "Starting worker on $MACHINE"
-         ssh $SSH_OPTS $WORKER_USERNAME@$MACHINE "$WORKER_LOCATION/$WORKER $EXECUTIONURL $MACHINE $LOG_DIR" > /dev/null 2>&1 &
-         echo $! >> $PID_FILE
-      fi
-
-      # Copy SSH key for easier access
-      cat $HOME/.ssh/*.pub | ssh $SSH_OPTS $WORKER_USERNAME@$MACHINE 'umask 077; cat >> $HOME/.ssh/authorized_keys' > /dev/null 2>&1
-   done
+   command="$@"
+   echo "Running $command" >> $LOG
+   $command >> $LOG 2>&1 &
+   echo $! >> $PID_FILE
 }
 
-
 # Start SSH workers
 start-workers-ssh()
 {
-   PORT=$1
-   EXECUTIONURL=http://$IPADDR:$PORT
-   if [ -z "$PORT" ]; then
-      crash "start-workers-ssh: Port number not specified, giving up"
-   fi
-
-   if [ -n "$WORKER_RELAY_HOST" ]; then
-      scp -A $SWIFT_BIN/$WORKER $WORKER_USERNAME@$WORKER_RELAY_HOST:/tmp > /dev/null 2>&1
-   fi
-
    for MACHINE in $WORKER_HOSTS
    do
       # Enable ssh tunneling if needed
       if [ "$SSH_TUNNELING" == "yes" ]; then
-         ssh -R *:$PORT:localhost:$PORT $WORKER_USERNAME@$MACHINE sleep 999 &
-         echo $! >> $PID_FILE
+         run_command_bg ssh -N -T -R *:$LOCAL_PORT:localhost:$LOCAL_PORT "$WORKER_USERNAME@$MACHINE"
       fi
 
-      # Use a relay host
-      if [ -n "$WORKER_RELAY_HOST" ]; then
-         ssh -A $WORKER_USERNAME@$WORKER_RELAY_HOST ssh $MACHINE mkdir -p $WORKER_LOCATION > /dev/null 2>&1
-         ssh -A $WORKER_USERNAME@$WORKER_RELAY_HOST "scp /tmp/$WORKER $WORKER_USERNAME@$MACHINE:$WORKER_LOCATION" > /dev/null 2>&1
-         echo Starting worker on $MACHINE
-         ssh -A $WORKER_USERNAME@$WORKER_RELAY_HOST ssh $WORKER_USERNAME@$MACHINE "WORKER_LOGGING_LEVEL=$WORKER_LOGGING_LEVEL $WORKER_LOCATION/$WORKER $EXECUTIONURL $MACHINE $WORKER_LOG_DIR" &
-         echo $! >> $PID_FILE
       # Connect directly
-      else
-         ssh $WORKER_USERNAME@$MACHINE mkdir -p $WORKER_LOCATION > /dev/null 2>&1
-         scp $SWIFT_BIN/$WORKER $WORKER_USERNAME@$MACHINE:$WORKER_LOCATION > /dev/null 2>&1
-         echo Starting worker on $MACHINE
-         ssh $WORKER_USERNAME@$MACHINE "WORKER_LOGGING_LEVEL=$WORKER_LOGGING_LEVEL $WORKER_LOCATION/$WORKER $EXECUTIONURL $MACHINE $WORKER_LOG_DIR" &
-         echo $! >> $PID_FILE
-      fi
+      run_command ssh $WORKER_USERNAME@$MACHINE mkdir -p $WORKER_LOCATION
+      run_command scp $WORKER $WORKER_USERNAME@$MACHINE:$WORKER_LOCATION
+      echo Starting worker on $MACHINE
+      run_command_bg ssh $WORKER_USERNAME@$MACHINE "WORKER_LOGGING_LEVEL=$WORKER_LOGGING_LEVEL $WORKER_LOCATION/worker.pl $WORKERURL $MACHINE $WORKER_LOG_DIR"
    done
    return 0
 }
@@ -246,152 +60,19 @@
 # Start local workers
 start-workers-local()
 {
-   PORT=$1
-   EXECUTIONURL=http://$IPADDR:$PORT
-   if [ -z "$PORT" ]; then
-      crash "start-workers-local: Port number not specified, giving up"
-   fi
    echo Starting worker on local machine
-   $WORKER $EXECUTIONURL LOCAL $LOG_DIR &
-   echo $! >> $PID_FILE
+   run_command_bg $WORKER $WORKERURL LOCAL $WORKER_LOG_DIR
    return 0
 }
 
 # Start condor workers
-start-workers-condor()
+start-workers-scheduler()
 {
-   PORT=$1
-   EXECUTIONURL=http://$IPADDR:$PORT
-   if [ -z "$PORT" ]; then
-      crash "start-workers-local: Port number not specified, giving up"
-   fi
    echo Starting workers
-
-   if [ -z "$CONDOR_WORKERS" ]; then
-      crash "CONDOR_WORKERS undefined - please check coaster-service.conf"
-   fi
-
-   if [ -z "$CONDOR_MACHINE_COUNT" ]; then
-      export CONDOR_MACHINE_COUNT=1
-   fi
-
-cat <<EOF > start_workers.submit
-output = workers.stdout
-error = workers.stderr
-executable = $WORKER_LOCATION/$WORKER
-arguments = $EXECUTIONURL node .
-notification = Never
-leave_in_queue = FALSE
-machine_count = $CONDOR_MACHINE_COUNT
-+AccountingGroup="group_friends.$USER"
-queue $CONDOR_WORKERS
-EOF
-
-   if [ -n "$WORKER_RELAY_HOST" ]; then
-      scp start_workers.submit $WORKER_USERNAME@$WORKER_RELAY_HOST:
-      scp $SWIFT_BIN/$WORKER $WORKER_USERNAME@$WORKER_RELAY_HOST:$WORKER_LOCATION > /dev/null 2>&1
-      ssh $WORKER_USERNAME@$WORKER_RELAY_HOST "condor_submit start_workers.submit"
-   else
-      condor_submit start_workers.submit
-   fi
+   run_command $SCHEDULER_COMMAND
 }
 
-# Start condor workers
-start-workers-slurm()
-{
-   PORT=$1
-   EXECUTIONURL=http://$IPADDR:$PORT
-   if [ -z "$PORT" ]; then
-      crash "start-workers-local: Port number not specified, giving up"
-   fi
-   echo Starting workers
-
-   if [ -z "$MACHINE_COUNT" ]; then
-      export MACHINE_COUNT=1
-   fi
-
-cat <<EOF > start_workers.submit
-#!/bin/bash
-
-#SBATCH --output=start_workers.stdout
-#SBATCH --error=start_workers.stderr
-#SBATCH --nodes=$MACHINE_COUNT
-#SBATCH --exclusive
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=12
-#SBATCH --partition=normal
-#SBATCH --time=11:59:00
-#SBATCH -A TG-ASC090068
-export WORKER_LOGGING_LEVEL=NONE
-ibrun $WORKER_LOCATION/$WORKER $EXECUTIONURL node .
-EOF
-
-   if [ -n "$WORKER_RELAY_HOST" ]; then
-      scp start_workers.submit $WORKER_USERNAME@$WORKER_RELAY_HOST:
-      scp $SWIFT_BIN/$WORKER $WORKER_USERNAME@$WORKER_RELAY_HOST:$WORKER_LOCATION > /dev/null 2>&1
-      ssh $WORKER_USERNAME@$WORKER_RELAY_HOST "sbatch start_workers.submit"
-   else
-      sbatch start_workers.submit
-   fi
-}
-
-
-# Start GWMS workers
-start-workers-gwms()
-{
-   PORT=$1
-   EXECUTIONURL=http://$IPADDR:$PORT
-
-   if [ -z "$PORT" ]; then
-      crash "start-workers-local: Port number not specified, giving up"
-   fi
-
-   if [ -z "$CONDOR_WORKERS" ]; then
-      crash "CONDOR_WORKERS undefined - please check coaster-service.conf"
-   fi
-
-   if [ -z "$WORKER_LOGGING_LEVEL" ]; then
-      WORKER_LOGGING_LEVEL=INFO
-   fi
-
-   echo Starting workers   
-   if [ -n "$WORKER_RELAY_HOST" ]; then
-      ssh $WORKER_USERNAME@$WORKER_RELAY_HOST "mkdir -p condor"
-      ssh $WORKER_USERNAME@$WORKER_RELAY_HOST "grid-proxy-init"
-      ssh $WORKER_USERNAME@$WORKER_RELAY_HOST "WORKER_INIT_CMD='$WORKER_INIT_CMD' WORKER_LOGGING_LEVEL=$WORKER_LOGGING_LEVEL run-gwms-workers $EXECUTIONURL $CONDOR_WORKERS" 
-   else
-      mkdir -p condor
-      run-gwms-workers $EXECUTIONURL $CONDOR_WORKERS
-   fi
-}
-
-# Start cobalt workers
-start-workers-cobalt()
-{
-  PORT=$1
-  if [ -z "$PORT" ]; then
-      crash "start-workers-cobalt: Port number not specified, giving up"
-  fi
-  EXECUTIONURL=http://$IPADDR:$PORT
-  TIMESTAMP=$(date "+%Y.%m%d.%H%M%S")
-  R=${RANDOM}
-  ID="${TIMESTAMP}.${R}"
-  # -t in minutes
-  set -x
-  cqsub -q ${QUEUE}   \
-        -k ${KERNEL}    \
-        -t ${MAXTIME} \
-        -n ${NODES}   \
-        -C ${PWD}/${LOG_DIR} \
-        -E cobalt.${$}.stderr \
-        -o cobalt.${$}.stdout \
-        -e "WORKER_LOGGING_LEVEL=DEBUG:ZOID_ENABLE_NAT=true" \
-        $SWIFT_BIN/$WORKER $EXECUTIONURL $ID $PWD/$LOG_DIR
-
-  echo $! >> $PID_FILE
-  return 0
-}
-
+# Parse command line arguments
 while [ $# -gt 0 ]; do
    case $1 in
       -conf) CMDLN_CONF=$2; shift 2;;
@@ -399,29 +80,25 @@
    esac
 done
 
-if [ ! -d "$HOME/.swift" ]; then
-   mkdir -p "$HOME/.swift" || crash "Unable to create $HOME/.swift"
-fi
+# Determine the location of needed files
+export SWIFT_BIN="$( cd "$( dirname "$0" )" && pwd )"
+export WORKER="$SWIFT_BIN/worker.pl"
+export PID_FILE="$HOME/.swift/.coaster-service-pids"
+export COASTER_SERVICE="$SWIFT_BIN/coaster-service"
+export LOG="start-coaster-service.log"
+mkdir -p "$HOME/.swift" || crash "Unable to create $HOME/.swift"
 
-PID_FILE="$HOME/.swift/.coaster-service-pids"
-RUN_DIR=$PWD
-
 # Import settings
 if [ -f "$CMDLN_CONF" ]; then
    CONFIG_FILE=$CMDLN_CONF
-elif [ -f "$RUN_DIR/coaster-service.conf" ]; then
-   CONFIG_FILE="$RUN_DIR/coaster-service.conf"
-elif [ -f "$HOME/.swift/coaster-service.conf" ]; then
-   CONFIG_FILE="$HOME/.swift/coaster-service.conf"
-elif [ -f "$(dirname $(readlink -f $0))/../etc/coaster-service.conf" ]; then
-   CONFIG_FILE="$(dirname $(readlink -f $0))/../etc/coaster-service.conf"
+elif [ -f "coaster-service.conf" ]; then
+   CONFIG_FILE="coaster-service.conf"
 else
    crash "Cannot find coaster-service.conf!"
 fi
 
 echo "Start-coaster-service..."
 echo "Configuration: $CONFIG_FILE"
-
 source $CONFIG_FILE
 
 # Determine IP address to which workers should connect
@@ -430,79 +107,59 @@
       IPADDR=localhost
    elif [ -n "$GLOBUS_HOSTNAME" ]; then
       IPADDR=$GLOBUS_HOSTNAME
-   elif [ -x "/sbin/ifconfig" ]; then
-      IPADDR=$( /sbin/ifconfig 2>/dev/null | grep 'inet addr' | grep -v 127.0.0.1 | cut -d ':' -f 2 | awk '{print $1}' |head -1)
    else
-      crash "Unable to determine IP address of system. Please add to coaster-service.conf"
+      crash "Unable to determine IP address"
    fi
 fi
+
 echo Service address: $IPADDR
 
-# Find swift
-if [ ! -x "$SWIFT" ]; then
-   SWIFT=`which swift`
-   if [ ! -x "$SWIFT" ]; then
-      crash "Unable to find swift! Please either add to your $PATH or specify the path in coaster-service.conf"
-   fi
-fi
-
-SWIFT_BIN=`dirname $SWIFT`
-WORKER=worker.pl
-
 # Verify worker script is there
-if [ ! -x "$SWIFT_BIN/$WORKER" ]; then
-   crash "Error: Unable to find worker at $SWIFT_BIN/$WORKER!"
+if [ ! -x "$WORKER" ]; then
+   crash "Error: Unable to find worker $WORKER!"
 fi
 
-# Try to create $LOG_DIR if needed, relative to $RUN_DIR
-if [ ! -d "$RUN_DIR/$LOG_DIR" ]; then
-   mkdir -p "$RUN_DIR/$LOG_DIR" > /dev/null 2>&1
-   if [ ! -d "$RUN_DIR/$LOG_DIR" ]; then
-      crash "Unable to make directory $RUN_DIR/$LOG_DIR!"
-   fi
-fi
-
-# Set paths to log files
-SWIFT_LOG="$RUN_DIR/$LOG_DIR"/swift.out
-COASTER_LOG="$RUN_DIR/$LOG_DIR"/coaster.log
-
 # Verify we can find coaster service
-if [ ! -x "$SWIFT_BIN/coaster-service" ]; then
-   crash "Unable to find $SWIFT_BIN/coaster-service!"
+if [ ! -x "$COASTER_SERVICE" ]; then
+   crash "Unable to find $COASTER_SERVICE!"
 fi
 
 # Create files for storing port info, if needed
 if [ -z "$LOCAL_PORT" ]; then
-   LOCAL_PORT_FILE=`mktemp`
+   LOCAL_PORT_FILE=$( mktemp )
 fi
 
 if [ -z "$SERVICE_PORT" ]; then
-   SERVICE_PORT_FILE=`mktemp`
+   SERVICE_PORT_FILE=$( mktemp )
 fi
 
 # Check values in configuration file to determine how we should start coaster-service
 echo Starting coaster-service
 if [ -z "$SERVICE_PORT" ] && [ -z "$LOCAL_PORT" ]; then
-   $SWIFT_BIN/coaster-service -nosec -portfile $SERVICE_PORT_FILE -localportfile $LOCAL_PORT_FILE -passive > $COASTER_LOG 2>&1 &
+   run_command_bg $COASTER_SERVICE -nosec -portfile $SERVICE_PORT_FILE -localportfile $LOCAL_PORT_FILE -passive 
 elif [ -n "$SERVICE_PORT" ] && [ -z "$LOCAL_PORT" ]; then
-   $SWIFT_BIN/coaster-service -nosec -port $SERVICE_PORT -localportfile $LOCAL_PORT_FILE -passive > $COASTER_LOG 2>&1 &
+   run_command_bg $COASTER_SERVICE -nosec -port $SERVICE_PORT -localportfile $LOCAL_PORT_FILE -passive 
 elif [ -z "$SERVICE_PORT" ] && [ -n "$LOCAL_PORT" ]; then
-   $SWIFT_BIN/coaster-service -nosec -portfile $SERVICE_PORT_FILE --localport $LOCAL_PORT -passive > $COASTER_LOG 2>&1 &
+   run_command_bg $COASTER_SERVICE -nosec -portfile $SERVICE_PORT_FILE --localport $LOCAL_PORT -passive
 elif [ -n  "$SERVICE_PORT" ] && [ -n "$LOCAL_PORT" ]; then
-   $SWIFT_BIN/coaster-service -nosec -port $SERVICE_PORT -localport $LOCAL_PORT -passive > $COASTER_LOG 2>&1 &
-else
-   crash "Unknown SERVICE_PORT type specified!"
+   run_command_bg $COASTER_SERVICE -nosec -port $SERVICE_PORT -localport $LOCAL_PORT -passive
 fi
 
-echo $! >> $PID_FILE
-sleep 15
+# If the service reports a port through a port file, wait up to 60 seconds for that file to be written
+if [ -z "$SERVICE_PORT" ]; then
+   wait_for_file $SERVICE_PORT_FILE 60
+fi
 
+if [ -z "$LOCAL_PORT" ]; then
+   wait_for_file $LOCAL_PORT_FILE 60
+fi
+
 # Determine SERVICE_PORT
 if [ -z "$SERVICE_PORT" ]; then
    if [ ! -f "$SERVICE_PORT_FILE" ]; then
       crash "Unable to determine SERVICE_PORT!"
    fi
-   SERVICE_PORT=`cat $SERVICE_PORT_FILE`
+   SERVICE_PORT=$( cat $SERVICE_PORT_FILE )
    rm $SERVICE_PORT_FILE
 fi
 
@@ -511,7 +168,7 @@
    if [ ! -f "$LOCAL_PORT_FILE" ]; then
       crash "Unable to determine LOCAL_PORT!"
    fi
-   LOCAL_PORT=`cat $LOCAL_PORT_FILE`
+   LOCAL_PORT=$( cat $LOCAL_PORT_FILE )
    rm $LOCAL_PORT_FILE
 fi
 
@@ -520,70 +177,23 @@
 
 # Generate sites.xml
 export EXECUTIONURL="http://$IPADDR:$SERVICE_PORT"
+export WORKERURL="http://$IPADDR:$LOCAL_PORT"
+
 echo Generating sites.xml
 if [ -f "gensites.template" ]; then
-   gensites `cat gensites.template` -p $CONFIG_FILE > $RUN_DIR/sites.xml
+   gensites $( cat gensites.template ) -p $CONFIG_FILE > sites.xml
 else
-   gensites persistent-coasters -p $CONFIG_FILE > $RUN_DIR/sites.xml
+   gensites persistent-coasters -p $CONFIG_FILE > sites.xml
 fi
 
-# For evil bug #467 
-echo "app echo (string i) { echo i; }" > hi.swift
-echo "echo(\"hi\");" >> hi.swift
-swift -sites.file sites.xml -tc.file tc.data -config cf hi.swift > /dev/null 2>&1 &
-DUMMYPID=$!
-sleep 15
-
 # Start workers
 case $WORKER_MODE in
-   ssh)
-      start-workers-ssh $LOCAL_PORT
-      ;;
-   local)
-      start-workers-local $LOCAL_PORT
-      ;;
-   cobalt)
-      start-workers-cobalt $LOCAL_PORT
-      ;;
-   futuregrid)
-      start-workers-futuregrid $LOCAL_PORT
-      ;;
-   gp)
-      start-workers-gp $LOCAL_PORT
-      ;;
-   ec2)
-      start-workers-ec2 $LOCAL_PORT
-      ;;
-   condor)
-      start-workers-condor $LOCAL_PORT
-      ;;
-   gwms)
-      start-workers-gwms $LOCAL_PORT
-      ;;
-   slurm)
-      start-workers-slurm $LOCAL_PORT
-      ;;
-   *)
-      crash "Unknown WORKER_MODE. Please modify coaster-service.conf"
-      ;;
+   ssh) start-workers-ssh;;
+   local) start-workers-local;;
+   scheduler) start-workers-scheduler;;
+   *) crash "Unknown WORKER_MODE";;
 esac
 
-# Wait for dummy script to finish
-wait $DUMMYPID
-
-# Generate config file
-if [ "$SHARED_FILESYSTEM" == "no" ]; then
-echo Generating config file
-cat > $RUN_DIR/cf << EOF
-use.provider.staging=true
-wrapperlog.always.transfer=false
-execution.retries=10
-provider.staging.pin.swiftfiles=false
-sitedir.keep=false
-use.wrapper.staging=false
-EOF
-fi
-
 # Local Variables:
 # tab-width: 3
 # sh-basic-offset: 3

Modified: trunk/etc/sites/persistent-coasters
===================================================================
--- trunk/etc/sites/persistent-coasters	2013-10-13 05:01:30 UTC (rev 7154)
+++ trunk/etc/sites/persistent-coasters	2013-10-14 05:37:26 UTC (rev 7155)
@@ -1,8 +1,7 @@
-<config>
+<?xml version="1.0" encoding="UTF-8"?>
+<config xmlns="http://www.ci.uchicago.edu/swift/SwiftSites">
   <pool handle="persistent-coasters">
-    <execution provider="coaster-persistent"
-               url="_EXECUTIONURL_"
-               jobmanager="local:local"/>
+    <execution provider="coaster-persistent" url="_EXECUTIONURL_" jobmanager="local:local"/>
     <profile namespace="globus" key="workerManager">passive</profile>
     <profile namespace="globus" key="jobsPerNode">_JOBSPERNODE_</profile>
     <profile key="jobThrottle" namespace="karajan">_JOBTHROTTLE_</profile>




More information about the Swift-commit mailing list