[Swift-commit] r2796 - usertools/cio/bin

noreply at svn.ci.uchicago.edu noreply at svn.ci.uchicago.edu
Wed Apr 1 16:24:57 CDT 2009


Author: aespinosa
Date: 2009-04-01 16:24:55 -0500 (Wed, 01 Apr 2009)
New Revision: 2796

Added:
   usertools/cio/bin/ciologic-bgp.sh
   usertools/cio/bin/falkon-start.sh
Removed:
   usertools/cio/bin/cnip-work.sh
   usertools/cio/bin/torus-falkon-start-bgp-ram.sh
   usertools/cio/bin/zoid-user-script.sh
Log:
Cleanup of MTIO logic startup scripts


Added: usertools/cio/bin/ciologic-bgp.sh
===================================================================
--- usertools/cio/bin/ciologic-bgp.sh	                        (rev 0)
+++ usertools/cio/bin/ciologic-bgp.sh	2009-04-01 21:24:55 UTC (rev 2796)
@@ -0,0 +1,130 @@
+#!/fuse/bin/bash
+
+# Script: ciologic-bgp.sh
+# Description: starts ciologic for the BlueGene
+#
+# Environment read by this script:
+#   CIOROOT      - must be set; used to locate libexec/ helpers
+#   CIOARCH      - must be set (only checked here)
+#   CHIRPROOT    - used by the ifs_* functions to locate the Chirp binaries
+#   CONTROL_INIT - comma-separated list; field 4 is taken as the node rank
+
+# Check sanity of environment
+
+if [ -z "$CIOROOT" ]; then
+  echo "CIOROOT not defined" >&2
+  exit 1
+fi
+if [ -z "$CIOARCH" ]; then
+  echo "CIOARCH not defined" >&2
+  exit 1
+fi
+
+# BGP specific initialization
+
+# Exported explicitly so that child processes see these values even when
+# LD_LIBRARY_PATH was not already present in the inherited environment.
+export LD_LIBRARY_PATH=/lib:/fuse/lib:/fuse/usr/lib
+export PATH=/fuse/bin:/fuse/usr/bin:$PATH
+
+# Start the IP forwarder in the background, then block until
+# /tmp/ifconfig.cmd exists (presumably written by cn-ipfwd - confirm)
+# and source it into this shell.
+/home/iskra/ZeptoOS/packages/cnip/prebuilt/cn-ipfwd.2409 &
+while [ ! -f /tmp/ifconfig.cmd ]; do
+    sleep 1
+done
+. /tmp/ifconfig.cmd
+
+# function: get_rank
+#  Print the rank of the node this is running on, i.e. the 4th
+#  comma-separated field of $CONTROL_INIT.
+
+get_rank(){
+  # Quoted so the value reaches awk verbatim (no word splitting/globbing).
+  printf '%s\n' "$CONTROL_INIT" | awk -F, '{print $4}'
+}
+
+# function: get_ip [rank]
+#    Given the rank of a node, print its IP address in the form
+#    10.128.<rank / 256>.<rank % 256>.
+
+get_ip(){
+  # local: callers such as ifs_head keep their own variable named "rank"
+  local rank=$1
+  echo "10.128.$(( rank / 256 )).$(( rank % 256 ))"
+}
+
+# function: ifs_head [stripe_size]
+#  Sets this node up as the head of a stripe: starts a Chirp server
+#  exporting /dev/shm/share, writes the stripe configuration (ACLs, key
+#  and a hosts list of [stripe_size] consecutive ranks starting at this
+#  node's own rank), mounts Chirp on /chirp and links /dataifs to this
+#  node's own entry under /chirp.
+
+ifs_head(){
+  local stripe_size=$1
+  local rank i
+  rank=`get_rank`
+
+  mkdir -p /dev/shm/share/stripe/root
+  cat > /dev/shm/share/.__acl << EOF
+address:192.168.1.* rwlda
+address:10.* rwlda
+address:127.0.0.1 rwlda
+EOF
+
+  # Must run in the background (as in ifs_mount): a foreground server
+  # would block here and the stripe setup below would never be reached.
+  "$CHIRPROOT"/bin/chirp_server -r /dev/shm/share &
+
+  # Enable striping
+  cat /dev/shm/share/.__acl > /dev/shm/share/stripe/.__acl
+  cat /dev/shm/share/.__acl > /dev/shm/share/stripe/root/.__acl
+  echo bigvolfiles > /dev/shm/share/stripe/key
+  # The hosts file is rewritten from scratch so that a repeated call does
+  # not accumulate duplicate entries.
+  for (( i = 0; i < stripe_size; i++ )); do
+    get_ip $(( rank + i ))
+  done > /dev/shm/share/stripe/hosts
+
+  mkdir -p /chirp
+  "$CHIRPROOT"/bin/chirp_fuse -a address /chirp
+
+  ln -sf "/chirp/`get_ip $rank`" /dataifs
+}
+
+# function: ifs_slave [head]
+#  Starts the supporting data node for the stripe belonging to the
+#  [head] node. The Chirp server is started without "&", so unless
+#  chirp_server detaches itself this function blocks until it exits.
+
+ifs_slave(){
+  # Rank of the stripe head; not used yet, kept local so it does not leak.
+  local ifs_rank=$1
+  # For Chirp, just start the server
+  mkdir -p /dev/shm/share
+  cat > /dev/shm/share/.__acl << EOF
+address:192.168.1.* rwlda
+address:10.* rwlda
+address:127.0.0.1 rwlda
+EOF
+
+  "$CHIRPROOT"/bin/chirp_server -r /dev/shm/share
+}
+
+# function: ifs_mount [head]
+#  Prepares a worker node: starts a local Chirp server on /dev/shm/share
+#  in the background and mounts Chirp on /chirp.
+
+ifs_mount() {
+  # Rank of the stripe head; not used yet, kept local so it does not leak.
+  local ifs_rank=$1
+  # Export the local RAM share through Chirp
+  mkdir -p /dev/shm/share
+  cat > /dev/shm/share/.__acl << EOF
+address:192.168.1.* rwlda
+address:10.* rwlda
+address:127.0.0.1 rwlda
+EOF
+
+  "$CHIRPROOT"/bin/chirp_server -r /dev/shm/share &
+  # -p: do not fail when the mount point already exists (same as ifs_head)
+  mkdir -p /chirp
+  "$CHIRPROOT"/bin/chirp_fuse -a address /chirp
+}
+
+# Positional arguments:
+#   $1-$7  passed through to runworker-bgp.sh
+#   $8     PART_SIZE   - divided by IFS_NUM to get the group size
+#                        (presumably the partition's node count - confirm)
+#   $9     IFS_NUM     - number of groups the partition is split into
+#   ${10}  STRIPE_SIZE - number of consecutive ranks (head included) that
+#                        make up a stripe
+PART_SIZE=$8
+IFS_NUM=$9
+STRIPE_SIZE=${10}
+echo "$PART_SIZE $IFS_NUM $STRIPE_SIZE"
+
+# Reject missing or non-numeric values up front; otherwise the arithmetic
+# below fails (division by zero) and every node silently becomes a worker.
+case "$PART_SIZE:$IFS_NUM:$STRIPE_SIZE" in
+  *[!0-9:]*|:*|*::*|*:)
+    echo "ERROR: PART_SIZE, IFS_NUM and STRIPE_SIZE must be non-negative integers" >&2
+    exit 1
+    ;;
+esac
+if [ "$IFS_NUM" -eq 0 ] || [ "$PART_SIZE" -lt "$IFS_NUM" ]; then
+  echo "ERROR: need 0 < IFS_NUM <= PART_SIZE" >&2
+  exit 1
+fi
+
+RANK=`get_rank`
+IFS_GRP=$(( PART_SIZE / IFS_NUM ))
+# Head of this node's group: first rank of the group plus one (rank 0 of
+# the partition is reserved for the hash server). RANK / IFS_GRP alone is
+# only the group index, so it has to be scaled back by IFS_GRP.
+IFS_RANK=$(( RANK / IFS_GRP * IFS_GRP + 1 ))
+IFS_SLAVE=$(( IFS_RANK + STRIPE_SIZE ))
+
+export IFS_RANK
+export CHIRP_ADD=`get_ip $IFS_RANK`
+
+echo "RANK: $RANK" "IFS_RANK: $IFS_RANK"
+
+#Core MTIO logic
+#  rank 0                          -> hash server
+#  head of a group                 -> stripe head + collector
+#  ranks after the head, in stripe -> stripe data nodes
+#  everything else                 -> Falkon workers
+if [ "$RANK" -eq 0 ]; then
+    "$CIOROOT"/libexec/hashserver.rb
+elif [ "$RANK" -eq "$IFS_RANK" ]; then
+    ifs_head "$STRIPE_SIZE"
+    # NOTE(review): hard-coded user path, unlike the $CIOROOT-based helpers.
+    /home/zzhang/cio/bin/collector.sh
+elif [ "$RANK" -gt "$IFS_RANK" ] && [ "$RANK" -lt "$IFS_SLAVE" ]; then
+    ifs_slave "$IFS_RANK"
+    sleep 3600
+else
+    ifs_mount "$IFS_RANK"
+    "$CIOROOT"/libexec/falkon/runworker-bgp.sh "${@:1:7}"
+fi


Property changes on: usertools/cio/bin/ciologic-bgp.sh
___________________________________________________________________
Name: svn:executable
   + *

Deleted: usertools/cio/bin/cnip-work.sh
===================================================================
--- usertools/cio/bin/cnip-work.sh	2009-04-01 05:20:45 UTC (rev 2795)
+++ usertools/cio/bin/cnip-work.sh	2009-04-01 21:24:55 UTC (rev 2796)
@@ -1,69 +0,0 @@
-#!/fuse/bin/bash
-LD_LIBRARY_PATH=/lib:/fuse/lib:/fuse/usr/lib
-PATH=/fuse/bin:/fuse/usr/bin:$PATH
-
-#Torus START
-/home/iskra/ZeptoOS/packages/cnip/prebuilt/cn-ipfwd.2409 &
-
-while [ ! -f /tmp/ifconfig.cmd ]; do
-    sleep 1
-done
-
-. /tmp/ifconfig.cmd
-
-#IP Calculation
-RANK=`echo $CONTROL_INIT | awk -F, '{print $4}'`
-FIRST=`expr $RANK % 256`
-SECOND=`expr $RANK / 256`
-
-BASE=`expr $FIRST / 64`
-MODULO=`expr $FIRST % 64`
-CHIRP_BASE=`expr $BASE \* 64`
-CHIRP_1=`expr $CHIRP_BASE + 1`
-CHIRP_2=`expr $CHIRP_BASE + 2`
-CHIRP_3=`expr $CHIRP_BASE + 3`
-CHIRP_4=`expr $CHIRP_BASE + 4`
-export CHIRP_ADD=10.128.$SECOND.$CHIRP_1
-#Chirp START
-if [ -d /dev/shm/share ];
-    then
-    rm -rf /dev/shm/share 
-fi
-mkdir /dev/shm/share
-echo "address:192.168.1.* rwlda" >>/dev/shm/share/.__acl
-echo "address:10.* rwlda" >> /dev/shm/share/.__acl
-echo "address:127.0.0.1 rwlda" >> /dev/shm/share/.__acl
-
-/home/zzhang/chirp/bin/chirp_server -r /dev/shm/share &
-
-#MAIN
-if [ "$RANK" = "0" ] 
-then
-    /home/zzhang/cio/bin/hashserver.rb &
-elif [ "$MODULO" = "1" ] 
-then
-    cd /dev/shm/share
-    mkdir stripe
-    cd stripe
-    cp ../.__acl .
-    mkdir root
-    cp .__acl root/
-    echo 10.128.$SECOND.$CHIRP_1 >> hosts
-    echo 10.128.$SECOND.$CHIRP_2 >> hosts
-    echo 10.128.$SECOND.$CHIRP_3 >> hosts
-    echo 10.128.$SECOND.$CHIRP_4 >> hosts
-    echo bigvolfiles > key
-
-    mkdir /chirp
-    /home/zzhang/chirp/bin/chirp_fuse -a address /chirp
-    /home/zzhang/cio/bin/collector.sh &
-elif [ "$MODULO" = "2" ] || [ "$MODULO" = "3" ] || [ "$MODULO" = "4" ] 
-then
-    sleep 3600
-else
-    mkdir /chirp
-    /home/zzhang/chirp/bin/chirp_fuse -a address /chirp
-    /home/zzhang/falkon/worker/run.worker-c-bgp.sh $1 $2 $3 $4 $5 $6 $7
-fi
-
-sleep 3600

Added: usertools/cio/bin/falkon-start.sh
===================================================================
--- usertools/cio/bin/falkon-start.sh	                        (rev 0)
+++ usertools/cio/bin/falkon-start.sh	2009-04-01 21:24:55 UTC (rev 2796)
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+# Script: falkon-start.sh
+# Description: verifies that CIOROOT, CIOARCH and CHIRPROOT are set, then
+#  runs the architecture-specific start script
+#  $CIOROOT/libexec/falkon/falkon-start-$CIOARCH.sh.
+#  Command-line arguments are forwarded to that script unchanged.
+
+if [ -z "$CIOROOT" ]; then
+  echo "ERROR: CIOROOT env not defined" >&2
+  exit 1
+fi
+if [ -z "$CIOARCH" ]; then
+  echo "ERROR: CIOARCH not defined" >&2
+  exit 1
+fi
+if [ -z "$CHIRPROOT" ]; then
+  echo "ERROR: CHIRPROOT not defined" >&2
+  exit 1
+fi
+"$CIOROOT/libexec/falkon/falkon-start-$CIOARCH.sh" "$@"


Property changes on: usertools/cio/bin/falkon-start.sh
___________________________________________________________________
Name: svn:executable
   + *

Deleted: usertools/cio/bin/torus-falkon-start-bgp-ram.sh
===================================================================
--- usertools/cio/bin/torus-falkon-start-bgp-ram.sh	2009-04-01 05:20:45 UTC (rev 2795)
+++ usertools/cio/bin/torus-falkon-start-bgp-ram.sh	2009-04-01 21:24:55 UTC (rev 2796)
@@ -1,174 +0,0 @@
-#!/bin/bash
-
-
- if [ -z "$3" ]; then 
-              echo "usage: $0 <QueueName> <NumNodes> <MaxTimeMin>"
-              echo "usage: $0 prod 1024 60"
-              echo "-- or --"
-              echo "usage: $0 <QueueName> <NumNodes> <MaxTimeMin> <WorkersPerNode>"
-              echo "usage: $0 prod 1024 60 4"
-              exit 1
-          fi
-
-          
-if [ -z "${FALKON_HOME}" ]; then
-    echo "ERROR: environment variable FALKON_HOME not defined"
-    exit 1
-fi
-
-
-QUEUE_NAME=$1
-PROFILE_NAME="zeptocn"
-NUM_NODES=$2
-let NUM_ION=NUM_NODES/64
-MAX_TIME_MIN=$3
-SERVICE_IP="192.168.1.254"
-SERVICE_PORT1=55000
-SERVICE_PORT2=55001
-WORKERS_PER_NODE=4
-if [ ! -z $4 ];then
-   WORKERS_PER_NODE=$4
-fi 
-
-
-cp /home/zzhang/cio/bin/zoid-user-script.sh ${HOME}/zoid-user-script.sh
-chmod +x ${HOME}/zoid-user-script.sh
-
-FALKON_JOB_ID=`falkon-id-get.sh N/A`                   
-EXIT_CODE=$? 
-
-
-    if [ "${EXIT_CODE}" -ne "0" ]; then
-    echo "Error in geting a unique falkon ID.. 'falkon-get-id.sh N/A'"
-    cqdel ${ZOID_JOB_ID}
-    exit ${EXIT_CODE}
-    fi
-
-
-
-#ZOID_JOB_ID=`cqsub -q ${QUEUE_NAME} -k ${PROFILE_NAME} -C ${HOME} -t ${MAX_TIME_MIN} -n ${NUM_NODES} -e LD_LIBRARY_PATH=/lib:/fuse/lib:/fuse/usr/lib /bgsys/linux/1.2.020080512/bin/bash /home/zzhang/falkon/worker/run.worker-c-bgp.sh ${SERVICE_IP} ${SERVICE_PORT1} ${SERVICE_PORT2} ${WORKERS_PER_NODE} ${USER} ${FALKON_JOB_ID} ${FALKON_ROOT}`
-
-ZOID_JOB_ID=`cqsub -q ${QUEUE_NAME} -k ${PROFILE_NAME} -C ${HOME} -t ${MAX_TIME_MIN} -n ${NUM_NODES} -e LD_LIBRARY_PATH=/lib:/fuse/lib:/fuse/usr/lib /bgsys/linux/1.2.020080512/bin/bash /home/zzhang/cio/bin/cnip-work.sh ${SERVICE_IP} ${SERVICE_PORT1} ${SERVICE_PORT2} ${WORKERS_PER_NODE} ${USER} ${FALKON_JOB_ID} ${FALKON_ROOT}`
-
-#ZOID_JOB_ID=`cqsub -q ${QUEUE_NAME} -k ${PROFILE_NAME} -C ${HOME} -t ${MAX_TIME_MIN} -n ${NUM_NODES} -e LD_LIBRARY_PATH=/lib:/fuse/lib:/fuse/usr/lib /bin/sleep 1800`
-
-EXIT_CODE=$? 
-
-    if [ "${EXIT_CODE}" -ne "0" ]; then
-    echo "Error in submitting job to Cobalt.. 'cqsub -q ${QUEUE_NAME} -k ${PROFILE_NAME} -C ${HOME} -t ${MAX_TIME_MIN} -n ${NUM_NODES} -e LD_LIBRARY_PATH=/lib:/fuse/lib:/fuse/usr/lib /bgsys/linux/1.2.020080512/bin/bash /fuse/${FALKON_WORKER_HOME}/run.worker-c-bgp.sh ${SERVICE_IP} ${SERVICE_PORT1} ${SERVICE_PORT2} ${WORKERS_PER_NODE} ${USER} ${FALKON_JOB_ID} ${FALKON_ROOT}' "
-    exit ${EXIT_CODE}
-    fi
-
-falkon-id-update.sh ${FALKON_JOB_ID} ${ZOID_JOB_ID}
-
-EXIT_CODE=$? 
-
-    if [ "${EXIT_CODE}" -ne "0" ]; then
-    echo "Error in updating cobalt job info for falkon.. 'falkon-update-id.sh ${ZOID_JOB_ID}'"
-    cqdel ${ZOID_JOB_ID}
-    exit ${EXIT_CODE}
-    fi
-
-
-FALKON_JOBID_HOME=${FALKON_ROOT}/users/${USER}/${FALKON_JOB_ID}
-
-echo "Submitted job ${ZOID_JOB_ID} to Cobalt, creating the job specific Falkon tree for logs and configuration in ${FALKON_JOBID_HOME}..."     
-                                            
-mkdir -p ${FALKON_JOBID_HOME}
-
-cp ${FALKON_HOME}/falkon.env.bgp* ${FALKON_JOBID_HOME}/
-cp -r ${FALKON_HOME}/config ${FALKON_JOBID_HOME}/
-cp ${FALKON_JOBID_HOME}/config/Client-service-URIs.config2 ${FALKON_JOBID_HOME}/config/Client-service-URIs.config
-mkdir -p ${FALKON_JOBID_HOME}/logs/client ${FALKON_JOBID_HOME}/logs/service ${FALKON_JOBID_HOME}/logs/provisioner ${FALKON_JOBID_HOME}/logs/worker
-
-                           
-DATE=`date +%s`      
-echo "$DATE: pre-creating log dirs for Falkon service..."
-RACK_START=0
-RACK_END=48
-SEQUENCE_DIR=`seq -w ${RACK_START} ${RACK_END}`
-PSET_START=1
-PSET_END=16
-for a in ${SEQUENCE_DIR}
-do
-    for ((b=${PSET_START}; b <= ${PSET_END} ; b++))  # Double parentheses, and "LIMIT" with no "$".
-    do
-        DIR_NAME="ion-R${a}-${b}"
-        mkdir -p ${FALKON_JOBID_HOME}/logs/service/$DIR_NAME
-    done
-done
-
-for ((b=${PSET_START}; b <= ${PSET_END} ; b++))  # Double parentheses, and "LIMIT" with no "$".
-do
-        DIR_NAME="ion-${b}"
-        mkdir -p ${FALKON_JOBID_HOME}/logs/service/$DIR_NAME
-done
-
-
-DATE=`date +%s`      
-echo "$DATE: done creating log dirs for Falkon service!"
-
-
-
-DATE=`date +%s`      
-echo "$DATE: pre-creating data dirs for Falkon service..."
-RACK_START=0
-RACK_END=48
-SEQUENCE_DIR=`seq -w ${RACK_START} ${RACK_END}`
-PSET_START=1
-PSET_END=16
-for a in ${SEQUENCE_DIR}
-do
-    for ((b=${PSET_START}; b <= ${PSET_END} ; b++))  # Double parentheses, and "LIMIT" with no "$".
-    do
-        DIR_NAME="ion-R${a}-${b}"
-        mkdir -p ${FALKON_JOBID_HOME}/data/$DIR_NAME
-    done
-done
-
-for ((b=${PSET_START}; b <= ${PSET_END} ; b++))  # Double parentheses, and "LIMIT" with no "$".
-do
-        DIR_NAME="ion-${b}"
-        mkdir -p ${FALKON_JOBID_HOME}/data/$DIR_NAME
-done
-
-
-DATE=`date +%s`      
-echo "$DATE: done creating data dirs for Falkon service!"
-
-
-
-FALKON_HOME_RAM=/tmp/${USER}/falkon
-
-ln -s ${FALKON_HOME}/apps ${FALKON_JOBID_HOME}/apps
-ln -s ${FALKON_HOME_RAM}/container ${FALKON_JOBID_HOME}/container
-ln -s ${FALKON_HOME}/service ${FALKON_JOBID_HOME}/service
-ln -s ${FALKON_HOME}/worker ${FALKON_JOBID_HOME}/worker
-ln -s ${FALKON_HOME}/AstroPortal ${FALKON_JOBID_HOME}/AstroPortal
-ln -s ${FALKON_HOME}/client ${FALKON_JOBID_HOME}/client
-ln -s ${FALKON_HOME}/monitor ${FALKON_JOBID_HOME}/monitor
-ln -s ${FALKON_HOME}/bin ${FALKON_JOBID_HOME}/bin
-ln -s ${FALKON_HOME}/config ${FALKON_JOBID_HOME}/config
-ln -s ${FALKON_HOME}/ploticus ${FALKON_JOBID_HOME}/ploticus
-ln -s ${FALKON_HOME}/webserver ${FALKON_JOBID_HOME}/webserver
-ln -s ${FALKON_HOME}/workloads ${FALKON_JOBID_HOME}/workloads
-ln -s ${FALKON_HOME}/id ${FALKON_JOBID_HOME}/id
-ln -s ${FALKON_HOME}/apache-ant-1.7.0 ${FALKON_JOBID_HOME}/apache-ant-1.7.0
-ln -s ${FALKON_HOME}/ibm-java2-ppc64-50 ${FALKON_JOBID_HOME}/ibm-java2-ppc64-50
-ln -s ${FALKON_HOME_RAM}/ibm-java2-ppc-50 ${FALKON_JOBID_HOME}/ibm-java2-ppc-50
-ln -s ${FALKON_HOME}/falkon.tgz ${FALKON_JOBID_HOME}/falkon.tgz
-
-
-if [ ! -d "${FALKON_JOBID_HOME}" ]; then
-    echo "ERROR: invalid path ${FALKON_JOBID_HOME}... exiting"
-    cqdel ${ZOID_JOB_ID}
-    exit 1
-fi
-
-echo "Succesfully submitted the job to Cobalt, and setup job specific Falkon tree!"
-echo "To monitor the job status, type 'cqstat | grep ${USER}'; once it is in running state, you can use the Falkon specific command ...."
-echo "To submit your Falkon-based workload, type: ....; you can do this any time, the falkon workload will wait for the resources to come online, and will only be submitted when everything is ready; the script is run in the background, so the workload will run even if the ssh session gets disconnected."
-echo ""
-echo "Remember, your job id is ${ZOID_JOB_ID}, and if you need to look through the logs manually for anything, remember that you can find them at ${HOME}/${ZOID_JOB_ID}.output, ${HOME}/${ZOID_JOB_ID}.error, and ${FALKON_JOBID_HOME}/logs/..."
-
-

Deleted: usertools/cio/bin/zoid-user-script.sh
===================================================================
--- usertools/cio/bin/zoid-user-script.sh	2009-04-01 05:20:45 UTC (rev 2795)
+++ usertools/cio/bin/zoid-user-script.sh	2009-04-01 21:24:55 UTC (rev 2796)
@@ -1,90 +0,0 @@
-#!/bin/bash
-
-#IP=`hostname -i`
-#echo ${IP} >> /home/falkon/falkon/logs/provisioner/zoid-log.txt
-                       
-#startup
-    if [ "${1}" -eq "1" ]; then
-                            
-#ERROR_LOG=/home/falkon/users/error.log
-ERROR_LOG=/dev/null
-
-    echo "starting zoid-user-script.sh..." >> ${ERROR_LOG}
-    #echo "reviewing the command and arguements: $0 $1 $2 $3 $4 $5 $6 $7..." >> ${ERROR_LOG}
-    #tENV=`env`
-    #echo "reviewing the envirnoment: $tENV..." >> ${ERROR_LOG}
-
-original=$ZOID_JOB_ARGS
-a0=${original%%:*}; rest=${original#*:}
-a1=${rest%%:*}; rest=${rest#*:}
-a2=${rest%%:*}; rest=${rest#*:}
-a3=${rest%%:*}; rest=${rest#*:}
-a4=${rest%%:*}; rest=${rest#*:}
-a5=${rest%%:*}; rest=${rest#*:}
-a6=${rest%%:*}; rest=${rest#*:}
-a7=${rest%%:*};
-
-    
-    echo "reviewing the command and arguements: $a0 $a1 $a2 $a3 $a4 $a5 $a6 $a7..." >> ${ERROR_LOG}
-    
-    
-#FALKON_JOBID_HOME=/home/falkon/users/${USER}/${ZOID_JOB_ID}
-FALKON_JOBID_HOME=${a7}/users/${a5}/${a6}
-FALKON_HOME=${a7}/users/${a5}/${a6}
-    echo "FALKON_JOBID_HOME: $FALKON_JOBID_HOME" >> ${ERROR_LOG}
-MACH_ID=`uname -n`
-       #this should be done via some configuration parameter
-
-echo ${FALKON_JOBID_HOME}/data/${MACH_ID} >> /tmp/iohost
-if [ ! -d "${FALKON_JOBID_HOME}" ]; then
-    echo "ERROR: invalid path ${FALKON_JOBID_HOME}... exiting" >> ${ERROR_LOG}
-    exit 1
-fi
-
-
-
-    
-   cd ${FALKON_JOBID_HOME}
-   #source falkon.env.bgp-io
-
-   echo "ION at ${MACH_ID} ==> ${FALKON_HOME}..."  >> ${ERROR_LOG}
-
-    LOG_FILE_DEBUG="${FALKON_HOME}/logs/provisioner/STATUS_ION_${MACH_ID}"
-    DATE=`date`
-    echo "${DATE} : falkon-ion-start : starting the I/O node Falkon startup in the background..." >> ${LOG_FILE_DEBUG}
-
-
-if [ ! -e "${FALKON_HOME}/bin/falkon-ion-start-blocking-zeptocn.sh" ]; then
-    echo "ERROR: invalid script ${FALKON_HOME}/bin/falkon-ion-start-blocking-zeptocn.sh" >> ${ERROR_LOG}
-    exit 1
-fi
-
-     #used to run from GPFS
-    #${FALKON_JOBID_HOME}/bin/falkon-ion-start-blocking-zeptocn.sh ${FALKON_JOBID_HOME} &
-    #used to run from RAM
-    ${FALKON_JOBID_HOME}/bin/falkon-ion-start-blocking-zeptocn-ram.sh ${FALKON_JOBID_HOME} &
-
-
-    EXIT_CODE=$? 
-    DATE=`date`
-
-    echo "${DATE} : falkon-ion-start : completed the I/O node Falkon startup in the background..." >> ${LOG_FILE_DEBUG}
-
-   echo "ION at ${MACH_ID} finished OK!"  >> ${ERROR_LOG}
-
-                   
-exit ${EXIT_CODE}
-
-
-    fi
-
-
-                   
-#cleanup
-    if [ "${1}" -eq "0" ]; then
-        
-        killall -9 java                      
-        exit 0
-
-    fi
-    




More information about the Swift-commit mailing list