[Swift-commit] r3113 - in usertools/cio: bin libexec/falkon
noreply at svn.ci.uchicago.edu
noreply at svn.ci.uchicago.edu
Fri Sep 25 18:02:36 CDT 2009
Author: aespinosa
Date: 2009-09-25 18:02:36 -0500 (Fri, 25 Sep 2009)
New Revision: 3113
Added:
usertools/cio/libexec/falkon/falkon-start-bgp_logging.sh
Removed:
usertools/cio/bin/bashrc
usertools/cio/bin/bcast.sh
usertools/cio/bin/wrapper.sh
usertools/cio/libexec/falkon/falkon-start-bgp-logging.sh
Modified:
usertools/cio/bin/ciologic-bgp.sh
usertools/cio/bin/falkon-start.sh
usertools/cio/bin/swift_bgp.sh
usertools/cio/libexec/falkon/falkon-start-bgp.sh
Log:
Used mosastore IFS instead of Chirp
Deleted: usertools/cio/bin/bashrc
===================================================================
--- usertools/cio/bin/bashrc 2009-09-22 18:28:15 UTC (rev 3112)
+++ usertools/cio/bin/bashrc 2009-09-25 23:02:36 UTC (rev 3113)
@@ -1,3 +0,0 @@
-export PATH=/home/zzhang/chirp/bin:$PATH
-export PATH=/home/zzhang/ruby-1.8.7-p72/bin/bin:$PATH
-PATH=/fuse/bin:/fuse/usr/bin:$PATH
\ No newline at end of file
Deleted: usertools/cio/bin/bcast.sh
===================================================================
--- usertools/cio/bin/bcast.sh 2009-09-22 18:28:15 UTC (rev 3112)
+++ usertools/cio/bin/bcast.sh 2009-09-25 23:02:36 UTC (rev 3113)
@@ -1,16 +0,0 @@
-#!/fuse/bin/bash
-
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/zzhang/cio/lib
-PATH=/fuse/bin:/fuse/usr/bin:$PATH
-
-IP=`/sbin/ifconfig | grep inet | tail -n 1 | cut -d ':' -f 2 |awk '{print $1}'`
-#mkdir -p /dev/shm/share/common
-#cp -r common/* /dev/shm/share/
-#chmod -R 755 /dev/shm/share/common
-#exit 1
-# tree network path
-DESTHOSTS=`seq 0 63 | sed "s/^/10.128.0./" | xargs`
-echo ${DESTHOSTS/$IP/" "}
-/home/zzhang/chirp/bin/chirp_distribute -a address -D 127.0.0.1 / ${DESTHOSTS/$IP/" "}
-
-exit 0
\ No newline at end of file
Modified: usertools/cio/bin/ciologic-bgp.sh
===================================================================
--- usertools/cio/bin/ciologic-bgp.sh 2009-09-22 18:28:15 UTC (rev 3112)
+++ usertools/cio/bin/ciologic-bgp.sh 2009-09-25 23:02:36 UTC (rev 3113)
@@ -3,38 +3,23 @@
# Script: ciologic-bgp.sh
# Description: starts ciologic for the BlueGene
-# Check sanity of environment
-
-if [ -z $CIOROOT ]; then
- echo "CIOROOT not defined"
- exit 1
-fi
-if [ -z $CIOARCH ]; then
- echo "CIOARCH note defined"
- exit 1
-fi
-
-# BGP specific initialization
-
-LD_LIBRARY_PATH=/lib:/fuse/lib:/fuse/usr/lib
-PATH=/fuse/bin:/fuse/usr/bin:$PATH
-
-/home/iskra/ZeptoOS/packages/cnip/prebuilt/cn-ipfwd &
-while [ ! -f /tmp/ifconfig.cmd ]; do
+# function: start_iptorus
+start_iptorus(){
+ /soft/apps/ZeptoOS-2.0-V1R3M0/cnbin/cn-ipfwd &
+ while [ ! -f /tmp/ifconfig.cmd ]; do
sleep 1
-done
-. /tmp/ifconfig.cmd
+ done
+ . /tmp/ifconfig.cmd
+}
# function: get_rank
# return the rank of the node this is running on
-
get_rank(){
echo $CONTROL_INIT | awk -F, '{print $4}'
}
# function: get_ip [rank]
# given a rank of a node. return its ip address
-
get_ip(){
rank=$1
echo "10.128.$(( rank / 256)).$((rank % 256))"
@@ -44,28 +29,30 @@
stripe_size=$1
rank=`get_rank`
- mkdir -p /dev/shm/share/stripe/root
- cat > /dev/shm/share/.__acl << EOF
-address:192.168.1.* rwlda
-address:10.* rwlda
-address:127.0.0.1 rwlda
-EOF
+ echo "Rank $rank: Starting manager"
+ cat > /tmp/manager_config.cfg << EOF
+# the recommended stripe width
+# this is the number of benefactors the client will stripe the data among in the write operation
+stripe_width = $stripe_size
- # Enable striping
- cat /dev/shm/share/.__acl > /dev/shm/share/stripe/.__acl
- cat /dev/shm/share/.__acl > /dev/shm/share/stripe/root/.__acl
- echo bigvolfiles > /dev/shm/share/stripe/key
- for (( i = 0; i < stripe_size; i++ )); do
- slave_rank=$((rank + i))
- echo `get_ip $slave_rank` >> /dev/shm/share/stripe/hosts
- done
+# the size of the chunk - not fully implemented yet
+# chunk_size = 1048576
- mkdir -p /chirp
- $CHIRPROOT/bin/chirp_fuse -a address /chirp
- ln -sf /chirp/`get_ip $rank` /dataifs
+# the maximum possible number of benefactors in the system
+max_num_ben = 4096
- $CHIRPROOT/bin/chirp_server -r /dev/shm/share
+# (Optional) Log mode could be : DEBUG, VERBOS, ERROR, FATAL, OFF
+log_mode = OFF
+
+# (Optional) log file name
+# if not provided and the log mode is not OFF, the log messages will be sent to stdout
+log_file = /home/espinosa/log/manager_$rank.log
+EOF
+ cd /tmp
+ /home/espinosa/bin/manager 7005 &
+ #/home/espinosa/bin/manager 7005 > /dev/null 2> /dev/null &
+ ifs_slave $rank
}
# function: ifs_slave [head]
@@ -73,77 +60,199 @@
# [head] node.
ifs_slave(){
- ifs_rank=$1
- # For Chirp, just start the server
+ ifs_rank=`get_ip $1`
+ rank=`get_rank`
+
+ sleep 30 # Wait for manager
+ echo "Rank XX: starting benefactor"
+
mkdir -p /dev/shm/share
- cat > /dev/shm/share/.__acl << EOF
-address:192.168.1.* rwlda
-address:10.* rwlda
-address:127.0.0.1 rwlda
+ cat > /tmp/benefactor_config.cfg << EOF
+# the hostname or the IP address of the manager
+manager_name = $ifs_rank
+
+# manager port number
+manager_port = 7005
+
+# The path to the local directory where the benefactor will store the chunks
+benefactor_path = /dev/shm/share
+
+# Aggregation type, this could be <DISK> or <MEMORY>
+# <DISK> is a typical setting for general workloads
+# <MEMORY> is under development
+aggregation_type = DISK
+
+# The donated disk space size in MB
+disk_space_size = 512
+
+# The donated memory space size in MB, this is effective if aggregation type= MEMORY
+memory_space_size = 512
+
+# The manager update period in seconds
+update_period = 5
+
+# (Optional) The local address the benefactor should use - specially in multihomed machines
+#benefactor_address =
+
+# (Optional) Log mode could be : DEBUG, VERBOS, ERROR, FATAL, OFF
+log_mode = OFF
+
+# (Optional) log file name
+# if not provided and the log mode is not OFF, the log messages will be sent to stdout
+log_file = /home/espinosa/log/benefactor_$rank.log
+
EOF
-
- $CHIRPROOT/bin/chirp_server -r /dev/shm/share
+ cd /tmp
+ /home/espinosa/bin/benefactor
}
ifs_mount() {
- ifs_rank=$1
- # For Chirp, just start the server
- if [ -d /dev/shm/share ]; then
- rm -rf /dev/shm/share
- fi
- mkdir -p /dev/shm/share
- cat > /dev/shm/share/.__acl << EOF
-address:192.168.1.* rwlda
-address:10.* rwlda
-address:127.0.0.1 rwlda
+ ifs_rank=`get_ip $1`
+ rank=`get_rank`
+
+ sleep 30 # Wait for manager
+ echo "Rank $rank: Mounting IFS"
+
+ cat > /tmp/flfs_config.cfg << EOF
+# the hostname or the IP address of the manager
+manager_name = $ifs_rank
+
+
+# manager port number
+manager_port = 7005
+
+
+# execution mode - not fully implemented yet - DEBUG is the only option for now
+execution_mode = DEBUG
+
+
+# Naming scheme - to name chunk by sequence number <SEQNUM> or by hash <HASH>
+# <SEQNUM> the chunks will be named by sequence number, this is a typical setting for general workloads
+# <HASH> this option is for content addressability feature
+chunk_naming = SEQNUM
+
+
+# Commit scheme - specifies whether to overwrite, non-overwrite or version the previous copy.
+# possible values include <NOOVERWRITE> , <OVERWRITE>, and <VERSIONING>
+# <NOOVERWRITE> : writing a new file with the same name as an existing file will fail.
+# <OVERWRITE> : if a new file is stored in the system with the same file name as an existing file name,
+# the new file will overwrite the old file
+# <VERSIONING> : if a new file is stored in the system with the same file name as an existing file name,
+# the new file will be store as a new version of the file
+commit_scheme = NOOVERWRITE
+
+
+# Number of chunks to reserve in the repository
+num_reserve_chunks = 1024
+
+# Write Interface type - to select the write interface type, the following are the write interfaces
+# <SLIDINGWINDOWWRITE> : Sliding window write interface, this is the typical setting.
+# <INCREMENTALWRITE> : Incremental write interface
+# <COMPLETELOCALWRITE> : The Complete local write interface
+# <INCREMENTALWRITE> and <COMPLETELOCALWRITE> use the local disk in the write operation. these two are not extensively tested.
+write_interface_type = SLIDINGWINDOWWRITE
+
+
+#the memory space allocated for the buffers in the write operations, in MB,
+# effects Sliding window interface only
+memory_size = 256
+
+
+# if <INCREMENTALWRITE> is the selected write interface <inc_write_file_size> specifies
+# the size of the temporary local files in number of chunks
+inc_write_file_size = 64
+
+
+# if <INCREMENTALWRITE> or <COMPLETELOCALWRITE> is the selected write interface <local_write_directory> specifies
+# the path to the directory where the temporary files will be saved
+local_write_directory = /tmp/FLIncW
+
+# Read Interface type - to select the read interface type, currently <FBR> is only implemented
+# <FBR> : Fixed Buffer per Request read interface
+read_interface_type = FBR
+
+
+# if <FBR> is the selected read interface <fbr_request_buffer_size> specifies
+# the number of chunks allocated for every request
+fbr_request_buffer_size = 4
+
+# Number of threads per write agent ( there is an agent per benefactor )
+num_threads_per_agent = 1
+
+
+# Cache update period in seconds, if this value is set to 0 then the cache is disabled
+cache_update_period = 5
+
+
+# (Optional) Log mode could be : DEBUG, VERBOS, ERROR, FATAL, OFF
+log_mode = OFF
+
+# (Optional) log file name
+# if not provided and the log mode is not OFF, the log messages will be sent to stdout
+log_file = /home/espinosa/log/mosastore_$rank.log
+
EOF
-
- $CHIRPROOT/bin/chirp_server -r /dev/shm/share &
- mkdir /chirp
- $CHIRPROOT/bin/chirp_fuse -a address /chirp
- # TODO: make a symlink to a slave's proper IFS to
- # prevent always recalculating it in higher-level
- # scripts
- # ln -sf /chirp/`get_ip $ifs_rank`@stripe /ifsmount
+ mkdir -p /dataifs
+ cd /tmp
+ /home/espinosa/bin/mosastore -o direct_io -o sync_read /dataifs &
+ #/home/espinosa/bin/mosastore -o direct_io -o sync_read /dataifs -d 2> /dev/null > /dev/null &
}
+# Main
+#
+
+# Check sanity of environment
+if [ -z $CIOROOT ]; then
+ echo "CIOROOT not defined"
+ exit 1
+fi
+if [ -z $CIOARCH ]; then
+ echo "CIOARCH note defined"
+ exit 1
+fi
+
+# BGP specific initialization
+LD_LIBRARY_PATH=/lib:/fuse/lib:/fuse/usr/lib
+PATH=/fuse/bin:/fuse/usr/bin:$PATH
+
+# Initiate IP over Torus
+start_iptorus
+
+# Process args
PART_SIZE=$8 # also BG_SIZE
IFS_NUM=$9
STRIPE_SIZE=${10}
+# Compute rank
RANK=`get_rank`
IFS_GRP=$(( PART_SIZE / IFS_NUM ))
IFS_RANK=$(( RANK / IFS_GRP + 1 ))
IFS_SLAVE=$(( IFS_RANK + STRIPE_SIZE ))
-
export IFS_RANK
export CHIRP_ADD=`get_ip $IFS_RANK`
-# Save information
+# Save rank information
echo $RANK > /dev/shm/RANK
echo $IFS_RANK > /dev/shm/IFS_RANK
+echo $IFS_SLAVE > /dev/shm/IFS_SLAVE
-# Generate Hash services
-DHT=$(( RANK % 128 ))
-if [ -f /dev/shm/DHTlist ]; then
- rm -f /dev/shm/DHTlist
-fi
-for (( i = 0; i < $PART_SIZE; i = i + 128 )); do
- echo `get_ip $i` >> /dev/shm/DHTlist
-done
-
-#Core MTIO logic
-if [ $DHT -eq 0 ]; then
- RUBY=/home/espinosa/local/bin/ruby
- $RUBY $CIOROOT/libexec/hashserver.rb
-elif [ $RANK -eq $IFS_RANK ]; then
- ifs_head $STRIPE_SIZE
- /home/zzhang/cio/bin/collector.sh
-elif [ $RANK -lt $IFS_SLAVE ]; then
- ifs_slave $IFS_RANK
+#Core CDM logic
+if [ $IFS_NUM -ne 0 ]; then
+ if [ $RANK -eq $IFS_RANK ]; then
+ ifs_head $STRIPE_SIZE
+ elif [[ $RANK -lt $IFS_SLAVE && $RANK -gt $IFS_RANK ]]; then
+ ifs_slave $IFS_RANK
+ else
+ if [ $RANK -eq 0 ]; then
+ ifs_mount $IFS_RANK
+ $CIOROOT/libexec/falkon/runworker-bgp.sh $1 $2 $3 $4 $5 $6 $7
+ fi
+ fi
else
+ if [ $RANK -eq 0 ]; then
ifs_mount $IFS_RANK
$CIOROOT/libexec/falkon/runworker-bgp.sh $1 $2 $3 $4 $5 $6 $7
+ fi
fi
# Quick hack
Modified: usertools/cio/bin/falkon-start.sh
===================================================================
--- usertools/cio/bin/falkon-start.sh 2009-09-22 18:28:15 UTC (rev 3112)
+++ usertools/cio/bin/falkon-start.sh 2009-09-25 23:02:36 UTC (rev 3113)
@@ -12,4 +12,5 @@
echo "ERROR: CHIRPROOT not defined"
exit 1
fi
-$CIOROOT/libexec/falkon/falkon-start-$CIOARCH.sh $@
+#$CIOROOT/libexec/falkon/falkon-start-$CIOARCH.sh $@
+$CIOROOT/libexec/falkon/falkon-start-bgp_logging.sh $@
Modified: usertools/cio/bin/swift_bgp.sh
===================================================================
--- usertools/cio/bin/swift_bgp.sh 2009-09-22 18:28:15 UTC (rev 3112)
+++ usertools/cio/bin/swift_bgp.sh 2009-09-25 23:02:36 UTC (rev 3113)
@@ -38,7 +38,7 @@
<execution provider="deef" url="http://$ip:50001/wsrf/services/GenericPortal/core/WS/GPFactoryService"/>
<gridftp url="local://localhost"/>
<workdirectory>$workdir</workdirectory>
- <profile namespace="karajan" key="jobThrottle">8</profile>
+ <profile namespace="karajan" key="jobThrottle">2.54</profile>
<profile namespace="karajan" key="initialScore">1000</profile>
</pool>
@@ -55,4 +55,4 @@
exit 1
fi
-swift </dev/null -sites.file ./sites.xml -tc.file ./tc.data $*
+swift </dev/null -sites.file ./sites.xml -tc.file ./tc.data $@
Deleted: usertools/cio/bin/wrapper.sh
===================================================================
--- usertools/cio/bin/wrapper.sh 2009-09-22 18:28:15 UTC (rev 3112)
+++ usertools/cio/bin/wrapper.sh 2009-09-25 23:02:36 UTC (rev 3113)
@@ -1,351 +0,0 @@
-#!/fuse/bin/bash
-# this script must be invoked inside of bash, not plain sh
-PATH=/fuse/bin:/fuse/usr/bin:$PATH
-infosection() {
- echo >& "$INFO"
- echo "_____________________________________________________________________________" >& "$INFO"
- echo >& "$INFO"
- echo " $1" >& "$INFO"
- echo "_____________________________________________________________________________" >& "$INFO"
- echo >& "$INFO"
-}
-
-info() {
- infosection "uname -a"
- uname -a 2>&1 >& "$INFO"
- infosection "id"
- id 2>&1 >& "$INFO"
- infosection "env"
- env 2>&1 >& "$INFO"
- infosection "df"
- df 2>&1 >& "$INFO"
- infosection "/proc/cpuinfo"
- cat /proc/cpuinfo 2>&1 >& "$INFO"
- infosection "/proc/meminfo"
- cat /proc/meminfo 2>&1 >& "$INFO"
- infosection "command line"
- echo $COMMANDLINE 2>&1 >& "$INFO"
-}
-
-logstate() {
- echo "Progress " `date +"%Y-%m-%d %H:%M:%S"` " $@" >& "$INFO"
-}
-
-log() {
- echo "$@" >& "$INFO"
-}
-
-fail() {
- EC=$1
- shift
- log $@
- info
- exit $EC
-}
-
-checkError() {
- if [ "$?" != "0" ]; then
- fail $@
- fi
-}
-
-checkEmpty() {
- if [ "$1" == "" ]; then
- shift
- fail 254 $@
- fi
-}
-
-getarg() {
- NAME=$1
- shift
- VALUE=""
- SHIFTCOUNT=0
- if [ "$1" == "$NAME" ]; then
- shift
- let "SHIFTCOUNT=$SHIFTCOUNT+1"
- while [ "${1:0:1}" != "-" ] && [ "$#" != "0" ]; do
- VALUE="$VALUE $1"
- shift
- let "SHIFTCOUNT=$SHIFTCOUNT+1"
- done
- else
- fail 254 "Missing $NAME argument"
- fi
- VALUE="${VALUE:1}"
-}
-
-openinfo() {
- exec 3<> $1
- INFO=3
-}
-
-closeinfo() {
- exec 3>&-
-}
-
-cioinput() {
- INPUT=$1
- FILEPATH=`dirname $INPUT`
- FILENAME=`basename $INPUT`
- TYPE=${INPUT%%/*}
- echo "INPUT_TYPE: $TYPE" >> /dev/shm/cio
- if [ "$TYPE" == "common" ] && [ -e /dev/shm/share/$FILENAME ]; then
- echo "cioinput(): link for common input $INPUT" >> /dev/shm/cio
- ln -s "/dev/shm/share/$FILENAME" "$DIR/$L"
- elif [ "$TYPE" == "_concurrent" ]; then
- echo "cioinput(): toruscp for intermediate data $INPUT" >> /dev/shm/cio
- echo DIR: `pwd` >> /dev/shm/torus
- mkdir -p $DIR/$FILEPATH
- echo "cioinput(): $INPUT" >> /dev/shm/cio
- /home/zzhang/DHT/bin/DHTcp.sh $INPUT $DIR
- else
- echo "cioinput(): copy from GPFS $INPUT pwd:`pwd` " >> /dev/shm/cio
- cp "$PWD/shared/$L" "$DIR/$L"
- fi
-}
-ciooutput() {
- OUTPUT=$1
- FILEPATH=`dirname $OUTPUT`
- FILENAME=`basename $OUTPUT`
- TYPE=${OUTPUT%%/*}
- echo "OUTPUT_TYPE: $TYPE" >> /dev/shm/cio
-
- if [ "$TYPE" == "_concurrent" ]; then
- echo "ciooutput(): write intermediate data $OUTPUT" >> /dev/shm/cio
- echo `pwd` >> /dev/shm/cio
- /home/zzhang/DHT/bin/DHTregister.sh $OUTPUT $RANK
- else
- echo "ciooutput(): write regular data $OUTPUT" >> /dev/shm/cio
- #dd if="$OUTPUT" of="$WFDIR/shared/$OUTPUT" bs=128k
- #echo "$OUTPUT /chirp/multi/${CHIRP_ADD}@stripe/" >> /dev/shm/chirp_add
- cp "$OUTPUT" /chirp/multi/${CHIRP_ADD}@stripe/
- fi
-}
-
-extractfile()
-{
- ARCHIVE="$1"
- START_LOCK=$tmp/mtio-lock-started
- END_LOCK=$tmp/mtio-lock-finished
- current=`pwd`
- cd $tmp
- mkdir ${START_LOCK}
-
- EXIT_CODE=$?
- # EXIT_CODE=0 ###### FOR TESTING - forces creation of new dir
-
- if [ "${EXIT_CODE}" -ne "0" ]; then
- echo "waiting for data to be extracted"
- ((i = 0))
- while (( i == 0 ))
- do
- if [ -d "${END_LOCK}" ]; then
- ((i = 1))
- fi
-
- if [ ! -d "${END_LOCK}" ]; then
- /bin/sleep 1
- fi
- done
- else
- if [ ! -f "${ARCHIVE}" ]; then
- echo "archive doesn't exist... exiting"
- rmdir $START_LOCK
- exit -2
- fi
-
- echo "extract the archive"
- cd $tmp
- echo "extracted" >> /dev/shm/extract
- tar xf $ARCHIVE
-
- EXIT_CODE=$?
-
- if [ "${EXIT_CODE}" -ne "0" ]; then
- echo "Error in untar of ${ARCHIVE} /... exit code ${EXIT_CODE}"
- exit ${EXIT_CODE}
- rmdir $START_LOCK
- fi
-
- mkdir -p ${END_LOCK}
-
- EXIT_CODE=$?
- if [ "${EXIT_CODE}" -ne "0" ]; then
- echo "Error in mkdir ${END_LOCK}... exit code ${EXIT_CODE}"
- rmdir $START_LOCK
- exit ${EXIT_CODE}
- fi
- fi
- cd $current
-}
-#/home/zzhang/bashtest/tar.sh
-tmp=/dev/shm/share
-extractfile common.tar
-cd $PWD
-
-RANK=`echo $CONTROL_INIT | awk -F, '{print $4}'`
-echo $@ >> /dev/shm/log
-COMMANDLINE=$@
-WFDIR=$PWD
-ID=$1
-checkEmpty "$ID" "Missing job ID"
-
-shift
-
-getarg "-jobdir" "$@"
-JOBDIR=$VALUE
-shift $SHIFTCOUNT
-
-checkEmpty "$JOBDIR" "Missing job directory prefix"
-mkdir -p /dev/shm/swift-info/$JOBDIR
-
-closeinfo
-openinfo "/dev/shm/swift-info/$JOBDIR/${ID}-info"
-#openinfo "/dev/null"
-
-logstate "LOG_START"
-
-getarg "-e" "$@"
-EXEC=$VALUE
-shift $SHIFTCOUNT
-
-getarg "-out" "$@"
-STDOUT=$VALUE
-shift $SHIFTCOUNT
-
-getarg "-err" "$@"
-STDERR=$VALUE
-shift $SHIFTCOUNT
-
-getarg "-i" "$@"
-STDIN=$VALUE
-shift $SHIFTCOUNT
-
-getarg "-d" "$@"
-DIRS=$VALUE
-shift $SHIFTCOUNT
-
-getarg "-if" "$@"
-INF=$VALUE
-shift $SHIFTCOUNT
-
-getarg "-of" "$@"
-OUTF=$VALUE
-shift $SHIFTCOUNT
-
-getarg "-k" "$@"
-KICKSTART=$VALUE
-shift $SHIFTCOUNT
-
-if [ "$1" == "-a" ]; then
- shift
-else
- fail 254 "Missing arguments (-a option)"
-fi
-
-if [ "X$SWIFT_JOBDIR_PATH" != "X" ]; then
- DIR=${SWIFT_JOBDIR_PATH}/$JOBDIR/$ID
- COPYNOTLINK=1
-else
- DIR=/dev/shm/swift-work/$JOBDIR/$ID
- COPYNOTLINK=0
-fi
-
-PATH=$PATH:/bin:/usr/bin
-
-if [ "$PATHPREFIX" != "" ]; then
-export PATH=$PATHPREFIX:$PATH
-fi
-
-IFS="^"
-
-logstate "CREATE_JOBDIR"
-mkdir -p $DIR
-
-logstate "CREATE_INPUTDIR"
-
-for D in $DIRS ; do
- mkdir -p "$DIR/$D"
- checkError 254 "Failed to create input directory $D"
-done
-
-#cd $DIR
-logstate "LINK_INPUTS"
-for L in $INF ; do
- if [ $COPYNOTLINK = 1 ]; then
- cp "$PWD/shared/$L" "$DIR/$L"
- checkError 254 "Failed to copy input file $L"
- else
- cioinput $L
- #cp "$PWD/shared/$L" "$DIR/$L"
- checkError 254 "Failed to link input file $L `ls -l $DIR/$L`"
- fi
-done
-
-logstate "EXECUTE"
-
-cd $DIR
-
-if [ "$KICKSTART" == "" ]; then
- if [ "$STDIN" == "" ]; then
- "$EXEC" "$@" 1>"$STDOUT" 2>"$STDERR"
- else
- "$EXEC" "$@" 1>"$STDOUT" 2>"$STDERR" <"$STDIN"
- fi
- checkError $? "Exit code $?"
-else
- if [ ! -f "$KICKSTART" ]; then
- fail 254 "The Kickstart executable ($KICKSTART) was not found"
- elif [ ! -x "$KICKSTART" ]; then
- fail 254 "The Kickstart executable ($KICKSTART) does not have the executable bit set"
- else
- mkdir -p $WFDIR/kickstart/$JOBDIR
- if [ "$STDIN" == "" ]; then
- "$KICKSTART" -H -o "$STDOUT" -e "$STDERR" "$TMPEXEC" "$@" 1>kickstart.xml 2>"$STDERR"
- else
- "$KICKSTART" -H -o "$STDOUT" -i "$STDIN" -e "$STDERR" "$TMPEXEC" "$@" 1>kickstart.xml 2>"$STDERR"
- fi
- export APPEXIT=$?
- mv -f kickstart.xml "$WFDIR/kickstart/$JOBDIR/$ID-kickstart.xml" 2>&1 >& "$INFO"
- checkError 254 "Failed to copy Kickstart record to shared directory"
- if [ "$APPEXIT" != "0" ]; then
- fail $APPEXIT "Exit code $APPEXIT"
- fi
- fi
-fi
-
-logstate "EXECUTE_DONE"
-
-MISSING=
-for O in $OUTF ; do
- if [ ! -f "$DIR/$O" ]; then
- if [ "$MISSING" == "" ]; then
- MISSING=$O
- else
- MISSING="$MISSING, $O"
- fi
- fi
-done
-if [ "$MISSING" != "" ]; then
- fail 254 "The following output files were not created by the application: $MISSING"
-fi
-
-logstate "COPYING_OUTPUTS"
-for O in $OUTF ; do
- #cp "$DIR/$O" "$WFDIR/shared/$O" 2>&1 >& "$INFO"
- #cp "$DIR/$O" "$WFDIR/shared/$O"
- #dd if="$DIR/$O" of="$WFDIR/shared/$JOBDIR/$O" bs=128k
- #dd if="$DIR/$O" of="$WFDIR/shared/$O" bs=128k
- ciooutput $O
- checkError 254 "Failed to copy output file $O to shared directory"
-done
-
-logstate "RM_JOBDIR"
-
-closeinfo
-#rm -f "$WFDIR/info/$JOBDIR/${ID}-info"
-#echo "$WFDIR/info/$JOBDIR/${ID}-info" >> /dev/shm/log
-#mkdir -p "$WFDIR/info/$JOBDIR/"
-#dd if=/dev/shm/swift-info/$JOBDIR/${ID}-info of="$WFDIR/info/$JOBDIR/${ID}-info" bs=128k
-#dd if=/dev/shm/swift-info/$JOBDIR/${ID}-info of="/fuse/tmp/${ID}-info" bs=128k
Deleted: usertools/cio/libexec/falkon/falkon-start-bgp-logging.sh
===================================================================
--- usertools/cio/libexec/falkon/falkon-start-bgp-logging.sh 2009-09-22 18:28:15 UTC (rev 3112)
+++ usertools/cio/libexec/falkon/falkon-start-bgp-logging.sh 2009-09-25 23:02:36 UTC (rev 3113)
@@ -1,148 +0,0 @@
-#!/bin/bash
-
-# Patched falkon-start-bgp-ram.sh which logs GPFS IO activity in ZeptoOS
-CIOROOT=/home/espinosa/cio
-
-
- if [ -z "$3" ]; then
- echo "usage: $0 <QueueName> <NumNodes> <MaxTimeMin>"
- echo "usage: $0 prod 1024 60"
- echo "-- or --"
- echo "usage: $0 <QueueName> <NumNodes> <MaxTimeMin> <WorkersPerNode>"
- echo "usage: $0 prod 1024 60 4"
- exit 1
- fi
-
-
-if [ -z "${FALKON_HOME}" ]; then
- echo "ERROR: environment variable FALKON_HOME not defined"
- exit 1
-fi
-
-
-QUEUE_NAME=$1
-PROFILE_NAME="zeptocn-log"
-NUM_NODES=$2
-let NUM_ION=NUM_NODES/64
-MAX_TIME_MIN=$3
-SERVICE_IP="192.168.1.254"
-SERVICE_PORT1=55000
-SERVICE_PORT2=55001
-WORKERS_PER_NODE=4
-if [ ! -z $4 ];then
- WORKERS_PER_NODE=$4
-fi
-
-
-
-cp $CIOROOT/tools/zoid/logging-script.sh ${HOME}/zoid-user-script.sh
-chmod +x ${HOME}/zoid-user-script.sh
-
-FALKON_JOB_ID=`falkon-id-get.sh N/A`
-EXIT_CODE=$?
-
-
- if [ "${EXIT_CODE}" -ne "0" ]; then
- echo "Error in geting a unique falkon ID.. 'falkon-get-id.sh N/A'"
- cqdel ${ZOID_JOB_ID}
- exit ${EXIT_CODE}
- fi
-
-
-
-ZOID_JOB_ID=`cqsub -q ${QUEUE_NAME} -k ${PROFILE_NAME} -C ${HOME} -t ${MAX_TIME_MIN} -n ${NUM_NODES} -e LD_LIBRARY_PATH=/lib:/fuse/lib:/fuse/usr/lib /bgsys/linux/1.2.020080512/bin/bash /fuse/${FALKON_WORKER_HOME}/run.worker-c-bgp.sh ${SERVICE_IP} ${SERVICE_PORT1} ${SERVICE_PORT2} ${WORKERS_PER_NODE} ${USER} ${FALKON_JOB_ID} ${FALKON_ROOT}`
-
-echo $ZOID_JOB_ID $FALKON_JOB_ID $QUEUE_NAME $NUM_NODES $MAX_TIME_MIN $WORKERS_PER_NODE >>$HOME/.falkonjobs
-
-EXIT_CODE=$?
-
- if [ "${EXIT_CODE}" -ne "0" ]; then
- echo "Error in submitting job to Cobalt.. 'cqsub -q ${QUEUE_NAME} -k ${PROFILE_NAME} -C ${HOME} -t ${MAX_TIME_MIN} -n ${NUM_NODES} -e LD_LIBRARY_PATH=/lib:/fuse/lib:/fuse/usr/lib /bgsys/linux/1.2.020080512/bin/bash /fuse/${FALKON_WORKER_HOME}/run.worker-c-bgp.sh ${SERVICE_IP} ${SERVICE_PORT1} ${SERVICE_PORT2} ${WORKERS_PER_NODE} ${USER} ${FALKON_JOB_ID} ${FALKON_ROOT}' "
- exit ${EXIT_CODE}
- fi
-
-falkon-id-update.sh ${FALKON_JOB_ID} ${ZOID_JOB_ID}
-
-EXIT_CODE=$?
-
- if [ "${EXIT_CODE}" -ne "0" ]; then
- echo "Error in updating cobalt job info for falkon.. 'falkon-update-id.sh ${ZOID_JOB_ID}'"
- cqdel ${ZOID_JOB_ID}
- exit ${EXIT_CODE}
- fi
-
-
-FALKON_JOBID_HOME=${FALKON_ROOT}/users/${USER}/${FALKON_JOB_ID}
-
-echo "Submitted job ${ZOID_JOB_ID} to Cobalt, creating the job specific Falkon tree for logs and configuration in ${FALKON_JOBID_HOME}..."
-
-mkdir -p ${FALKON_JOBID_HOME}
-
-cp ${FALKON_HOME}/falkon.env.bgp* ${FALKON_JOBID_HOME}/
-cp -r ${FALKON_HOME}/config ${FALKON_JOBID_HOME}/
-cp ${FALKON_JOBID_HOME}/config/Client-service-URIs.config2 ${FALKON_JOBID_HOME}/config/Client-service-URIs.config
-mkdir -p ${FALKON_JOBID_HOME}/logs/client ${FALKON_JOBID_HOME}/logs/service ${FALKON_JOBID_HOME}/logs/provisioner ${FALKON_JOBID_HOME}/logs/worker
-
-
-DATE=`date +%s`
-echo "$DATE: pre-creating log dirs for Falkon service..."
-RACK_START=0
-RACK_END=48
-SEQUENCE_DIR=`seq -w ${RACK_START} ${RACK_END}`
-PSET_START=1
-PSET_END=16
-for a in ${SEQUENCE_DIR}
-do
- for ((b=${PSET_START}; b <= ${PSET_END} ; b++)) # Double parentheses, and "LIMIT" with no "$".
- do
- DIR_NAME="ion-R${a}-${b}"
- mkdir -p ${FALKON_JOBID_HOME}/logs/service/$DIR_NAME
- done
-done
-
-for ((b=${PSET_START}; b <= ${PSET_END} ; b++)) # Double parentheses, and "LIMIT" with no "$".
-do
- DIR_NAME="ion-${b}"
- mkdir -p ${FALKON_JOBID_HOME}/logs/service/$DIR_NAME
-done
-
-
-DATE=`date +%s`
-echo "$DATE: done creating log dirs for Falkon service!"
-
-
-
-FALKON_HOME_RAM=/tmp/${USER}/falkon
-
-ln -s ${FALKON_HOME}/apps ${FALKON_JOBID_HOME}/apps
-ln -s ${FALKON_HOME_RAM}/container ${FALKON_JOBID_HOME}/container
-ln -s ${FALKON_HOME}/service ${FALKON_JOBID_HOME}/service
-ln -s ${FALKON_HOME}/worker ${FALKON_JOBID_HOME}/worker
-ln -s ${FALKON_HOME}/AstroPortal ${FALKON_JOBID_HOME}/AstroPortal
-ln -s ${FALKON_HOME}/client ${FALKON_JOBID_HOME}/client
-ln -s ${FALKON_HOME}/monitor ${FALKON_JOBID_HOME}/monitor
-ln -s ${FALKON_HOME}/bin ${FALKON_JOBID_HOME}/bin
-ln -s ${FALKON_HOME}/config ${FALKON_JOBID_HOME}/config
-ln -s ${FALKON_HOME}/ploticus ${FALKON_JOBID_HOME}/ploticus
-ln -s ${FALKON_HOME}/webserver ${FALKON_JOBID_HOME}/webserver
-ln -s ${FALKON_HOME}/workloads ${FALKON_JOBID_HOME}/workloads
-ln -s ${FALKON_HOME}/id ${FALKON_JOBID_HOME}/id
-ln -s ${FALKON_HOME}/apache-ant-1.7.0 ${FALKON_JOBID_HOME}/apache-ant-1.7.0
-ln -s ${FALKON_HOME}/ibm-java2-ppc64-50 ${FALKON_JOBID_HOME}/ibm-java2-ppc64-50
-ln -s ${FALKON_HOME_RAM}/ibm-java2-ppc-50 ${FALKON_JOBID_HOME}/ibm-java2-ppc-50
-ln -s ${FALKON_HOME}/falkon.tgz ${FALKON_JOBID_HOME}/falkon.tgz
-
-
-if [ ! -d "${FALKON_JOBID_HOME}" ]; then
- echo "ERROR: invalid path ${FALKON_JOBID_HOME}... exiting"
- cqdel ${ZOID_JOB_ID}
- exit 1
-fi
-
-echo "Succesfully submitted the job to Cobalt, and setup job specific Falkon tree!"
-echo "To monitor the job status, type 'cqstat | grep ${USER}'; once it is in running state, you can use the Falkon specific command ...."
-echo "To submit your Falkon-based workload, type: ....; you can do this any time, the falkon workload will wait for the resources to come online, and will only be submitted when everything is ready; the script is run in the background, so the workload will run even if the ssh session gets disconnected."
-echo ""
-echo "Remember, your job id is ${ZOID_JOB_ID}, and if you need to look through the logs manually for anything, remember that you can find them at ${HOME}/${ZOID_JOB_ID}.output, ${HOME}/${ZOID_JOB_ID}.error, and ${FALKON_JOBID_HOME}/logs/..."
-
-
Modified: usertools/cio/libexec/falkon/falkon-start-bgp.sh
===================================================================
--- usertools/cio/libexec/falkon/falkon-start-bgp.sh 2009-09-22 18:28:15 UTC (rev 3112)
+++ usertools/cio/libexec/falkon/falkon-start-bgp.sh 2009-09-25 23:02:36 UTC (rev 3113)
@@ -24,7 +24,7 @@
exit 1
fi
-PROFILE_NAME="zeptocn-swift"
+PROFILE_NAME="zeptoos"
QUEUE_NAME=$1
NUM_NODES=$2
let NUM_ION=NUM_NODES/64
@@ -46,7 +46,7 @@
STR=$6
fi
-cp $CIOROOT/libexec/zoid/logging-script.sh ${HOME}/zoid-user-script.sh
+cp ${FALKON_HOME}/bin/zoid-user-script.sh ${HOME}/zoid-user-script.sh
chmod +x ${HOME}/zoid-user-script.sh
FALKON_JOB_ID=`falkon-id-get.sh N/A`
Copied: usertools/cio/libexec/falkon/falkon-start-bgp_logging.sh (from rev 2853, usertools/cio/libexec/falkon/falkon-start-bgp-logging.sh)
===================================================================
--- usertools/cio/libexec/falkon/falkon-start-bgp_logging.sh (rev 0)
+++ usertools/cio/libexec/falkon/falkon-start-bgp_logging.sh 2009-09-25 23:02:36 UTC (rev 3113)
@@ -0,0 +1,165 @@
+#!/bin/bash
+
+if [ $# -lt 3 ]; then
+ cat << EOF
+Usage: $0 <QueueName> <NumNodes> <WallTime>
+Example: $0 prod 1024 60
+--or--
+Usage: $0 <QueueName> <NumNodes> <WallTime> \
+ <WorkersPerNode> <NumIFS> <SizeIFS>
+EOF
+ exit 1
+fi
+
+if [ -z $FALKON_HOME ]; then
+ echo "ERROR: environment variable FALKON_HOME not defined"
+ exit 1
+fi
+if [ -z $CIOROOT ]; then
+ echo "ERROR: CIOROOT env not defined"
+ exit 1
+fi
+if [[ $CIOARCH != "bgp" ]]; then
+ echo "ERROR: Wrong architecture. Must be bgp"
+ exit 1
+fi
+
+PROFILE_NAME="zeptoos"
+QUEUE_NAME=$1
+NUM_NODES=$2
+let NUM_ION=NUM_NODES/64
+MAX_TIME_MIN=$3
+SERVICE_IP="192.168.1.254"
+SERVICE_PORT1=55000
+SERVICE_PORT2=55001
+
+WORKERS_PER_NODE=4
+DATA_NUM=1
+STR=1
+if [ -n "$4" ];then
+ WORKERS_PER_NODE=$4
+fi
+if [ -n "$5" ];then
+ DATA_NUM=$5
+fi
+if [ -n "$6" ];then
+ STR=$6
+fi
+
+cp $CIOROOT/libexec/zoid/logging-script.sh ${HOME}/zoid-user-script.sh
+chmod +x ${HOME}/zoid-user-script.sh
+
+FALKON_JOB_ID=`falkon-id-get.sh N/A`
+EXIT_CODE=$?
+
+if [ "${EXIT_CODE}" -ne "0" ]; then
+ echo "Error in geting a unique falkon ID.. 'falkon-get-id.sh N/A'"
+ cqdel ${ZOID_JOB_ID}
+ exit ${EXIT_CODE}
+fi
+
+
+
+WORKER_SCRIPT=$CIOROOT/bin/ciologic-$CIOARCH.sh
+SUBMIT_CMD="cqsub -q ${QUEUE_NAME} -k ${PROFILE_NAME} -C ${HOME} -t \
+ ${MAX_TIME_MIN} -n ${NUM_NODES} \
+ -e LD_LIBRARY_PATH=/lib:/fuse/lib:/fuse/usr/lib:CIOROOT=$CIOROOT:CIOARCH=$CIOARCH:CHIRPROOT=$CHIRPROOT \
+ /bgsys/linux/1.2.020080512/bin/bash $WORKER_SCRIPT \
+ ${SERVICE_IP} ${SERVICE_PORT1} ${SERVICE_PORT2} ${WORKERS_PER_NODE} \
+ ${USER} ${FALKON_JOB_ID} ${FALKON_ROOT} \
+ $NUM_NODES $DATA_NUM $STR" # MTIO parameters
+
+ZOID_JOB_ID=`$SUBMIT_CMD`
+EXIT_CODE=$?
+
+if [ "${EXIT_CODE}" -ne "0" ]; then
+ echo "Error in submitting job to Cobalt.. $SUBMIT_CMD"
+ exit ${EXIT_CODE}
+fi
+
+falkon-id-update.sh ${FALKON_JOB_ID} ${ZOID_JOB_ID}
+
+EXIT_CODE=$?
+
+if [ "${EXIT_CODE}" -ne "0" ]; then
+ echo "Error in updating cobalt job info for falkon.. 'falkon-update-id.sh ${ZOID_JOB_ID}'"
+ cqdel ${ZOID_JOB_ID}
+ exit ${EXIT_CODE}
+fi
+
+
+FALKON_JOBID_HOME=${FALKON_ROOT}/users/${USER}/${FALKON_JOB_ID}
+
+echo "Submitted job ${ZOID_JOB_ID} to Cobalt, creating the job specific Falkon tree for logs and configuration in ${FALKON_JOBID_HOME}..."
+
+mkdir -p ${FALKON_JOBID_HOME}
+
+cp ${FALKON_HOME}/falkon.env.bgp* ${FALKON_JOBID_HOME}/
+cp -r ${FALKON_HOME}/config ${FALKON_JOBID_HOME}/
+cp ${FALKON_JOBID_HOME}/config/Client-service-URIs.config2 ${FALKON_JOBID_HOME}/config/Client-service-URIs.config
+mkdir -p ${FALKON_JOBID_HOME}/logs/client ${FALKON_JOBID_HOME}/logs/service ${FALKON_JOBID_HOME}/logs/provisioner ${FALKON_JOBID_HOME}/logs/worker
+
+
+DATE=`date +%s`
+echo "$DATE: pre-creating log dirs for Falkon service..."
+RACK_START=0
+RACK_END=48
+SEQUENCE_DIR=`seq -w ${RACK_START} ${RACK_END}`
+PSET_START=1
+PSET_END=16
+for a in ${SEQUENCE_DIR}
+do
+ for ((b=${PSET_START}; b <= ${PSET_END} ; b++)) # Double parentheses, and "LIMIT" with no "$".
+ do
+ DIR_NAME="ion-R${a}-${b}"
+ mkdir -p ${FALKON_JOBID_HOME}/logs/service/$DIR_NAME
+ done
+done
+
+for ((b=${PSET_START}; b <= ${PSET_END} ; b++)) # Double parentheses, and "LIMIT" with no "$".
+do
+ DIR_NAME="ion-${b}"
+ mkdir -p ${FALKON_JOBID_HOME}/logs/service/$DIR_NAME
+done
+
+
+DATE=`date +%s`
+echo "$DATE: done creating log dirs for Falkon service!"
+
+FALKON_HOME_RAM=/tmp/${USER}/falkon
+
+ln -s ${FALKON_HOME}/apps ${FALKON_JOBID_HOME}/apps
+ln -s ${FALKON_HOME_RAM}/container ${FALKON_JOBID_HOME}/container
+ln -s ${FALKON_HOME}/service ${FALKON_JOBID_HOME}/service
+ln -s ${FALKON_HOME}/worker ${FALKON_JOBID_HOME}/worker
+ln -s ${FALKON_HOME}/AstroPortal ${FALKON_JOBID_HOME}/AstroPortal
+ln -s ${FALKON_HOME}/client ${FALKON_JOBID_HOME}/client
+ln -s ${FALKON_HOME}/monitor ${FALKON_JOBID_HOME}/monitor
+ln -s ${FALKON_HOME}/bin ${FALKON_JOBID_HOME}/bin
+ln -s ${FALKON_HOME}/config ${FALKON_JOBID_HOME}/config
+ln -s ${FALKON_HOME}/ploticus ${FALKON_JOBID_HOME}/ploticus
+ln -s ${FALKON_HOME}/webserver ${FALKON_JOBID_HOME}/webserver
+ln -s ${FALKON_HOME}/workloads ${FALKON_JOBID_HOME}/workloads
+ln -s ${FALKON_HOME}/id ${FALKON_JOBID_HOME}/id
+ln -s ${FALKON_HOME}/apache-ant-1.7.0 ${FALKON_JOBID_HOME}/apache-ant-1.7.0
+ln -s ${FALKON_HOME}/ibm-java2-ppc64-50 ${FALKON_JOBID_HOME}/ibm-java2-ppc64-50
+ln -s ${FALKON_HOME_RAM}/ibm-java2-ppc-50 ${FALKON_JOBID_HOME}/ibm-java2-ppc-50
+ln -s ${FALKON_HOME}/falkon.tgz ${FALKON_JOBID_HOME}/falkon.tgz
+
+
+if [ ! -d "${FALKON_JOBID_HOME}" ]; then
+ echo "ERROR: invalid path ${FALKON_JOBID_HOME}... exiting"
+ cqdel ${ZOID_JOB_ID}
+ exit 1
+fi
+
+echo $ZOID_JOB_ID $FALKON_JOB_ID $QUEUE_NAME \
+ $NUM_NODES $MAX_TIME_MIN $WORKERS_PER_NODE >>$HOME/.falkonjobs
+
+echo "Succesfully submitted the job to Cobalt, and setup job specific Falkon tree!"
+echo "To monitor the job status, type 'cqstat | grep ${USER}'; once it is in running state, you can use the Falkon specific command ...."
+echo "To submit your Falkon-based workload, type: ....; you can do this any time, the falkon workload will wait for the resources to come online, and will only be submitted when everything is ready; the script is run in the background, so the workload will run even if the ssh session gets disconnected."
+echo ""
+echo "Remember, your job id is ${ZOID_JOB_ID}, and if you need to look through the logs manually for anything, remember that you can find them at ${HOME}/${ZOID_JOB_ID}.output, ${HOME}/${ZOID_JOB_ID}.error, and ${FALKON_JOBID_HOME}/logs/..."
+
+
More information about the Swift-commit
mailing list