[Swift-commit] r7230 - SwiftApps/Swift-MapRed/mapred_combiner_V4

yadunandb at ci.uchicago.edu yadunandb at ci.uchicago.edu
Thu Oct 24 15:39:05 CDT 2013


Author: yadunandb
Date: 2013-10-24 15:39:05 -0500 (Thu, 24 Oct 2013)
New Revision: 7230

Added:
   SwiftApps/Swift-MapRed/mapred_combiner_V4/map_wrapper.sh
   SwiftApps/Swift-MapRed/mapred_combiner_V4/reduce_wrapper.sh
Log:
Adding missed files

Added: SwiftApps/Swift-MapRed/mapred_combiner_V4/map_wrapper.sh
===================================================================
--- SwiftApps/Swift-MapRed/mapred_combiner_V4/map_wrapper.sh	                        (rev 0)
+++ SwiftApps/Swift-MapRed/mapred_combiner_V4/map_wrapper.sh	2013-10-24 20:39:05 UTC (rev 7230)
@@ -0,0 +1,53 @@
+#!/bin/bash
+# Usage: map_wrapper.sh [USER_MAP_SCRIPT] [MAP_ARGS...]
+# Runs the user map script, stores its stdout in a node-local file and
+# prints "HOSTNAME FILE" on stdout so that a reducer can locate the result.
+# By default with ARG1:100 and SLICESIZE=10000, this script will generate
+# 10^6 records.
+if [[ -n "$1" && -f "$1" ]]; then
+    USER_MAP_SCRIPT=$1
+    shift 1
+fi
+[[ -n "$USER_MAP_SCRIPT" ]] || { echo "ERROR: map script missing or not a file" 1>&2; exit 1; }
+MAP_ARGS=("$@")    # An array keeps arguments containing spaces intact
+
+FILE="input_$RANDOM.txt"
+FOLDER="/dev/shm"  # Same on all known sites; default keeps unknown hosts off "/"
+HOSTNAME=$(hostname -f)
+if   [[ "$HOSTNAME" == *midway* ]]; then # On midway node
+    EXECSERVER=/scratch/midway/yadunand/bin/exec_server
+    LOGFOLDER=/scratch/midway/yadunand
+    HOSTNAME="midway_$HOSTNAME"
+elif [[ "$HOSTNAME" == *nid* ]]; then    # On beagle node
+    EXECSERVER=/lustre/beagle/yadunandb/bin/exec_server
+    LOGFOLDER=/lustre/beagle/yadunandb/Swift-MapRed/mapred_combiner_V5
+    HOSTNAME="beagle_$HOSTNAME"
+fi
+
+TIMEOUT=1200
+NC_LISTEN_PORT=29900
+# Start services on Node
+if JOBS_LEVEL=$(echo "$PWD" | grep -o ".*jobs"); then
+    # mkdir fails when CHIRPING already exists, so only the first wrapper
+    # under this jobs directory (re)starts the services.
+    if mkdir "$JOBS_LEVEL/CHIRPING"; then
+        killall -u "$USER" chirp_server
+        echo "unix:$USER rwlds" >  "$FOLDER/acl.conf"
+        echo "hostname:* rwl"   >> "$FOLDER/acl.conf"
+        which chirp_server 1>&2
+        timeout $TIMEOUT chirp_server -A "$FOLDER/acl.conf" -r "$FOLDER" &
+        # Without a known exec_server (unknown host) there is nothing to start
+        [[ -n "$EXECSERVER" ]] && timeout $TIMEOUT "$EXECSERVER" &> "$LOGFOLDER/exec_server_log-$HOSTNAME.log" &
+    fi
+    FILE=$FOLDER/$FILE
+else
+    FILE=$PWD/$FILE
+fi
+
+chmod a+x "$USER_MAP_SCRIPT"
+"./$USER_MAP_SCRIPT" "${MAP_ARGS[@]}" > "$FILE"
+STATUS=$?          # Report a failed map instead of masking it with exit 0
+echo "$HOSTNAME $FILE"
+ps -u "$USER" 1>&2
+ls -thor "$FILE" 1>&2
+exit $STATUS


Property changes on: SwiftApps/Swift-MapRed/mapred_combiner_V4/map_wrapper.sh
___________________________________________________________________
Added: svn:executable
   + *

Added: SwiftApps/Swift-MapRed/mapred_combiner_V4/reduce_wrapper.sh
===================================================================
--- SwiftApps/Swift-MapRed/mapred_combiner_V4/reduce_wrapper.sh	                        (rev 0)
+++ SwiftApps/Swift-MapRed/mapred_combiner_V4/reduce_wrapper.sh	2013-10-24 20:39:05 UTC (rev 7230)
@@ -0,0 +1,154 @@
+#!/bin/bash
+# Usage: reduce_wrapper.sh USER_REDUCE_SCRIPT METHOD FILE...
+# The reducer is expecting to receive filenames as args
+# Each file contains lines in the format HOSTNAME FILEPATH
+
+# Reduce options:
+# "naive" : Takes multiple files and applies the reduce script on them
+# "plain" : Fetches the files from the filepointers and then applies the reduce script
+# "local" : Does a local combine on every unique node and then uses plain reduce to finish
+
+# The files passed here are filepointers which need to be resolved
+# before they can be processed by the core reduce function.
+ARGS=("$@")    # "$@" keeps every argument as one word, without glob expansion
+USER_REDUCE_SCRIPT=${ARGS[0]}
+chmod a+x "$USER_REDUCE_SCRIPT"
+CMD=${ARGS[1]}
+FILES=("${ARGS[@]:2}")
+LOGS=ON      # ON | OFF
+
+FOLDER="/dev/shm"   # Same on all known sites; default keeps unknown hosts off "/"
+HOSTNAME=$(hostname -f)
+# Site-specific locations of the exec server/client binaries and of the logs
+if   [[ "$HOSTNAME" == *midway* ]]; then # On midway node
+    EXECSERVER=/scratch/midway/yadunand/bin/exec_server
+    EXECCLIENT=/scratch/midway/yadunand/bin/exec_client
+    LOGFOLDER=/scratch/midway/yadunand
+    HOSTID="midway_$HOSTNAME"
+    SITE="midway"
+
+elif [[ "$HOSTNAME" == *nid* ]]; then    # On beagle node
+    EXECSERVER=/lustre/beagle/yadunandb/bin/exec_server
+    EXECCLIENT=/lustre/beagle/yadunandb/bin/exec_client
+    LOGFOLDER=/lustre/beagle/yadunandb/Swift-MapRed/
+    HOSTID="beagle_$HOSTNAME"
+    SITE="beagle"
+fi
+
+log()   # Timestamped message on stderr when LOGS is "ON"; succeeds when it is not
+{
+    if [ "$LOGS" == "ON" ]; then echo -e "[$(date +'%Y-%m-%d %H:%M:%S.%N')] : $*" 1>&2; fi
+}
+
+# fetch ID TOKEN
+# Resolve one filepointer TOKEN ("<site>_<host> <path>") into the file
+# <dir of path>/ID.<random>.imd2: moved if <host> is this node, else chirp_get.
+fetch()
+{
+    log "Fetch args : \n$*\n"
+    local ID="$1" TOKEN="$2" REMOTE FILE REST TARGETFILE REPODIR DEST
+    # Drop the "<site>_" prefix, then split on spaces without touching the global IFS
+    IFS=' ' read -r REMOTE FILE REST <<< "${TOKEN#*_}"
+    TARGETFILE=$(basename "$FILE")
+    REPODIR=$(dirname "$FILE")
+    DEST="$REPODIR/$ID.$RANDOM.imd2"    # One $RANDOM expansion: log and action agree
+
+    if [ "$REMOTE" == "$HOSTNAME" ]
+    then # Move to an identifiable file if local
+        log "mv $FILE $DEST"
+        [ -f "$FILE" ] || { echo "$FILE missing" 1>&2; return 1; }
+        mv "$FILE" "$DEST"
+    else # Fetch to an identifiable file if remote
+        log "chirp_get $REMOTE $TARGETFILE $DEST"
+        chirp_get "$REMOTE" "$TARGETFILE" "$DEST"
+    fi
+}
+
+# plain ID POINTERFILE...: fetch the files listed for $SITE, reduce them and
+# exit with the reduce script's status.
+plain () {
+    local ID="$1"; shift
+    local IFS=$'\r\n' file_token status   # local IFS: one pointer per line, no leak
+    for file_token in $(grep -h "$SITE" "$@"); do
+        log "fetch $ID $file_token &"
+        fetch "$ID" "$file_token" &
+        log "fetch $ID $file_token &"
+    done
+    wait
+    # Files are named $ID.<random>.imd2; "$ID." stops ID 12 matching ID 123's files
+    log "./$USER_REDUCE_SCRIPT $FOLDER/$ID.*imd2"
+    "./$USER_REDUCE_SCRIPT" "$FOLDER/$ID".*imd2; status=$?
+    log "./$USER_REDUCE_SCRIPT $FOLDER/$ID.*imd2"
+    rm -rf "$FOLDER/$ID".*imd2 &> /dev/null &
+    exit $status
+}
+
+#TODO: FIX for changes in reduce definition
+# local_combine_fetch_plain ID POINTERFILE...: combine on each $SITE node via
+# exec_client, fetch the per-node results and reduce them; returns that status.
+local_combine_fetch_plain()
+{
+    local ID="$1" LOCATION Node FILES TEMP status; shift
+    log "Starting local_combiner_fetch_plain"
+    cp "$EXECCLIENT" ./     # Loop invariant: one copy of the client is enough
+    for LOCATION in $(awk '{ print $1 }' "$@" | grep "$SITE" | sort -u); do
+        Node=${LOCATION##*_}    # Hostname without the "<site>_" prefix
+        FILES=($(grep -h "$Node" "$@" | awk '{ print $2 }'))
+############## LOCAL COMBINER CODE ###################
+        cat <<EOF   > "$Node.sh"
+#!/bin/bash
+ID=$RANDOM
+FILES=(${FILES[*]})
+SITE=$SITE
+reduce()
+{
+EOF
+        cat "$USER_REDUCE_SCRIPT" >> "$Node.sh"
+        cat <<'EOF' >> "$Node.sh"
+}
+DIRNAME=$(dirname ${FILES[0]})
+RESULT=$DIRNAME/$(hostname -f).$RANDOM.comb
+reduce ${FILES[*]} > $RESULT
+echo -e "$SITE"_"$(hostname -f) $RESULT"
+EOF
+        chmod a+x "$Node.sh"
+############# END LOCAL COMBINER #####################
+        #TODO: Move client with swift ?
+        {
+            TEMP=$(./exec_client "$Node" "$Node.sh")
+            # Fetch will return files in $FOLDER which match comb.$ID.*imd2
+            fetch "comb.$ID" "$TEMP"
+        } &
+    done
+    wait
+    log "Finished local_combiner_fetch"
+    ls "$FOLDER/comb.$ID".*imd2 1>&2
+    "./$USER_REDUCE_SCRIPT" "$FOLDER/comb.$ID".*imd2; status=$?
+    log "Finished running ./$USER_REDUCE_SCRIPT $FOLDER/comb.$ID.*imd2"
+    return $status
+}
+
+# Run the requested reduce method; the wrapper exits with that method's status
+# so that a failed or unknown reduction is not reported as a success.
+ID=$RANDOM
+case "$CMD" in
+    "naive")
+        echo "Using naive reduce" 1>&2
+        "./$USER_REDUCE_SCRIPT" "${FILES[@]}" ;;
+    "plain")
+        echo "Using plain reduce" 1>&2
+        echo "plain $ID ${FILES[*]}" 1>&2
+        plain $ID "${FILES[@]}" ;;
+    "local")
+        echo "Using local combiners" 1>&2
+        local_combine_fetch_plain $ID "${FILES[@]}" ;;
+    "local_k_way")
+        echo "Using local combiners" 1>&2
+        # local_combine is not defined in this script: call it only if it exists
+        if command -v local_combine > /dev/null; then local_combine $ID "${FILES[@]}"
+        else echo "ERROR: local_k_way reduction is not available" 1>&2; false; fi ;;
+    *)
+        echo "ERROR: Unknown reduction method requested" 1>&2
+        false ;;
+esac
+exit $?


Property changes on: SwiftApps/Swift-MapRed/mapred_combiner_V4/reduce_wrapper.sh
___________________________________________________________________
Added: svn:executable
   + *




More information about the Swift-commit mailing list