[Swift-commit] r7455 - provenancedb

lgadelha at ci.uchicago.edu lgadelha at ci.uchicago.edu
Tue Dec 31 06:17:28 CST 2013


Author: lgadelha
Date: 2013-12-31 06:17:27 -0600 (Tue, 31 Dec 2013)
New Revision: 7455

Modified:
   provenancedb/import-run-to-sql
   provenancedb/prepare-for-import
   provenancedb/prepare-provenance-chart
   provenancedb/prov-to-sql.sh
   provenancedb/swift-prov-import-all-logs
Log:
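Minor fixes: swift-prov-import-all-logs now passes the run ID, source-file prefix and log checksum through prepare-for-import and import-run-to-sql to the scripts they call, which use them instead of deriving IDs locally.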


Modified: provenancedb/import-run-to-sql
===================================================================
--- provenancedb/import-run-to-sql	2013-12-28 01:24:17 UTC (rev 7454)
+++ provenancedb/import-run-to-sql	2013-12-31 12:17:27 UTC (rev 7455)
@@ -9,5 +9,5 @@
 # with kickstart records expected to be in the same directory as the
 # log file.
 
-version=$version prov-to-sql.sh $1
+version=$version prov-to-sql.sh $1 $2 $3 $4
 

Modified: provenancedb/prepare-for-import
===================================================================
--- provenancedb/prepare-for-import	2013-12-28 01:24:17 UTC (rev 7454)
+++ provenancedb/prepare-for-import	2013-12-31 12:17:27 UTC (rev 7455)
@@ -6,5 +6,5 @@
 
 swift-plot-log $1 execute.global.event execute2.event workflow.event execute2.global.event compound.event internalproc.event
 
-prepare-provenance-chart $1
+prepare-provenance-chart $1 $2 $3 $4
 

Modified: provenancedb/prepare-provenance-chart
===================================================================
--- provenancedb/prepare-provenance-chart	2013-12-28 01:24:17 UTC (rev 7454)
+++ provenancedb/prepare-provenance-chart	2013-12-31 12:17:27 UTC (rev 7455)
@@ -1,8 +1,7 @@
 #!/bin/bash
 
-export RUNID=$(basename $1 .log)
-export WFID="${RUNID}:"
-export EXECUTE2PREFIX="${RUNID}:"
+export WFID="$2:"
+export EXECUTE2PREFIX="$2:"
 cat $1 | grep ' PARAM ' | sed "s/^.* thread=\([^ ]*\).*direction=\([^ ]*\).*variable=\([^ ]*\).*provenanceid=\([^ ]*\).*\$/${WFID}\1 \2 \4 \3/" > tie-data-invocs.txt
 cat $1 | grep ' CONTAINMENT ' | sed 's/^.*parent=\([^ ]*\) child=\([^ ]*\)$/\1 \2/' > tie-containers.txt
 cat $1 | grep ' FILENAME ' | sed 's/^.*dataset=\([^ ]*\) filename=\([^ ]*\).*$/\1 \2/' | sort | uniq > dataset-filenames.txt

Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh	2013-12-28 01:24:17 UTC (rev 7454)
+++ provenancedb/prov-to-sql.sh	2013-12-31 12:17:27 UTC (rev 7455)
@@ -1,8 +1,9 @@
 #!/bin/bash
 
 source_file=$(grep "source file" $1 | awk '{print $5}' | awk -F ":" '{print $1}')	
-source_file_prefix=$(echo $source_file | awk -F "." '{print $1}')
-export RUNID="${source_file_prefix}-$(basename $1 .log)"
+export RUNID=$2
+export source_file_prefix=$3
+export CKSUM=$4
 
 export WFID="${RUNID}:"
 
@@ -18,9 +19,10 @@
 
 echo "    - Function calls."
 while read time duration thread localthread endstate tr_name scratch; do
-    id="${source_file_prefix}-$(echo "$thread" | sed "s/execute\://")"
-    echo "INSERT INTO fun_call (id, type, run_id) VALUES ('$id', 'execute', '$WF');"  >> /tmp/$RUNID.sql
-    echo "INSERT INTO app_fun_call (id, name, start_time, duration, final_state, scratch) VALUES ('$id', '$tr_name', $time, $duration, '$endstate', '$scratch');"   >> /tmp/$RUNID.sql
+    id="$(echo "$thread" | sed "s/execute\://")"
+    fid=${source_file_prefix}-$(echo $id | sed "s/run.../&-$CKSUM/g")
+    echo "INSERT INTO fun_call (id, type, run_id) VALUES ('$fid', 'execute', '$WF');"  >> /tmp/$RUNID.sql
+    echo "INSERT INTO app_fun_call (id, name, start_time, duration, final_state, scratch) VALUES ('$fid', '$tr_name', $time, $duration, '$endstate', '$scratch');"   >> /tmp/$RUNID.sql
 done < execute.global.event
 
 echo "    - Application executions."
@@ -29,7 +31,8 @@
     # parent thread id which should correspond with the execute-level ID
     #inv_id="$WFID$(echo $thread | sed 's/-[^-]*$//')"
     inv_id="$WFID$(echo $thread)"
-    eid=$(echo "$globalid" | sed "s/execute2\://")   
+    aux_eid=$(echo "$globalid" | sed "s/execute2\://")   
+    eid=${source_file_prefix}-$(echo $aux_eid | sed "s/run.../&-$CKSUM/g")
     echo  "INSERT INTO app_exec (id, app_fun_call_id, start_time, duration, final_state, site) VALUES ('$eid', '$inv_id', $start_time, $duration, '$endstate', '$site');"  >> /tmp/$RUNID.sql
 done < execute2.global.event
 
@@ -77,16 +80,14 @@
 
 echo "    - Built-in function calls."
 while read id name output; do
-    fid="${source_file_prefix}-${id}"
     echo  "INSERT INTO ds (id) VALUES ('$output');"  >> /tmp/$RUNID.sql
-    echo  "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$fid', 'function', '$name', '$WF');"  >> /tmp/$RUNID.sql
-    echo  "INSERT INTO dataset_out (function_call_id, dataset_id, parameter) VALUES ('$fid', '$output', 'result');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$id', 'function', '$name', '$WF');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO dataset_out (function_call_id, dataset_id, parameter) VALUES ('$id', '$output', 'result');"  >> /tmp/$RUNID.sql
 done < functions.txt
 
 while read id value; do
-    fid="${source_file_prefix}-${id}"
     echo  "INSERT INTO ds (id) VALUES ('$value');" >> /tmp/$RUNID.sql
-    echo  "INSERT INTO dataset_in (function_call_id, dataset_id, parameter) VALUES ('$fid', '$value', 'undefined');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO dataset_in (function_call_id, dataset_id, parameter) VALUES ('$id', '$value', 'undefined');"  >> /tmp/$RUNID.sql
 done < function-inputs.txt
 
 
@@ -120,7 +121,6 @@
 
 echo "    - Dataset consumption and production."
 while read thread direction dataset variable rest; do 
-    fid="${source_file_prefix}-${thread}"
     if [ "$direction" == "input" ] ; then
 	table=dataset_in
     else
@@ -128,7 +128,7 @@
     fi
     
     echo "INSERT INTO ds (id) VALUES ('$dataset');"  >> /tmp/$RUNID.sql
-    echo "INSERT INTO $table (function_call_id, dataset_id, parameter) VALUES ('$fid', '$dataset', '$variable');"  >> /tmp/$RUNID.sql
+    echo "INSERT INTO $table (function_call_id, dataset_id, parameter) VALUES ('$thread', '$dataset', '$variable');"  >> /tmp/$RUNID.sql
 done < tie-data-invocs.txt
 
 
@@ -158,7 +158,7 @@
 	signals=$(echo $runtime | awk -F "," '{print $20}' | awk -F ":" '{print $2}')
 	exit_status=$(echo $runtime | awk -F "," '{print $21}' | awk -F ":" '{print $2}')
 	
-	echo "UPDATE app_exec SET real_secs='$real_secs', kernel_secs='$kernel_secs', user_secs='$user_secs', percent_cpu='$percent_cpu', max_rss='$max_rss', avg_rss='$avg_rss', avg_tot_vm='$avg_tot_vm', avg_priv_data='$avg_priv_data', avg_priv_stack='$avg_priv_stack', avg_shared_text='$avg_shared_text', page_size='$page_size', major_pgfaults='$major_pgfaults', minor_pgfaults='$minor_pgfaults', swaps='$swaps', invol_context_switches='$invol_context_switches', vol_waits='$vol_waits', fs_reads='$fs_reads', fs_writes='$fs_writes', sock_recv='$sock_recv', sock_send='$sock_send', signals='$signals', exit_status='$exit_status' WHERE id='${source_file_prefix}-$execute2_id';" >> /tmp/$RUNID.sql
+	echo "UPDATE app_exec SET real_secs='$real_secs', kernel_secs='$kernel_secs', user_secs='$user_secs', percent_cpu='$percent_cpu', max_rss='$max_rss', avg_rss='$avg_rss', avg_tot_vm='$avg_tot_vm', avg_priv_data='$avg_priv_data', avg_priv_stack='$avg_priv_stack', avg_shared_text='$avg_shared_text', page_size='$page_size', major_pgfaults='$major_pgfaults', minor_pgfaults='$minor_pgfaults', swaps='$swaps', invol_context_switches='$invol_context_switches', vol_waits='$vol_waits', fs_reads='$fs_reads', fs_writes='$fs_writes', sock_recv='$sock_recv', sock_send='$sock_send', signals='$signals', exit_status='$exit_status' WHERE id='$execute2_id';" >> /tmp/$RUNID.sql
 	#echo "INSERT INTO rt_info (app_exec_id, timestamp, cpu_usage, max_phys_mem, max_virt_mem, io_read, io_write) VALUES ('$execute2_id', $timestamp, $cpu_usage, $max_phys_mem, $max_virtual_mem, $io_read_bytes, $io_write_bytes);"  >> /tmp/$RUNID.sql
 
 #	for key in $(echo maxrss walltime systime usertime cpu fsin fsout timesswapped socketrecv socketsent majorpagefaults minorpagefaults contextswitchesinv contextswitchesvol); do
@@ -170,7 +170,7 @@
 
 echo "    - Function call names."
 while read thread appname; do
-    fid="${source_file_prefix}-${thread}"
+    fid=$(echo $thread | sed "s/run.../&-$CKSUM/g")
     echo  "UPDATE fun_call SET name='$appname' WHERE id='$fid';"  >> /tmp/$RUNID.sql
 done < invocation-procedure-names.txt
 
@@ -180,7 +180,7 @@
 	echo $extrainfo | awk -F ";"  '{ for (i = 1; i <= NF; i++)
                                                print $i
                                          }' | awk -F "=" '{ print $1 " " $2 }' | awk -F ":" '{ print $1 " " $2 }' > fields.txt
-	id=$($SQLCMD --tuples-only -c "select app_fun_call_id from app_exec where id='${source_file_prefix}-$execute2_id';" | awk '{print $1}')
+	id=$($SQLCMD --tuples-only -c "select app_fun_call_id from app_exec where id='$execute2_id';" | awk '{print $1}')
 	while read name type value; do
 	    if [ "$type" = "num" ]; then
 		echo "INSERT INTO annot_app_exec_num (id, name, value) VALUES ('$id', '$name', $value);"  >> /tmp/$RUNID.sql

Modified: provenancedb/swift-prov-import-all-logs
===================================================================
--- provenancedb/swift-prov-import-all-logs	2013-12-28 01:24:17 UTC (rev 7454)
+++ provenancedb/swift-prov-import-all-logs	2013-12-31 12:17:27 UTC (rev 7455)
@@ -33,10 +33,14 @@
 
 while read start version filename cogversion; do
     
-    export IDIR=$(echo $filename | sed 's/\.log$/.d/')
     COG_VERSION=$(grep -m 1 -E 'Swift .* swift-r[0-9]*' $filename | sed 's/.*Swift .* cog-r\([0-9]*\).*/\1/')
-    SCRIPT_FILENAME=$(grep "source file" $filename | awk '{print $5}' | awk -F ":" '{print $1}')
-
+    START_TIME=$(head -1 $filename | iso-to-secs | cut -f 1 -d ' ' | cut -f 1 -d '.')
+    CKSUM=$(cksum $filename | cut -f 1 -d ' ')
+    source_file=$(grep "source file" $filename | awk '{print $5}' | awk -F ":" '{print $1}')	
+    source_file_prefix=$(echo $source_file | awk -F "." '{print $1}')
+    export AUXIDIR=$(echo $filename | sed 's/\.log$/.d/')
+    export IDIR=$(echo $AUXIDIR | sed "s/run...\.d/$source_file_prefix-&/")
+ 
     echo IDIR=$IDIR
     if [ $version -ge 1538 ]; then
 	echo -n "Log: $filename ... "
@@ -56,9 +60,7 @@
 		fi
 		
 		export RUNID=$(basename $filename .log)
-		source_file=$(grep "source file" $filename | awk '{print $5}' | awk -F ":" '{print $1}')	
-		source_file_prefix=$(echo $source_file | awk -F "." '{print $1}')
-		export WF="${source_file_prefix}-${RUNID}"
+		export WF="${source_file_prefix}-${RUNID}-${CKSUM}"
 		echo $WF
 		
 
@@ -66,12 +68,12 @@
 		
 		echo version $version in log file $filename
 		echo ============= will import =============
-		prepare-for-import $filename
+		prepare-for-import $filename $WF $source_file_prefix $CKSUM
 		if [ "$?" != "0" ]; then
 		    echo prepare-for-import failed
 		    exit 2
 		fi
-		version=$version import-run-to-sql $filename
+		version=$version import-run-to-sql $filename $WF $source_file_prefix $CKSUM
 		if [ "$?" != "0" ]; then
 		    echo import-run-to-sql failed
 		    exit 3




More information about the Swift-commit mailing list