[Swift-commit] r7454 - provenancedb

lgadelha at ci.uchicago.edu lgadelha at ci.uchicago.edu
Fri Dec 27 19:24:17 CST 2013


Author: lgadelha
Date: 2013-12-27 19:24:17 -0600 (Fri, 27 Dec 2013)
New Revision: 7454

Modified:
   provenancedb/prov-init.sql
   provenancedb/prov-to-sql.sh
   provenancedb/swift-prov-import-all-logs
Log:
Adjust to the new ID format in the log file: run and function-call IDs are now prefixed with the script file name (without its extension).


Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql	2013-12-26 04:00:11 UTC (rev 7453)
+++ provenancedb/prov-init.sql	2013-12-28 01:24:17 UTC (rev 7454)
@@ -360,11 +360,11 @@
        select entity_id, entity_type, key, numeric_value, NULL as text_value from annotation_numeric;
        
 create view script_and_fun_call as
-       select script_run_summary.id as script_run_id, log_filename, script_filename, swift_version, cog_version,
+       select script_run_summary.id as script_run_id, script_filename, swift_version, cog_version,
        	      script_run_summary.final_state as script_run_final_state, 
        	      script_run_summary.start_time as script_run_start_time,
        	      script_run_summary.duration as script_run_duration, 
-      	      script_filename, fun_call.id as function_call_id, 
+      	      fun_call.id as function_call_id, 
        	      fun_call.type as function_call_type, 
        	      fun_call.name as function_call_name 
        from   script_run_summary,fun_call 

Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh	2013-12-26 04:00:11 UTC (rev 7453)
+++ provenancedb/prov-to-sql.sh	2013-12-28 01:24:17 UTC (rev 7454)
@@ -1,6 +1,8 @@
 #!/bin/bash
 
-export RUNID=$(basename $1 .log)
+source_file=$(grep "source file" $1 | awk '{print $5}' | awk -F ":" '{print $1}')	
+source_file_prefix=$(echo $source_file | awk -F "." '{print $1}')
+export RUNID="${source_file_prefix}-$(basename $1 .log)"
 
 export WFID="${RUNID}:"
 
@@ -16,7 +18,7 @@
 
 echo "    - Function calls."
 while read time duration thread localthread endstate tr_name scratch; do
-    id=$(echo "$thread" | sed "s/execute\://")
+    id="${source_file_prefix}-$(echo "$thread" | sed "s/execute\://")"
     echo "INSERT INTO fun_call (id, type, run_id) VALUES ('$id', 'execute', '$WF');"  >> /tmp/$RUNID.sql
     echo "INSERT INTO app_fun_call (id, name, start_time, duration, final_state, scratch) VALUES ('$id', '$tr_name', $time, $duration, '$endstate', '$scratch');"   >> /tmp/$RUNID.sql
 done < execute.global.event
@@ -75,14 +77,16 @@
 
 echo "    - Built-in function calls."
 while read id name output; do
+    fid="${source_file_prefix}-${id}"
     echo  "INSERT INTO ds (id) VALUES ('$output');"  >> /tmp/$RUNID.sql
-    echo  "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$id', 'function', '$name', '$WF');"  >> /tmp/$RUNID.sql
-    echo  "INSERT INTO dataset_out (function_call_id, dataset_id, parameter) VALUES ('$id', '$output', 'result');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$fid', 'function', '$name', '$WF');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO dataset_out (function_call_id, dataset_id, parameter) VALUES ('$fid', '$output', 'result');"  >> /tmp/$RUNID.sql
 done < functions.txt
 
 while read id value; do
+    fid="${source_file_prefix}-${id}"
     echo  "INSERT INTO ds (id) VALUES ('$value');" >> /tmp/$RUNID.sql
-    echo  "INSERT INTO dataset_in (function_call_id, dataset_id, parameter) VALUES ('$id', '$value', 'undefined');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO dataset_in (function_call_id, dataset_id, parameter) VALUES ('$fid', '$value', 'undefined');"  >> /tmp/$RUNID.sql
 done < function-inputs.txt
 
 
@@ -116,6 +120,7 @@
 
 echo "    - Dataset consumption and production."
 while read thread direction dataset variable rest; do 
+    fid="${source_file_prefix}-${thread}"
     if [ "$direction" == "input" ] ; then
 	table=dataset_in
     else
@@ -123,7 +128,7 @@
     fi
     
     echo "INSERT INTO ds (id) VALUES ('$dataset');"  >> /tmp/$RUNID.sql
-    echo "INSERT INTO $table (function_call_id, dataset_id, parameter) VALUES ('$thread', '$dataset', '$variable');"  >> /tmp/$RUNID.sql
+    echo "INSERT INTO $table (function_call_id, dataset_id, parameter) VALUES ('$fid', '$dataset', '$variable');"  >> /tmp/$RUNID.sql
 done < tie-data-invocs.txt
 
 
@@ -153,7 +158,7 @@
 	signals=$(echo $runtime | awk -F "," '{print $20}' | awk -F ":" '{print $2}')
 	exit_status=$(echo $runtime | awk -F "," '{print $21}' | awk -F ":" '{print $2}')
 	
-	echo "UPDATE app_exec SET real_secs='$real_secs', kernel_secs='$kernel_secs', user_secs='$user_secs', percent_cpu='$percent_cpu', max_rss='$max_rss', avg_rss='$avg_rss', avg_tot_vm='$avg_tot_vm', avg_priv_data='$avg_priv_data', avg_priv_stack='$avg_priv_stack', avg_shared_text='$avg_shared_text', page_size='$page_size', major_pgfaults='$major_pgfaults', minor_pgfaults='$minor_pgfaults', swaps='$swaps', invol_context_switches='$invol_context_switches', vol_waits='$vol_waits', fs_reads='$fs_reads', fs_writes='$fs_writes', sock_recv='$sock_recv', sock_send='$sock_send', signals='$signals', exit_status='$exit_status' WHERE id='$execute2_id';" >> /tmp/$RUNID.sql
+	echo "UPDATE app_exec SET real_secs='$real_secs', kernel_secs='$kernel_secs', user_secs='$user_secs', percent_cpu='$percent_cpu', max_rss='$max_rss', avg_rss='$avg_rss', avg_tot_vm='$avg_tot_vm', avg_priv_data='$avg_priv_data', avg_priv_stack='$avg_priv_stack', avg_shared_text='$avg_shared_text', page_size='$page_size', major_pgfaults='$major_pgfaults', minor_pgfaults='$minor_pgfaults', swaps='$swaps', invol_context_switches='$invol_context_switches', vol_waits='$vol_waits', fs_reads='$fs_reads', fs_writes='$fs_writes', sock_recv='$sock_recv', sock_send='$sock_send', signals='$signals', exit_status='$exit_status' WHERE id='${source_file_prefix}-$execute2_id';" >> /tmp/$RUNID.sql
 	#echo "INSERT INTO rt_info (app_exec_id, timestamp, cpu_usage, max_phys_mem, max_virt_mem, io_read, io_write) VALUES ('$execute2_id', $timestamp, $cpu_usage, $max_phys_mem, $max_virtual_mem, $io_read_bytes, $io_write_bytes);"  >> /tmp/$RUNID.sql
 
 #	for key in $(echo maxrss walltime systime usertime cpu fsin fsout timesswapped socketrecv socketsent majorpagefaults minorpagefaults contextswitchesinv contextswitchesvol); do
@@ -165,7 +170,8 @@
 
 echo "    - Function call names."
 while read thread appname; do
-    echo  "UPDATE fun_call SET name='$appname' WHERE id='$thread';"  >> /tmp/$RUNID.sql
+    fid="${source_file_prefix}-${thread}"
+    echo  "UPDATE fun_call SET name='$appname' WHERE id='$fid';"  >> /tmp/$RUNID.sql
 done < invocation-procedure-names.txt
 
 echo "    - Wrapper log extra info."
@@ -174,7 +180,7 @@
 	echo $extrainfo | awk -F ";"  '{ for (i = 1; i <= NF; i++)
                                                print $i
                                          }' | awk -F "=" '{ print $1 " " $2 }' | awk -F ":" '{ print $1 " " $2 }' > fields.txt
-	id=$($SQLCMD --tuples-only -c "select app_fun_call_id from app_exec where id='$execute2_id';" | awk '{print $1}')
+	id=$($SQLCMD --tuples-only -c "select app_fun_call_id from app_exec where id='${source_file_prefix}-$execute2_id';" | awk '{print $1}')
 	while read name type value; do
 	    if [ "$type" = "num" ]; then
 		echo "INSERT INTO annot_app_exec_num (id, name, value) VALUES ('$id', '$name', $value);"  >> /tmp/$RUNID.sql

Modified: provenancedb/swift-prov-import-all-logs
===================================================================
--- provenancedb/swift-prov-import-all-logs	2013-12-26 04:00:11 UTC (rev 7453)
+++ provenancedb/swift-prov-import-all-logs	2013-12-28 01:24:17 UTC (rev 7454)
@@ -35,12 +35,14 @@
     
     export IDIR=$(echo $filename | sed 's/\.log$/.d/')
     COG_VERSION=$(grep -m 1 -E 'Swift .* swift-r[0-9]*' $filename | sed 's/.*Swift .* cog-r\([0-9]*\).*/\1/')
+    SCRIPT_FILENAME=$(grep "source file" $filename | awk '{print $5}' | awk -F ":" '{print $1}')
+
     echo IDIR=$IDIR
     if [ $version -ge 1538 ]; then
 	echo -n "Log: $filename ... "
         # TODO: does not work in sqlite
-	fname=$(echo $filename | sed -e 's:[^\`].*/::')
-	EXISTING=$($SQLCMD --tuples-only -c "select count(*) from run where log_filename='$fname';")
+	# fname=$(echo $filename | sed -e 's:[^\`].*/::')
+	EXISTING=$($SQLCMD --tuples-only -c "select count(*) from run where log_filename='$filename';")
 	
 	if [ "$EXISTING" -eq "0" ];  then
 	    PROV_ENABLED=$(grep provenanceid $filename | wc -l)
@@ -54,11 +56,13 @@
 		fi
 		
 		export RUNID=$(basename $filename .log)
-		source_file=$(echo $fname | sed "s/-[0-9]*-[0-9]*-[0-9a-z]*\.log$/\.swift/")	
-		export WF="${RUNID}"
+		source_file=$(grep "source file" $filename | awk '{print $5}' | awk -F ":" '{print $1}')	
+		source_file_prefix=$(echo $source_file | awk -F "." '{print $1}')
+		export WF="${source_file_prefix}-${RUNID}"
+		echo $WF
 		
-		#echo "BEGIN TRANSACTION;" > /tmp/$WF.sql
-		echo "INSERT INTO run (id, log_filename, script_filename, swift_version, cog_version, final_state) VALUES ('$WF','$fname','$source_file','$version', '$COG_VERSION', '$wfstatus');" >> /tmp/$WF.sql
+
+		echo "INSERT INTO run (id, log_filename, script_filename, swift_version, cog_version, final_state) VALUES ('$WF','$filename','$source_file','$version', '$COG_VERSION', '$wfstatus');" >> /tmp/$WF.sql
 		
 		echo version $version in log file $filename
 		echo ============= will import =============
@@ -83,7 +87,3 @@
     fi
 done < /tmp/everylog-vs-versions.data
 
-# now pull the XML data into eXist, in as few runs as possible to avoid
-# JVM startup.
-# ls `pwd`/*.log.xml | time xargs ~/work/eXist/bin/client.sh -m /db/prov -ouri=xmldb:exist:// -p
-




More information about the Swift-commit mailing list