[Swift-commit] r5609 - in provenancedb: . etc

lgadelha at ci.uchicago.edu lgadelha at ci.uchicago.edu
Tue Feb 14 10:15:37 CST 2012


Author: lgadelha
Date: 2012-02-14 10:15:37 -0600 (Tue, 14 Feb 2012)
New Revision: 5609

Added:
   provenancedb/build_script_run_provenance_graph.sh
   provenancedb/list_script_runs.sh
Modified:
   provenancedb/etc/provenance.config.ci
   provenancedb/prov-init.sql
   provenancedb/prov-to-sql.sh
   provenancedb/swift-prov-import-all-logs
Log:
Updates to import script, schema. Minor fixes to schema.


Added: provenancedb/build_script_run_provenance_graph.sh
===================================================================
--- provenancedb/build_script_run_provenance_graph.sh	                        (rev 0)
+++ provenancedb/build_script_run_provenance_graph.sh	2012-02-14 16:15:37 UTC (rev 5609)
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+PROVDIR=$(dirname $0)
+pushd $PROVDIR 
+PROVDIR=$(pwd)
+popd
+
+# we need to keep this out of the log-processing dir because import
+# of individual runs will clean other files.
+
+source $PROVDIR/etc/provenance.config
+export PATH=$PROVDIR:$PATH
+
+query="select pgraph_edge.* from proc,pgraph_edge where (proc.id=pgraph_edge.parent or proc.id=pgraph_edge.child) and proc.run_id='$1';"
+
+echo "digraph \"$1\" {" > $1.dot
+#$SQLCMD --tuples-only -c "$query" | sed -e '/^ *$/d' | awk '{print "\""$1"\"" " -> " "\""$3"\""}' >> $1.dot
+$SQLCMD --tuples-only -c "$query" | sed -e '/^ *$/d' > /tmp/$1.tmp
+
+while read parent separator child; do
+	isfc=$(echo $parent | grep ^execute)
+	if [ "X" == "X$isfc" ]; then
+		variable=$parent
+		functioncall=$child
+	else
+		variable=$child
+		functioncall=$parent
+	fi
+		
+	variabletype=$($SQLCMD --tuples-only -c "select type from variable where id='$variable'" | awk '{print $1}')
+	
+	if [ "$variabletype" == "mapped" ]; then
+		variablelabel="variable:mapped:"$($SQLCMD --tuples-only -c "select filename from variable where id='$variable'" | awk '{print $1}')
+	fi
+	if [ "$variabletype" == "primitive" ]; then
+		variablelabel="variable:primitive:"$($SQLCMD --tuples-only -c "select value from variable where id='$variable'" | awk '{print $1}')
+	fi
+	if [ "$variabletype" == "composite" ]; then
+		variablelabel="variable:composite"
+	fi
+	
+	functioncalllabel="functioncall:"$($SQLCMD --tuples-only -c "select name from function_call where id='$functioncall'" | awk '{print $1}')
+	echo "\"$variable\" [ label=\"$variablelabel\" ];" >> /tmp/$1.header.dot
+	echo "\"$functioncall\" [ label=\"$functioncalllabel\"];" >> /tmp/$1.header.dot
+	echo "\"$parent\" -> \"$child\";" >> /tmp/$1.body.dot
+done < /tmp/$1.tmp
+
+cat /tmp/$1.header.dot | sort | uniq >> $1.dot
+cat /tmp/$1.body.dot >> $1.dot
+echo "}" >> $1.dot
\ No newline at end of file


Property changes on: provenancedb/build_script_run_provenance_graph.sh
___________________________________________________________________
Added: svn:executable
   + *

Modified: provenancedb/etc/provenance.config.ci
===================================================================
--- provenancedb/etc/provenance.config.ci	2012-02-14 05:34:00 UTC (rev 5608)
+++ provenancedb/etc/provenance.config.ci	2012-02-14 16:15:37 UTC (rev 5609)
@@ -1,7 +1,6 @@
 # file to source that sets variables for the various paths that are
 # presently hardcoded
 
-# this is the path to log repo on benc's laptop
 export LOGREPO=~/swift-logs
 export SQLCMD="psql -U provdb -h db.ci.uchicago.edu provdb"
 

Added: provenancedb/list_script_runs.sh
===================================================================
--- provenancedb/list_script_runs.sh	                        (rev 0)
+++ provenancedb/list_script_runs.sh	2012-02-14 16:15:37 UTC (rev 5609)
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+PROVDIR=$(dirname $0)
+pushd $PROVDIR
+PROVDIR=$(pwd)
+popd
+
+# we need to keep this out of the log-processing dir because import
+# of individual runs will clean other files.
+
+source $PROVDIR/etc/provenance.config
+export PATH=$PROVDIR:$PATH
+
+query="select * from script_run;"
+
+$SQLCMD -c "$query"
\ No newline at end of file


Property changes on: provenancedb/list_script_runs.sh
___________________________________________________________________
Added: svn:executable
   + *

Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql	2012-02-14 05:34:00 UTC (rev 5608)
+++ provenancedb/prov-init.sql	2012-02-14 16:15:37 UTC (rev 5609)
@@ -30,7 +30,7 @@
      log_filename  varchar(2048),
      swift_version varchar(16),
      cog_version   varchar(16),
-     final_state   varchar(16),
+     final_state   varchar(32),
      start_time    numeric,
      duration      numeric,
      script_source text,
@@ -60,7 +60,7 @@
      proc_name      varchar(256), -- name of the app procedure that invokes the transformation
      start_time     numeric,
      duration       numeric,
-     final_state    varchar(16),
+     final_state    varchar(32),
      scratch        varchar(2048)
     );
 
@@ -72,7 +72,7 @@
      app_inv_id        varchar(256) references app_inv (id) on delete cascade, 
      start_time        numeric,
      duration          numeric,
-     final_state       varchar(16),
+     final_state       varchar(32),
      site              varchar(256)
     );
 
@@ -240,7 +240,7 @@
 drop view function_call;
 
 create view function_call as 
-    select proc.id, app_inv.proc_name as name, proc.type, proc.name as tc_name, proc.run_id as script_run_id,  
+    select proc.id, proc.name as name, proc.type, app_inv.proc_name as tc_name, proc.run_id as script_run_id,  
            to_timestamp(app_inv.start_time) as start_time, app_inv.duration, app_inv.final_state, app_inv.scratch 
     from proc 
     left outer join 

Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh	2012-02-14 05:34:00 UTC (rev 5608)
+++ provenancedb/prov-to-sql.sh	2012-02-14 16:15:37 UTC (rev 5609)
@@ -11,36 +11,42 @@
 
 # this gives a distinction between the root process for a workflow and the
 # workflow itself. perhaps better to model the workflow as a process
-$SQLCMD -c "INSERT INTO proc (id, type, name, run_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');"
+#echo "BEGIN TRANSACTION;" > /tmp/$RUNID.sql
+echo "INSERT INTO proc (id, type, name, run_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" >> /tmp/$RUNID.sql
 
 while read time duration thread localthread endstate tr_name scratch; do
-    $SQLCMD -c "INSERT INTO proc (id, type, name, run_id) VALUES ('$thread', 'execute', '$tr_name', '$WF');" 
-    $SQLCMD -c "INSERT INTO app_inv (id, start_time, duration, final_state, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$scratch');" 
+    echo "INSERT INTO proc (id, type, run_id) VALUES ('$thread', 'execute', '$WF');"  >> /tmp/$RUNID-1.sql
+    echo "INSERT INTO app_inv (id, proc_name, start_time, duration, final_state, scratch) VALUES ('$thread', '$tr_name', $time, $duration, '$endstate', '$scratch');"   >> /tmp/$RUNID-2.sql
 done < execute.global.event
 
 while read start_time duration globalid id endstate thread site scratch; do
     # cut off the last component of the thread, so that we end up at the
     # parent thread id which should correspond with the execute-level ID
     inv_id="$WFID$(echo $thread | sed 's/-[^-]*$//')"
-    $SQLCMD -c  "INSERT INTO app_exec (id, app_inv_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');"
+    echo  "INSERT INTO app_exec (id, app_inv_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');"  >> /tmp/$RUNID-3.sql
 done < execute2.global.event
 
-while read outer inner; do
-    $SQLCMD -c  "INSERT INTO ds (id) VALUES ('$outer');"
-    $SQLCMD -c  "INSERT INTO ds (id) VALUES ('$inner');"
-    $SQLCMD -c  "INSERT INTO ds_cont (out_id, in_id) VALUES ('$outer', '$inner');"
-done < tie-containers.txt
 
 while read dataset filename; do
-    $SQLCMD -c "INSERT INTO ds (id) VALUES ('$dataset');"
-    $SQLCMD -c "INSERT INTO file (id, name) VALUES ('$dataset', '$filename');"
+    echo "INSERT INTO ds (id) VALUES ('$dataset');"  >> /tmp/$RUNID-4.sql
+    echo "INSERT INTO file (id, name) VALUES ('$dataset', '$filename');"  >> /tmp/$RUNID-5.sql
 done < dataset-filenames.txt
 
 while read dataset idtype equal value rest; do
-    $SQLCMD -c "INSERT INTO ds (id) VALUES ('$dataset');"
-    $SQLCMD -c "INSERT INTO in_mem (id, value) VALUES ('$dataset', '$value');"
+    echo "INSERT INTO ds (id) VALUES ('$dataset');"  >> /tmp/$RUNID-4.sql
+    echo "INSERT INTO in_mem (id, value) VALUES ('$dataset', '$value');"  >> /tmp/$RUNID-5.sql
 done < dataset-values.txt
 
+while read outer inner; do
+    echo  "INSERT INTO ds (id) VALUES ('$outer');"  >> /tmp/$RUNID-4.sql
+    echo  "INSERT INTO ds (id) VALUES ('$inner');"  >> /tmp/$RUNID-4.sql
+    echo  "INSERT INTO ds_cont (out_id, in_id) VALUES ('$outer', '$inner');"  >> /tmp/$RUNID-5.sql
+    echo  "INSERT INTO proc (id, type, name, run_id) VALUES ('${WFID}constructor:$outer', 'constructor', 'constructor', '$WF');"  >> /tmp/$RUNID-1.sql
+    echo  "INSERT INTO ds_in (proc_id, ds_id, param) VALUES ('${WFID}constructor:$outer', '$inner', 'element');"  >> /tmp/$RUNID-5.sql
+    echo  "INSERT INTO ds_out (proc_id, ds_id, param) VALUES ('${WFID}constructor:$outer', '$outer', 'collection');"  >> /tmp/$RUNID-5.sql
+done < tie-containers.txt
+
+
 while read col1 col2 col3 col4 col5 thread name lhs rhs result; do
     thread=$(echo $thread | awk 'BEGIN { FS = "=" }; {print $2}')
     name=$(echo $name | awk 'BEGIN { FS = "=" }; {print $2}')
@@ -50,34 +56,34 @@
     
     operatorid="${WFID}operator:$thread"
     
-    #$SQLCMD -c  "INSERT INTO ds (id) VALUES ('$lhs');"
-    #$SQLCMD -c  "INSERT INTO ds (id) VALUES ('$rhs');"
-    #$SQLCMD -c  "INSERT INTO ds (id) VALUES ('$result');"
-    $SQLCMD -c  "INSERT INTO proc (id, type, name, run_id) VALUES ('$operatorid', 'operator', '$name', '$WF');"
-    $SQLCMD -c  "INSERT INTO ds_in (proc_id, ds_id, param) VALUES ('$operatorid', '$lhs', 'lhs');"
-    $SQLCMD -c  "INSERT INTO ds_in (proc_id, ds_id, param) VALUES ('$operatorid', '$rhs', 'rhs');"
-    $SQLCMD -c  "INSERT INTO ds_out (proc_id, ds_id, param) VALUES ('$operatorid', '$result', 'result');"
+    echo  "INSERT INTO ds (id) VALUES ('$lhs');" >> /tmp/$RUNID-4.sql
+    echo  "INSERT INTO ds (id) VALUES ('$rhs');" >> /tmp/$RUNID-4.sql
+    echo  "INSERT INTO ds (id) VALUES ('$result');" >> /tmp/$RUNID-4.sql
+    echo  "INSERT INTO proc (id, type, name, run_id) VALUES ('$operatorid', 'operator', '$name', '$WF');"  >> /tmp/$RUNID-1.sql
+    echo  "INSERT INTO ds_in (proc_id, ds_id, param) VALUES ('$operatorid', '$lhs', 'lhs');"  >> /tmp/$RUNID-5.sql
+    echo  "INSERT INTO ds_in (proc_id, ds_id, param) VALUES ('$operatorid', '$rhs', 'rhs');"  >> /tmp/$RUNID-5.sql
+    echo  "INSERT INTO ds_out (proc_id, ds_id, param) VALUES ('$operatorid', '$result', 'result');"  >> /tmp/$RUNID-5.sql
 done < operators.txt
 
 while read id name output; do
-    #$SQLCMD -c  "INSERT INTO ds (id) VALUES ('$output');"
-    $SQLCMD -c  "INSERT INTO proc (id, type, name, run_id) VALUES ('$id', 'function', '$name', '$WF');"
-    $SQLCMD -c  "INSERT INTO ds_out (proc_id, ds_id, param) VALUES ('$id', '$output', 'result');"
+    echo  "INSERT INTO ds (id) VALUES ('$output');"  >> /tmp/$RUNID-4.sql
+    echo  "INSERT INTO proc (id, type, name, run_id) VALUES ('$id', 'function', '$name', '$WF');"  >> /tmp/$RUNID-1.sql
+    echo  "INSERT INTO ds_out (proc_id, ds_id, param) VALUES ('$id', '$output', 'result');"  >> /tmp/$RUNID-5.sql
 done < functions.txt
 
 while read id value; do
-    #$SQLCMD -c  "INSERT INTO ds (id) VALUES ('$value');"
-    $SQLCMD -c  "INSERT INTO ds_in (proc_id, ds_id, param) VALUES ('$id', '$value', 'undefined');"
+    echo  "INSERT INTO ds (id) VALUES ('$value');" >> /tmp/$RUNID-4.sql
+    echo  "INSERT INTO ds_in (proc_id, ds_id, param) VALUES ('$id', '$value', 'undefined');"  >> /tmp/$RUNID-5.sql
 done < function-inputs.txt
 
 
 while read thread appname; do
-    $SQLCMD -c  "UPDATE app_inv SET proc_name='$appname' WHERE id='$thread';"
+    echo  "UPDATE proc SET name='$appname' WHERE id='$thread';"  >> /tmp/$RUNID-3.sql
 done < invocation-procedure-names.txt
 
 while read start duration wfid rest; do
-    $SQLCMD -c "UPDATE run SET start_time=$start WHERE id='$WF';"
-    $SQLCMD -c "UPDATE run SET duration=$duration WHERE id='$WF';"
+    echo "UPDATE run SET start_time=$start WHERE id='$WF';"  >> /tmp/$RUNID-1.sql
+    echo "UPDATE run SET duration=$duration WHERE id='$WF';"  >> /tmp/$RUNID-1.sql
 done < workflow.event
 
 
@@ -85,20 +91,20 @@
 while read start duration thread final_state procname ; do
     if [ "$duration" != "last-event-line" ]; then
 	compoundid=$WFID$thread
-	$SQLCMD -c "INSERT INTO proc (id, type, name, run_id) VALUES ('$compoundid', 'compound', '$procname', '$WF');"
+	echo "INSERT INTO proc (id, type, name, run_id) VALUES ('$compoundid', 'compound', '$procname', '$WF');"  >> /tmp/$RUNID-1.sql
     fi
 done < compound.event
 
 while read start duration thread final_state procname ; do
     if [ "$duration" != "last-event-line" ]; then
 	fqid=$WFID$thread
-	$SQLCMD -c "INSERT INTO proc (id, type, name, run_id) VALUES ('$fqid', 'internal', '$procname', '$WF');"
+	echo "INSERT INTO proc (id, type, name, run_id) VALUES ('$fqid', 'internal', '$procname', '$WF');"  >> /tmp/$RUNID-1.sql
     fi	
 done < internalproc.event
 
 while read t ; do 
     thread="${WFID}$t"
-    $SQLCMD -c "INSERT INTO proc (id, type, name, run_id) VALUES ('$thread', 'scope', 'scope', '$WF');"
+    echo "INSERT INTO proc (id, type, name, run_id) VALUES ('$thread', 'scope', 'scope', '$WF');"  >> /tmp/$RUNID-1.sql
 done < scopes.txt
 
 while read thread direction dataset variable rest; do 
@@ -108,12 +114,8 @@
 	table=ds_out
     fi
     
-    EXISTING=$($SQLCMD --tuples-only -c "select count(*) from ds where ds.id='$dataset';")
-    
-    if [ "$EXISTING" -eq "0" ];  then
-	$SQLCMD -c "INSERT INTO ds (id) VALUES ('$dataset');"
-    fi 
-    $SQLCMD -c "INSERT INTO $table (proc_id, ds_id, param) VALUES ('$thread', '$dataset', '$variable');"
+	echo "INSERT INTO ds (id) VALUES ('$dataset');"  >> /tmp/$RUNID-4.sql
+    echo "INSERT INTO $table (proc_id, ds_id, param) VALUES ('$thread', '$dataset', '$variable');"  >> /tmp/$RUNID-5.sql
 done < tie-data-invocs.txt
 
 if [ -f extrainfo.txt ]; then
@@ -124,10 +126,10 @@
 	id=$($SQLCMD --tuples-only -c "select app_inv_id from app_exec where id='$execute2_id';" | awk '{print $1}')
 	while read name type value; do
 	    if [ "$type" = "num" ]; then
-		$SQLCMD -c "INSERT INTO a_proc_n (id, name, value) VALUES ('$id', '$name', $value);"
+		echo "INSERT INTO a_proc_n (id, name, value) VALUES ('$id', '$name', $value);"  >> /tmp/$RUNID-6.sql
 	    fi 
 	    if [ "$type" = "txt" ]; then
-		$SQLCMD -c "INSERT INTO a_proc_t (id, name, value) VALUES ('$id', '$name', '$value');"
+		echo "INSERT INTO a_proc_t (id, name, value) VALUES ('$id', '$name', '$value');"  >> /tmp/$RUNID-6.sql
 	    fi
 	done < fields.txt
     done < extrainfo.txt
@@ -141,8 +143,17 @@
 	max_virtual_mem=$(echo $runtime | awk -F "," '{print $4}' | awk -F ":" '{print $2}')
 	io_read_bytes=$(echo $runtime | awk -F "," '{print $5}' | awk -F ":" '{print $2}')
 	io_write_bytes=$(echo $runtime | awk -F "," '{print $6}' | awk -F ":" '{print $2}')
-	$SQLCMD -c "INSERT INTO runtime_info (app_execution_id, tstamp, cpu_usage, max_phys_mem, max_virtual_mem, io_read_bytes, io_write_bytes) VALUES ('$execute2_id', $timestamp, $cpu_usage, $max_phys_mem, $max_virtual_mem, $io_read_bytes, $io_write_bytes);"
+	echo "INSERT INTO runtime_info (app_execution_id, tstamp, cpu_usage, max_phys_mem, max_virtual_mem, io_read_bytes, io_write_bytes) VALUES ('$execute2_id', $timestamp, $cpu_usage, $max_phys_mem, $max_virtual_mem, $io_read_bytes, $io_write_bytes);"  >> /tmp/$RUNID-6.sql
     done < runtime.txt
 fi
 
+for i in `seq 1 6`
+do
+	cat /tmp/$RUNID-$i.sql | sort | uniq >> /tmp/$RUNID.sql
+	#rm /tmp/$RUNID-$i.sql
+done
+
+echo "COMMIT;" >> /tmp/$RUNID.sql
+$SQLCMD -f /tmp/$RUNID.sql
+#rm /tmp/$RUNID.sql
 echo Finished sending SQL to DB
\ No newline at end of file

Modified: provenancedb/swift-prov-import-all-logs
===================================================================
--- provenancedb/swift-prov-import-all-logs	2012-02-14 05:34:00 UTC (rev 5608)
+++ provenancedb/swift-prov-import-all-logs	2012-02-14 16:15:37 UTC (rev 5609)
@@ -56,7 +56,8 @@
 		
 		export WF="${RUNID}"
 		
-		$SQLCMD -c "INSERT INTO run (id, log_filename, swift_version, cog_version, final_state) VALUES ('$WF','$filename','$version', '', '$wfstatus');" 
+		echo "BEGIN TRANSACTION;" > /tmp/$WF.sql
+		echo "INSERT INTO run (id, log_filename, swift_version, cog_version, final_state) VALUES ('$WF','$filename','$version', '', '$wfstatus');" >> /tmp/$WF.sql
 		
 		echo version $version in log file $filename
 		echo ============= will import =============




More information about the Swift-commit mailing list