[Swift-commit] r5609 - in provenancedb: . etc
lgadelha at ci.uchicago.edu
lgadelha at ci.uchicago.edu
Tue Feb 14 10:15:37 CST 2012
Author: lgadelha
Date: 2012-02-14 10:15:37 -0600 (Tue, 14 Feb 2012)
New Revision: 5609
Added:
provenancedb/build_script_run_provenance_graph.sh
provenancedb/list_script_runs.sh
Modified:
provenancedb/etc/provenance.config.ci
provenancedb/prov-init.sql
provenancedb/prov-to-sql.sh
provenancedb/swift-prov-import-all-logs
Log:
Updates to import script, schema. Minor fixes to schema.
Added: provenancedb/build_script_run_provenance_graph.sh
===================================================================
--- provenancedb/build_script_run_provenance_graph.sh (rev 0)
+++ provenancedb/build_script_run_provenance_graph.sh 2012-02-14 16:15:37 UTC (rev 5609)
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+PROVDIR=$(dirname $0)
+pushd $PROVDIR
+PROVDIR=$(pwd)
+popd
+
+# we need to keep this out of the log-processing dir because import
+# of individual runs will clean other files.
+
+source $PROVDIR/etc/provenance.config
+export PATH=$PROVDIR:$PATH
+
+query="select pgraph_edge.* from proc,pgraph_edge where (proc.id=pgraph_edge.parent or proc.id=pgraph_edge.child) and proc.run_id='$1';"
+
+echo "digraph \"$1\" {" > $1.dot
+#$SQLCMD --tuples-only -c "$query" | sed -e '/^ *$/d' | awk '{print "\""$1"\"" " -> " "\""$3"\""}' >> $1.dot
+$SQLCMD --tuples-only -c "$query" | sed -e '/^ *$/d' > /tmp/$1.tmp
+
+while read parent separator child; do
+ isfc=$(echo $parent | grep ^execute)
+ if [ "X" == "X$isfc" ]; then
+ variable=$parent
+ functioncall=$child
+ else
+ variable=$child
+ functioncall=$parent
+ fi
+
+ variabletype=$($SQLCMD --tuples-only -c "select type from variable where id='$variable'" | awk '{print $1}')
+
+ if [ "$variabletype" == "mapped" ]; then
+ variablelabel="variable:mapped:"$($SQLCMD --tuples-only -c "select filename from variable where id='$variable'" | awk '{print $1}')
+ fi
+ if [ "$variabletype" == "primitive" ]; then
+ variablelabel="variable:primitive:"$($SQLCMD --tuples-only -c "select value from variable where id='$variable'" | awk '{print $1}')
+ fi
+ if [ "$variabletype" == "composite" ]; then
+ variablelabel="variable:composite"
+ fi
+
+ functioncalllabel="functioncall:"$($SQLCMD --tuples-only -c "select name from function_call where id='$functioncall'" | awk '{print $1}')
+ echo "\"$variable\" [ label=\"$variablelabel\" ];" >> /tmp/$1.header.dot
+ echo "\"$functioncall\" [ label=\"$functioncalllabel\"];" >> /tmp/$1.header.dot
+ echo "\"$parent\" -> \"$child\";" >> /tmp/$1.body.dot
+done < /tmp/$1.tmp
+
+cat /tmp/$1.header.dot | sort | uniq >> $1.dot
+cat /tmp/$1.body.dot >> $1.dot
+echo "}" >> $1.dot
\ No newline at end of file
Property changes on: provenancedb/build_script_run_provenance_graph.sh
___________________________________________________________________
Added: svn:executable
+ *
Modified: provenancedb/etc/provenance.config.ci
===================================================================
--- provenancedb/etc/provenance.config.ci 2012-02-14 05:34:00 UTC (rev 5608)
+++ provenancedb/etc/provenance.config.ci 2012-02-14 16:15:37 UTC (rev 5609)
@@ -1,7 +1,6 @@
# file to source that sets variables for the various paths that are
# presently hardcoded
-# this is the path to log repo on benc's laptop
export LOGREPO=~/swift-logs
export SQLCMD="psql -U provdb -h db.ci.uchicago.edu provdb"
Added: provenancedb/list_script_runs.sh
===================================================================
--- provenancedb/list_script_runs.sh (rev 0)
+++ provenancedb/list_script_runs.sh 2012-02-14 16:15:37 UTC (rev 5609)
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+PROVDIR=$(dirname $0)
+pushd $PROVDIR
+PROVDIR=$(pwd)
+popd
+
+# we need to keep this out of the log-processing dir because import
+# of individual runs will clean other files.
+
+source $PROVDIR/etc/provenance.config
+export PATH=$PROVDIR:$PATH
+
+query="select * from script_run;"
+
+$SQLCMD -c "$query"
\ No newline at end of file
Property changes on: provenancedb/list_script_runs.sh
___________________________________________________________________
Added: svn:executable
+ *
Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql 2012-02-14 05:34:00 UTC (rev 5608)
+++ provenancedb/prov-init.sql 2012-02-14 16:15:37 UTC (rev 5609)
@@ -30,7 +30,7 @@
log_filename varchar(2048),
swift_version varchar(16),
cog_version varchar(16),
- final_state varchar(16),
+ final_state varchar(32),
start_time numeric,
duration numeric,
script_source text,
@@ -60,7 +60,7 @@
proc_name varchar(256), -- name of the app procedure that invokes the transformation
start_time numeric,
duration numeric,
- final_state varchar(16),
+ final_state varchar(32),
scratch varchar(2048)
);
@@ -72,7 +72,7 @@
app_inv_id varchar(256) references app_inv (id) on delete cascade,
start_time numeric,
duration numeric,
- final_state varchar(16),
+ final_state varchar(32),
site varchar(256)
);
@@ -240,7 +240,7 @@
drop view function_call;
create view function_call as
- select proc.id, app_inv.proc_name as name, proc.type, proc.name as tc_name, proc.run_id as script_run_id,
+ select proc.id, proc.name as name, proc.type, app_inv.proc_name as tc_name, proc.run_id as script_run_id,
to_timestamp(app_inv.start_time) as start_time, app_inv.duration, app_inv.final_state, app_inv.scratch
from proc
left outer join
Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh 2012-02-14 05:34:00 UTC (rev 5608)
+++ provenancedb/prov-to-sql.sh 2012-02-14 16:15:37 UTC (rev 5609)
@@ -11,36 +11,42 @@
# this gives a distinction between the root process for a workflow and the
# workflow itself. perhaps better to model the workflow as a process
-$SQLCMD -c "INSERT INTO proc (id, type, name, run_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');"
+#echo "BEGIN TRANSACTION;" > /tmp/$RUNID.sql
+echo "INSERT INTO proc (id, type, name, run_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" >> /tmp/$RUNID.sql
while read time duration thread localthread endstate tr_name scratch; do
- $SQLCMD -c "INSERT INTO proc (id, type, name, run_id) VALUES ('$thread', 'execute', '$tr_name', '$WF');"
- $SQLCMD -c "INSERT INTO app_inv (id, start_time, duration, final_state, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$scratch');"
+ echo "INSERT INTO proc (id, type, run_id) VALUES ('$thread', 'execute', '$WF');" >> /tmp/$RUNID-1.sql
+ echo "INSERT INTO app_inv (id, proc_name, start_time, duration, final_state, scratch) VALUES ('$thread', '$tr_name', $time, $duration, '$endstate', '$scratch');" >> /tmp/$RUNID-2.sql
done < execute.global.event
while read start_time duration globalid id endstate thread site scratch; do
# cut off the last component of the thread, so that we end up at the
# parent thread id which should correspond with the execute-level ID
inv_id="$WFID$(echo $thread | sed 's/-[^-]*$//')"
- $SQLCMD -c "INSERT INTO app_exec (id, app_inv_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');"
+ echo "INSERT INTO app_exec (id, app_inv_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');" >> /tmp/$RUNID-3.sql
done < execute2.global.event
-while read outer inner; do
- $SQLCMD -c "INSERT INTO ds (id) VALUES ('$outer');"
- $SQLCMD -c "INSERT INTO ds (id) VALUES ('$inner');"
- $SQLCMD -c "INSERT INTO ds_cont (out_id, in_id) VALUES ('$outer', '$inner');"
-done < tie-containers.txt
while read dataset filename; do
- $SQLCMD -c "INSERT INTO ds (id) VALUES ('$dataset');"
- $SQLCMD -c "INSERT INTO file (id, name) VALUES ('$dataset', '$filename');"
+ echo "INSERT INTO ds (id) VALUES ('$dataset');" >> /tmp/$RUNID-4.sql
+ echo "INSERT INTO file (id, name) VALUES ('$dataset', '$filename');" >> /tmp/$RUNID-5.sql
done < dataset-filenames.txt
while read dataset idtype equal value rest; do
- $SQLCMD -c "INSERT INTO ds (id) VALUES ('$dataset');"
- $SQLCMD -c "INSERT INTO in_mem (id, value) VALUES ('$dataset', '$value');"
+ echo "INSERT INTO ds (id) VALUES ('$dataset');" >> /tmp/$RUNID-4.sql
+ echo "INSERT INTO in_mem (id, value) VALUES ('$dataset', '$value');" >> /tmp/$RUNID-5.sql
done < dataset-values.txt
+while read outer inner; do
+ echo "INSERT INTO ds (id) VALUES ('$outer');" >> /tmp/$RUNID-4.sql
+ echo "INSERT INTO ds (id) VALUES ('$inner');" >> /tmp/$RUNID-4.sql
+ echo "INSERT INTO ds_cont (out_id, in_id) VALUES ('$outer', '$inner');" >> /tmp/$RUNID-5.sql
+ echo "INSERT INTO proc (id, type, name, run_id) VALUES ('${WFID}constructor:$outer', 'constructor', 'constructor', '$WF');" >> /tmp/$RUNID-1.sql
+ echo "INSERT INTO ds_in (proc_id, ds_id, param) VALUES ('${WFID}constructor:$outer', '$inner', 'element');" >> /tmp/$RUNID-5.sql
+ echo "INSERT INTO ds_out (proc_id, ds_id, param) VALUES ('${WFID}constructor:$outer', '$outer', 'collection');" >> /tmp/$RUNID-5.sql
+done < tie-containers.txt
+
+
while read col1 col2 col3 col4 col5 thread name lhs rhs result; do
thread=$(echo $thread | awk 'BEGIN { FS = "=" }; {print $2}')
name=$(echo $name | awk 'BEGIN { FS = "=" }; {print $2}')
@@ -50,34 +56,34 @@
operatorid="${WFID}operator:$thread"
- #$SQLCMD -c "INSERT INTO ds (id) VALUES ('$lhs');"
- #$SQLCMD -c "INSERT INTO ds (id) VALUES ('$rhs');"
- #$SQLCMD -c "INSERT INTO ds (id) VALUES ('$result');"
- $SQLCMD -c "INSERT INTO proc (id, type, name, run_id) VALUES ('$operatorid', 'operator', '$name', '$WF');"
- $SQLCMD -c "INSERT INTO ds_in (proc_id, ds_id, param) VALUES ('$operatorid', '$lhs', 'lhs');"
- $SQLCMD -c "INSERT INTO ds_in (proc_id, ds_id, param) VALUES ('$operatorid', '$rhs', 'rhs');"
- $SQLCMD -c "INSERT INTO ds_out (proc_id, ds_id, param) VALUES ('$operatorid', '$result', 'result');"
+ echo "INSERT INTO ds (id) VALUES ('$lhs');" >> /tmp/$RUNID-4.sql
+ echo "INSERT INTO ds (id) VALUES ('$rhs');" >> /tmp/$RUNID-4.sql
+ echo "INSERT INTO ds (id) VALUES ('$result');" >> /tmp/$RUNID-4.sql
+ echo "INSERT INTO proc (id, type, name, run_id) VALUES ('$operatorid', 'operator', '$name', '$WF');" >> /tmp/$RUNID-1.sql
+ echo "INSERT INTO ds_in (proc_id, ds_id, param) VALUES ('$operatorid', '$lhs', 'lhs');" >> /tmp/$RUNID-5.sql
+ echo "INSERT INTO ds_in (proc_id, ds_id, param) VALUES ('$operatorid', '$rhs', 'rhs');" >> /tmp/$RUNID-5.sql
+ echo "INSERT INTO ds_out (proc_id, ds_id, param) VALUES ('$operatorid', '$result', 'result');" >> /tmp/$RUNID-5.sql
done < operators.txt
while read id name output; do
- #$SQLCMD -c "INSERT INTO ds (id) VALUES ('$output');"
- $SQLCMD -c "INSERT INTO proc (id, type, name, run_id) VALUES ('$id', 'function', '$name', '$WF');"
- $SQLCMD -c "INSERT INTO ds_out (proc_id, ds_id, param) VALUES ('$id', '$output', 'result');"
+ echo "INSERT INTO ds (id) VALUES ('$output');" >> /tmp/$RUNID-4.sql
+ echo "INSERT INTO proc (id, type, name, run_id) VALUES ('$id', 'function', '$name', '$WF');" >> /tmp/$RUNID-1.sql
+ echo "INSERT INTO ds_out (proc_id, ds_id, param) VALUES ('$id', '$output', 'result');" >> /tmp/$RUNID-5.sql
done < functions.txt
while read id value; do
- #$SQLCMD -c "INSERT INTO ds (id) VALUES ('$value');"
- $SQLCMD -c "INSERT INTO ds_in (proc_id, ds_id, param) VALUES ('$id', '$value', 'undefined');"
+ echo "INSERT INTO ds (id) VALUES ('$value');" >> /tmp/$RUNID-4.sql
+ echo "INSERT INTO ds_in (proc_id, ds_id, param) VALUES ('$id', '$value', 'undefined');" >> /tmp/$RUNID-5.sql
done < function-inputs.txt
while read thread appname; do
- $SQLCMD -c "UPDATE app_inv SET proc_name='$appname' WHERE id='$thread';"
+ echo "UPDATE proc SET name='$appname' WHERE id='$thread';" >> /tmp/$RUNID-3.sql
done < invocation-procedure-names.txt
while read start duration wfid rest; do
- $SQLCMD -c "UPDATE run SET start_time=$start WHERE id='$WF';"
- $SQLCMD -c "UPDATE run SET duration=$duration WHERE id='$WF';"
+ echo "UPDATE run SET start_time=$start WHERE id='$WF';" >> /tmp/$RUNID-1.sql
+ echo "UPDATE run SET duration=$duration WHERE id='$WF';" >> /tmp/$RUNID-1.sql
done < workflow.event
@@ -85,20 +91,20 @@
while read start duration thread final_state procname ; do
if [ "$duration" != "last-event-line" ]; then
compoundid=$WFID$thread
- $SQLCMD -c "INSERT INTO proc (id, type, name, run_id) VALUES ('$compoundid', 'compound', '$procname', '$WF');"
+ echo "INSERT INTO proc (id, type, name, run_id) VALUES ('$compoundid', 'compound', '$procname', '$WF');" >> /tmp/$RUNID-1.sql
fi
done < compound.event
while read start duration thread final_state procname ; do
if [ "$duration" != "last-event-line" ]; then
fqid=$WFID$thread
- $SQLCMD -c "INSERT INTO proc (id, type, name, run_id) VALUES ('$fqid', 'internal', '$procname', '$WF');"
+ echo "INSERT INTO proc (id, type, name, run_id) VALUES ('$fqid', 'internal', '$procname', '$WF');" >> /tmp/$RUNID-1.sql
fi
done < internalproc.event
while read t ; do
thread="${WFID}$t"
- $SQLCMD -c "INSERT INTO proc (id, type, name, run_id) VALUES ('$thread', 'scope', 'scope', '$WF');"
+ echo "INSERT INTO proc (id, type, name, run_id) VALUES ('$thread', 'scope', 'scope', '$WF');" >> /tmp/$RUNID-1.sql
done < scopes.txt
while read thread direction dataset variable rest; do
@@ -108,12 +114,8 @@
table=ds_out
fi
- EXISTING=$($SQLCMD --tuples-only -c "select count(*) from ds where ds.id='$dataset';")
-
- if [ "$EXISTING" -eq "0" ]; then
- $SQLCMD -c "INSERT INTO ds (id) VALUES ('$dataset');"
- fi
- $SQLCMD -c "INSERT INTO $table (proc_id, ds_id, param) VALUES ('$thread', '$dataset', '$variable');"
+ echo "INSERT INTO ds (id) VALUES ('$dataset');" >> /tmp/$RUNID-4.sql
+ echo "INSERT INTO $table (proc_id, ds_id, param) VALUES ('$thread', '$dataset', '$variable');" >> /tmp/$RUNID-5.sql
done < tie-data-invocs.txt
if [ -f extrainfo.txt ]; then
@@ -124,10 +126,10 @@
id=$($SQLCMD --tuples-only -c "select app_inv_id from app_exec where id='$execute2_id';" | awk '{print $1}')
while read name type value; do
if [ "$type" = "num" ]; then
- $SQLCMD -c "INSERT INTO a_proc_n (id, name, value) VALUES ('$id', '$name', $value);"
+ echo "INSERT INTO a_proc_n (id, name, value) VALUES ('$id', '$name', $value);" >> /tmp/$RUNID-6.sql
fi
if [ "$type" = "txt" ]; then
- $SQLCMD -c "INSERT INTO a_proc_t (id, name, value) VALUES ('$id', '$name', '$value');"
+ echo "INSERT INTO a_proc_t (id, name, value) VALUES ('$id', '$name', '$value');" >> /tmp/$RUNID-6.sql
fi
done < fields.txt
done < extrainfo.txt
@@ -141,8 +143,17 @@
max_virtual_mem=$(echo $runtime | awk -F "," '{print $4}' | awk -F ":" '{print $2}')
io_read_bytes=$(echo $runtime | awk -F "," '{print $5}' | awk -F ":" '{print $2}')
io_write_bytes=$(echo $runtime | awk -F "," '{print $6}' | awk -F ":" '{print $2}')
- $SQLCMD -c "INSERT INTO runtime_info (app_execution_id, tstamp, cpu_usage, max_phys_mem, max_virtual_mem, io_read_bytes, io_write_bytes) VALUES ('$execute2_id', $timestamp, $cpu_usage, $max_phys_mem, $max_virtual_mem, $io_read_bytes, $io_write_bytes);"
+ echo "INSERT INTO runtime_info (app_execution_id, tstamp, cpu_usage, max_phys_mem, max_virtual_mem, io_read_bytes, io_write_bytes) VALUES ('$execute2_id', $timestamp, $cpu_usage, $max_phys_mem, $max_virtual_mem, $io_read_bytes, $io_write_bytes);" >> /tmp/$RUNID-6.sql
done < runtime.txt
fi
+for i in `seq 1 6`
+do
+ cat /tmp/$RUNID-$i.sql | sort | uniq >> /tmp/$RUNID.sql
+ #rm /tmp/$RUNID-$i.sql
+done
+
+echo "COMMIT;" >> /tmp/$RUNID.sql
+$SQLCMD -f /tmp/$RUNID.sql
+#rm /tmp/$RUNID.sql
echo Finished sending SQL to DB
\ No newline at end of file
Modified: provenancedb/swift-prov-import-all-logs
===================================================================
--- provenancedb/swift-prov-import-all-logs 2012-02-14 05:34:00 UTC (rev 5608)
+++ provenancedb/swift-prov-import-all-logs 2012-02-14 16:15:37 UTC (rev 5609)
@@ -56,7 +56,8 @@
export WF="${RUNID}"
- $SQLCMD -c "INSERT INTO run (id, log_filename, swift_version, cog_version, final_state) VALUES ('$WF','$filename','$version', '', '$wfstatus');"
+ echo "BEGIN TRANSACTION;" > /tmp/$WF.sql
+ echo "INSERT INTO run (id, log_filename, swift_version, cog_version, final_state) VALUES ('$WF','$filename','$version', '', '$wfstatus');" >> /tmp/$WF.sql
echo version $version in log file $filename
echo ============= will import =============
More information about the Swift-commit mailing list