[Swift-commit] r3966 - provenancedb
noreply at svn.ci.uchicago.edu
noreply at svn.ci.uchicago.edu
Fri Jan 14 17:27:10 CST 2011
Author: lgadelha
Date: 2011-01-14 17:27:10 -0600 (Fri, 14 Jan 2011)
New Revision: 3966
Modified:
provenancedb/pql_functions.sql
provenancedb/prov-to-sql.sh
Log:
Added generic annotation process using SWIFT_EXTRA_INFO from env profile.
Modified: provenancedb/pql_functions.sql
===================================================================
--- provenancedb/pql_functions.sql 2011-01-12 19:35:11 UTC (rev 3965)
+++ provenancedb/pql_functions.sql 2011-01-14 23:27:10 UTC (rev 3966)
@@ -152,15 +152,17 @@
-- recursive query to find ancestor entities in a provenance graph
-create or replace function ancestors(varchar) returns setof varchar as $$
- with recursive anc(ancestor,descendant) as
+CREATE OR REPLACE FUNCTION ancestors(varchar) RETURNS SETOF varchar AS $$
+ WITH RECURSIVE anc(ancestor,descendant) AS
(
- select parent as ancestor, child as descendant from parent_of where child=$1
- union all
- select parent_of.parent as ancestor, anc.descendant as descendant
- from anc,parent_of
- where anc.ancestor=parent_of.child
+ SELECT parent AS ancestor, child AS descendant FROM parent_of WHERE child=$1
+ UNION
+ SELECT parent_of.parent AS ancestor, anc.descendant AS descendant
+ FROM anc,parent_of
+ WHERE anc.ancestor=parent_of.child
)
- select ancestor from anc
-$$ language sql;
+ SELECT ancestor FROM anc
+$$ LANGUAGE SQL;
+
+
Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh 2011-01-12 19:35:11 UTC (rev 3965)
+++ provenancedb/prov-to-sql.sh 2011-01-14 23:27:10 UTC (rev 3966)
@@ -9,22 +9,21 @@
echo Generating SQL for $RUNID
-rm -f tmp-u.sql tmp-ds.sql tmp-p.sql tmp-e.sql tmp-e2.sql tmp-dsu.sql tmp-dsc.sql tmp-f.sql tmp-v.sql tmp-import.sql import.sql
# this gives a distinction between the root process for a workflow and the
# workflow itself. perhaps better to model the workflow as a process
-echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" >> tmp-p.sql
+echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" > tmp-p.sql
while read time duration thread localthread endstate tr_name scratch; do
echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$thread', 'execute', '$tr_name', '$WF');" >> tmp-p.sql
- echo "INSERT INTO execute (id, start_time, duration, final_state, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$scratch');" >> tmp-e.sql
+ echo "INSERT INTO execute (id, start_time, duration, final_state, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$scratch');" > tmp-e.sql
done < execute.global.event
while read start_time duration globalid id endstate thread site scratch; do
# cut off the last component of the thread, so that we end up at the
# parent thread id which should correspond with the execute-level ID
inv_id="$WFID$(echo $thread | sed 's/-[^-]*$//')"
- echo "INSERT INTO execute2 (id, execute_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');" >> tmp-e2.sql
+ echo "INSERT INTO execute2 (id, execute_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');" > tmp-e2.sql
done < execute2.global.event
while read col1 col2 col3 col4 col5 thread name lhs rhs result; do
@@ -42,11 +41,11 @@
result=$(echo $result | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g')
fi
- echo "INSERT INTO dataset (id) VALUES ('$lhs');" >> tmp-ds.sql
+ echo "INSERT INTO dataset (id) VALUES ('$lhs');" > tmp-ds.sql
echo "INSERT INTO dataset (id) VALUES ('$rhs');" >> tmp-ds.sql
echo "INSERT INTO dataset (id) VALUES ('$result');" >> tmp-ds.sql
echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$operatorid', 'operator', '$name', '$WF');" >> tmp-p.sql
- echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$lhs', 'lhs');" >> tmp-dsu.sql
+ echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$lhs', 'lhs');" > tmp-dsu.sql
echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$rhs', 'rhs');" >> tmp-dsu.sql
echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'O', '$result', 'result');" >> tmp-dsu.sql
done < operators.txt
@@ -85,7 +84,7 @@
echo "INSERT INTO dataset (id) VALUES ('$outer');" >> tmp-ds.sql
echo "INSERT INTO dataset (id) VALUES ('$inner');" >> tmp-ds.sql
- echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');" >> tmp-dsc.sql
+ echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');" > tmp-dsc.sql
done < tie-containers.txt
while read dataset filename; do
@@ -95,7 +94,7 @@
fi
echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
- echo "INSERT INTO file (id, filename) VALUES ('$dataset', '$filename');" >> tmp-f.sql
+ echo "INSERT INTO file (id, filename) VALUES ('$dataset', '$filename');" > tmp-f.sql
done < dataset-filenames.txt
while read dataset value; do
@@ -105,19 +104,39 @@
fi
echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
- echo "INSERT INTO variable (id, value) VALUES ('$dataset', '$value');" >> tmp-v.sql
+ echo "INSERT INTO variable (id, value) VALUES ('$dataset', '$value');" > tmp-v.sql
done < dataset-values.txt
while read start duration wfid rest; do
- echo "UPDATE workflow SET start_time=$start WHERE id='$WF';" >> tmp-u.sql
+ echo "UPDATE workflow SET start_time=$start WHERE id='$WF';" > tmp-u.sql
echo "UPDATE workflow SET duration=$duration WHERE id='$WF';" >> tmp-u.sql
done < workflow.event
-#while read id extrainfo ; do
-# TODO this will not like quotes and things like that in extrainfo
-# echo "INSERT INTO extrainfo (id, extrainfo) VALUES ('$id', '$extrainfo');" >> tmp-import.sql
-#done < extrainfo.txt
+
+while read id extrainfo; do
+ echo $extrainfo | awk -F ";" '{ for (i = 1; i <= NF; i++)
+ print $i
+ }' | awk -F "=" '{ print $1 " " $2 }' | awk -F ":" '{ print $1 " " $2 }' > fields.txt
+ while read name type value; do
+ if [ "$type" = "num" ]; then
+ echo "INSERT INTO annot_p_num (id, name, value) VALUES ('$id', '$name', $value);" > tmp-import.sql
+ fi
+ if [ "$type" = "txt" ]; then
+ echo "INSERT INTO annot_p_txt (id, name, value) VALUES ('$id', '$name', '$value');" >> tmp-import.sql
+ fi
+ if [ "$type" = "bool" ]; then
+ echo "INSERT INTO annot_p_bool (id, name, value) VALUES ('$id', '$name', $value);" >> tmp-import.sql
+ fi
+ done < fields.txt
+done < extrainfo.txt
+
+
+while read id extrainfo ; do
+
+ echo "INSERT INTO extrainfo (id, extrainfo) VALUES ('$id', '$extrainfo');" >> tmp-import.sql
+done < extrainfo.txt
+
# TODO this could merge with other naming tables
while read start duration thread final_state procname ; do
if [ "$duration" != "last-event-line" ]; then
@@ -174,6 +193,7 @@
echo Sending SQL to DB
$SQLCMD < import.sql
+rm -f tmp-u.sql tmp-ds.sql tmp-p.sql tmp-e.sql tmp-e2.sql tmp-dsu.sql tmp-dsc.sql tmp-f.sql tmp-v.sql tmp-import.sql import.sql fields.txt
echo Finished sending SQL to DB
More information about the Swift-commit mailing list