[Swift-commit] r3714 - provenancedb
noreply at svn.ci.uchicago.edu
noreply at svn.ci.uchicago.edu
Tue Nov 23 15:36:41 CST 2010
Author: lgadelha
Date: 2010-11-23 15:36:40 -0600 (Tue, 23 Nov 2010)
New Revision: 3714
Modified:
provenancedb/prov-init.sql
provenancedb/prov-to-sql.sh
Log:
Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql 2010-11-23 19:35:18 UTC (rev 3713)
+++ provenancedb/prov-init.sql 2010-11-23 21:36:40 UTC (rev 3714)
@@ -281,31 +281,6 @@
$$ LANGUAGE SQL;
-
-
-
-
-
-
-
-
-CREATE OR REPLACE FUNCTION annotation(entity varchar, name varchar) RETURNS anyelement AS $$
- IF entity = 'workflow' THEN
-
- ELSE
- IF entity = 'process' THEN
-
- ELSE
- IF entity = 'dataset' THEN
-
- ELSE
-
- END IF;
- END IF;
- END IF;
-$$ LANGUAGE 'plpgsql';
-
-
-- this GRANT does not work for sqlite; you'll get a syntax error but
-- ignore it, as it is not needed in sqlite
grant all on
Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh 2010-11-23 19:35:18 UTC (rev 3713)
+++ provenancedb/prov-to-sql.sh 2010-11-23 21:36:40 UTC (rev 3714)
@@ -9,22 +9,22 @@
echo Generating SQL for $RUNID
-rm -f tmp-import.sql import.sql tmp-ds.sql
+rm -f tmp-ds.sql tmp-p.sql tmp-e.sql tmp-e2.sql tmp-dsu.sql tmp-dsc.sql tmp-f.sql tmp-v.sql tmp-import.sql import.sql
# this gives a distinction between the root process for a workflow and the
# workflow itself. perhaps better to model the workflow as a process
-echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" >> tmp-import.sql
+echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" >> tmp-p.sql
while read time duration thread localthread endstate tr_name scratch; do
- echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$thread', 'execute', '$tr_name', '$WF');" >> tmp-import.sql
- echo "INSERT INTO execute (id, start_time, duration, final_state, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$scratch');" >> tmp-import.sql
+ echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$thread', 'execute', '$tr_name', '$WF');" >> tmp-p.sql
+ echo "INSERT INTO execute (id, start_time, duration, final_state, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$scratch');" >> tmp-e.sql
done < execute.global.event
while read start_time duration globalid id endstate thread site scratch; do
# cut off the last component of the thread, so that we end up at the
# parent thread id which should correspond with the execute-level ID
inv_id="$WFID$(echo $thread | sed 's/-[^-]*$//')"
- echo "INSERT INTO execute2 (id, execute_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');" >> tmp-import.sql
+ echo "INSERT INTO execute2 (id, execute_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');" >> tmp-e2.sql
done < execute2.global.event
@@ -41,57 +41,48 @@
echo "INSERT INTO dataset (id) VALUES ('$lhs');" >> tmp-ds.sql
echo "INSERT INTO dataset (id) VALUES ('$rhs');" >> tmp-ds.sql
echo "INSERT INTO dataset (id) VALUES ('$result');" >> tmp-ds.sql
- echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$operatorid', 'operator', '$name', '$WF');" >> tmp-import.sql
- echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$lhs', 'lhs');" >> tmp-import.sql
- echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$rhs', 'rhs');" >> tmp-import.sql
- echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'O', '$result', 'result');" >> tmp-import.sql
+ echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$operatorid', 'operator', '$name', '$WF');" >> tmp-p.sql
+ echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$lhs', 'lhs');" >> tmp-dsu.sql
+ echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$rhs', 'rhs');" >> tmp-dsu.sql
+ echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'O', '$result', 'result');" >> tmp-dsu.sql
done < operators.txt
while read id name output; do
echo "INSERT INTO dataset (id) VALUES ('$output');" >> tmp-ds.sql
- echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$id', 'function', '$name', '$WF');" >> tmp-import.sql
- echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'O', '$output', 'result');" >> tmp-import.sql
+ echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$id', 'function', '$name', '$WF');" >> tmp-p.sql
+ echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'O', '$output', 'result');" >> tmp-dsu.sql
done < functions.txt
while read id value; do
# TODO need ordering/naming
echo "INSERT INTO dataset (id) VALUES ('$value');" >> tmp-ds.sql
- echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'I', '$value', 'undefined');" >> tmp-import.sql
+ echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'I', '$value', 'undefined');" >> tmp-dsu.sql
done < function-inputs.txt
-while read thread direction dataset variable rest; do
- if [ "$direction" == "input" ] ; then
- dir=I
- else
- dir=O
- fi
- echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
- echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$thread', '$dir', '$dataset', '$variable');" >> tmp-import.sql
-done < tie-data-invocs.txt
while read thread appname; do
- echo "UPDATE execute SET procedure_name='$appname' WHERE id='$thread';" >> tmp-import.sql
+ echo "UPDATE execute SET procedure_name='$appname' WHERE id='$thread';" >> tmp-e.sql
done < invocation-procedure-names.txt
while read outer inner; do
echo "INSERT INTO dataset (id) VALUES ('$outer');" >> tmp-ds.sql
echo "INSERT INTO dataset (id) VALUES ('$inner');" >> tmp-ds.sql
- echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');" >> tmp-import.sql
+ echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');" >> tmp-dsc.sql
done < tie-containers.txt
while read dataset filename; do
echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
- echo "INSERT INTO file (id, filename) VALUES ('$dataset', '$filename');" >> tmp-import.sql
+ echo "INSERT INTO file (id, filename) VALUES ('$dataset', '$filename');" >> tmp-f.sql
done < dataset-filenames.txt
while read dataset value; do
echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
- echo "INSERT INTO variable (id, value) VALUES ('$dataset', '$value');" >> tmp-import.sql
+ echo "INSERT INTO variable (id, value) VALUES ('$dataset', '$value');" >> tmp-v.sql
done < dataset-values.txt
while read start duration wfid rest; do
- echo "UPDATE workflow SET start_time=$start WHERE id='$WF';" >> tmp-import.sql
- echo "UPDATE workflow SET duration=$duration WHERE id='$WF';" >> tmp-import.sql
+ echo "UPDATE workflow SET start_time=$start WHERE id='$WF';" >> tmp-u.sql
+ echo "UPDATE workflow SET duration=$duration WHERE id='$WF';" >> tmp-u.sql
done < workflow.event
#while read id extrainfo ; do
@@ -102,19 +93,29 @@
# TODO this could merge with other naming tables
while read start duration thread final_state procname ; do
compoundid=$WFID$thread
- echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$compoundid', 'compound', '$procname', '$WF');" >> tmp-import.sql
+ echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$compoundid', 'compound', '$procname', '$WF');" >> tmp-p.sql
done < compound.event
while read start duration thread final_state procname ; do
fqid=$WFID$thread
- echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$fqid', 'internal', '$procname', '$WF');" >> tmp-import.sql
+ echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$fqid', 'internal', '$procname', '$WF');" >> tmp-p.sql
done < internalproc.event
while read t ; do
thread="${WFID}$t"
- echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$thread', 'scope', 'scope', '$WF');" >> tmp-import.sql
+ echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$thread', 'scope', 'scope', '$WF');" >> tmp-p.sql
done < scopes.txt
+while read thread direction dataset variable rest; do
+ if [ "$direction" == "input" ] ; then
+ dir=I
+ else
+ dir=O
+ fi
+ echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
+ echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$thread', '$dir', '$dataset', '$variable');" >> tmp-dsu.sql
+done < tie-data-invocs.txt
+
while read id ; do
echo "INSERT INTO createarray (array_id) VALUES ('$id');" >> tmp-import.sql
done < createarray.txt
@@ -123,10 +124,17 @@
echo "INSERT INTO createarray_member (array_id, ix, member_id) VALUES ('$arrayid', '$index', '$memberid');" >> tmp-import.sql
done < createarray-members.txt
-echo "BEGIN;" > import.sql
-cat tmp-ds.sql | sort | uniq >> import.sql
-cat tmp-import.sql >> import.sql
-echo "COMMIT;" >> import.sql
+cat tmp-ds.sql | sort | uniq > import.sql
+cat tmp-f.sql | sort | uniq >> import.sql
+cat tmp-v.sql | sort | uniq >> import.sql
+cat tmp-p.sql | sort | uniq >> import.sql
+cat tmp-e.sql | sort | uniq >> import.sql
+cat tmp-e2.sql | sort | uniq >> import.sql
+cat tmp-dsu.sql | sort | uniq >> import.sql
+cat tmp-dsc.sql | sort | uniq >> import.sql
+cat tmp-import.sql | sort | uniq >> import.sql
+cat tmp-u.sql | sort | uniq >> import.sql
+
echo Sending SQL to DB
$SQLCMD < import.sql
More information about the Swift-commit mailing list