[Swift-commit] r3713 - provenancedb
noreply at svn.ci.uchicago.edu
noreply at svn.ci.uchicago.edu
Tue Nov 23 13:35:18 CST 2010
Author: lgadelha
Date: 2010-11-23 13:35:18 -0600 (Tue, 23 Nov 2010)
New Revision: 3713
Modified:
provenancedb/prov-init.sql
provenancedb/prov-to-sql.sh
Log:
Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql 2010-11-23 06:39:25 UTC (rev 3712)
+++ provenancedb/prov-init.sql 2010-11-23 19:35:18 UTC (rev 3713)
@@ -1,28 +1,28 @@
-- this is the schema definition used for the main relational provenance
-- implementation (in both sqlite3 and postgres)
-DROP TABLE dataset;
-DROP TABLE file;
-DROP TABLE variable;
-DROP TABLE ds_containment;
-DROP TABLE process;
-DROP TABLE execute;
-DROP TABLE execute2;
-DROP TABLE workflow;
-DROP TABLE ds_usage;
-DROP TABLE annot_ds_num;
-DROP TABLE annot_ds_txt;
-DROP TABLE annot_ds_bool;
-DROP TABLE annot_p_num;
-DROP TABLE annot_p_txt;
-DROP TABLE annot_p_bool;
-DROP TABLE annot_wf_num;
-DROP TABLE annot_wf_txt;
-DROP TABLE annot_wf_bool;
-DROP TABLE extrainfo;
-DROP TABLE createarray;
-DROP TABLE createarray_member;
-DROP TABLE array_range;
+-- Reset the schema. IF EXISTS keeps a first run against an empty database
+-- (or a re-run after a partial failure) from raising an error per missing
+-- table; CASCADE also removes objects that depend on each table.
+DROP TABLE IF EXISTS dataset CASCADE;
+DROP TABLE IF EXISTS file CASCADE;
+DROP TABLE IF EXISTS variable CASCADE;
+DROP TABLE IF EXISTS ds_containment CASCADE;
+DROP TABLE IF EXISTS process CASCADE;
+DROP TABLE IF EXISTS execute CASCADE;
+DROP TABLE IF EXISTS execute2 CASCADE;
+DROP TABLE IF EXISTS workflow CASCADE;
+DROP TABLE IF EXISTS ds_usage CASCADE;
+DROP TABLE IF EXISTS annot_ds_num CASCADE;
+DROP TABLE IF EXISTS annot_ds_txt CASCADE;
+DROP TABLE IF EXISTS annot_ds_bool CASCADE;
+DROP TABLE IF EXISTS annot_p_num CASCADE;
+DROP TABLE IF EXISTS annot_p_txt CASCADE;
+DROP TABLE IF EXISTS annot_p_bool CASCADE;
+DROP TABLE IF EXISTS annot_wf_num CASCADE;
+DROP TABLE IF EXISTS annot_wf_txt CASCADE;
+DROP TABLE IF EXISTS annot_wf_bool CASCADE;
+DROP TABLE IF EXISTS extrainfo CASCADE;
+DROP TABLE IF EXISTS createarray CASCADE;
+DROP TABLE IF EXISTS createarray_member CASCADE;
+DROP TABLE IF EXISTS array_range CASCADE;
-- workflow stores some information about each workflow log that has
-- been seen by the importer: the log filename, swift version and import
@@ -231,6 +231,81 @@
PRIMARY KEY (array_id,from_id,to_id,step_id)
);
+
+-- lists distinct processes by name in a workflow
+
+CREATE OR REPLACE FUNCTION process_names(varchar) RETURNS SETOF varchar AS $$
+ -- $1 is matched against process.workflow_id; each name is returned once
+ SELECT DISTINCT p.name
+ FROM process AS p
+ WHERE p.workflow_id = $1;
+$$ LANGUAGE SQL;
+
+
+-- OOPS-specific functions
+
+CREATE OR REPLACE FUNCTION list_oops_runs() RETURNS SETOF varchar AS $$
+ -- every distinct value stored under the 'oops_run_id' workflow annotation
+ SELECT DISTINCT a.value
+ FROM annot_wf_txt AS a
+ WHERE a.name = 'oops_run_id';
+$$ LANGUAGE SQL;
+
+-- row shape returned by oops_run_summary
+CREATE TYPE oops_summary AS (
+ oops_run_id varchar,
+ start_time timestamp with time zone,
+ duration_sec numeric,
+ swift_version varchar
+);
+
+-- summarises the workflow(s) whose 'oops_run_id' annotation equals $1
+CREATE OR REPLACE FUNCTION oops_run_summary(varchar) RETURNS SETOF oops_summary AS $$
+ SELECT a.value AS oops_run_id,
+        to_timestamp(w.start_time) AS start_time,
+        w.duration AS duration_sec,
+        w.swift_version AS swift_version
+ FROM annot_wf_txt AS a
+ INNER JOIN workflow AS w ON w.id = a.id
+ WHERE a.name = 'oops_run_id'
+   AND a.value = $1;
+$$ LANGUAGE SQL;
+
+-- distinct process names of the workflow(s) whose 'oops_run_id' annotation equals $1
+CREATE OR REPLACE FUNCTION oops_process_names(varchar) RETURNS SETOF varchar AS $$
+ SELECT DISTINCT p.name
+ FROM process AS p
+ INNER JOIN annot_wf_txt AS a ON a.id = p.workflow_id
+ WHERE a.name = 'oops_run_id'
+   AND a.value = $1;
+$$ LANGUAGE SQL;
+
+-- row shape shared by the OOPS variable/science summary functions
+CREATE TYPE oops_variable_summary AS (oops_run_id varchar, param_name varchar, value varchar);
+
+-- For the OOPS run id passed as $1, returns one (oops_run_id, param_name, value)
+-- row per variable dataset used by a process of the annotated workflow.
+CREATE OR REPLACE FUNCTION oops_variable_summary(varchar) RETURNS SETOF oops_variable_summary AS $$
+ SELECT annot_wf_txt.value, ds_usage.param_name, variable.value
+ FROM variable
+ INNER JOIN ds_usage ON ds_usage.dataset_id = variable.id
+ INNER JOIN process ON process.id = ds_usage.process_id
+ INNER JOIN annot_wf_txt ON annot_wf_txt.id = process.workflow_id
+ -- keep only the 'oops_run_id' annotation of the requested run; without this
+ -- filter every text annotation of every workflow is joined in and $1 is ignored
+ WHERE annot_wf_txt.name = 'oops_run_id'
+   AND annot_wf_txt.value = $1;
+$$ LANGUAGE SQL;
+
+-- For the OOPS run id passed as $1, returns (oops_run_id, param_name, value)
+-- rows for variable datasets whose parameter name is in the fixed list below.
+CREATE OR REPLACE FUNCTION oops_science_summary(varchar) RETURNS SETOF oops_variable_summary AS $$
+ -- three columns are selected so the result matches the oops_variable_summary row type
+ SELECT annot_wf_txt.value, ds_usage.param_name, variable.value
+ FROM variable
+ INNER JOIN ds_usage ON ds_usage.dataset_id = variable.id
+ INNER JOIN process ON process.id = ds_usage.process_id
+ INNER JOIN annot_wf_txt ON annot_wf_txt.id = process.workflow_id
+ WHERE ds_usage.param_name IN ('proteinId', 'targetId', 'seqFile', 'prot', 'prepTarFile', 'nSim')
+   AND annot_wf_txt.name = 'oops_run_id'
+   -- use the caller's run id rather than a single hard-coded run
+   AND annot_wf_txt.value = $1;
+$$ LANGUAGE SQL;
+
+
+
+
+
+
+
+
+
+
+-- NOTE(review): unfinished stub. The body dispatches on entity being
+-- 'workflow', 'process' or 'dataset', but every branch is empty and nothing
+-- is returned; what each branch should look up is not shown here.
+-- The body also has no BEGIN ... END block, and RETURNS anyelement is declared
+-- without any polymorphic argument, so PostgreSQL is not expected to accept
+-- this definition as written -- TODO confirm, then complete or remove it.
+CREATE OR REPLACE FUNCTION annotation(entity varchar, name varchar) RETURNS anyelement AS $$
+ IF entity = 'workflow' THEN
+
+ ELSE
+ IF entity = 'process' THEN
+
+ ELSE
+ IF entity = 'dataset' THEN
+
+ ELSE
+
+ END IF;
+ END IF;
+ END IF;
+$$ LANGUAGE 'plpgsql';
+
+
-- this GRANT does not work for sqlite; you'll get a syntax error but
-- ignore it, as it is not needed in sqlite
grant all on
Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh 2010-11-23 06:39:25 UTC (rev 3712)
+++ provenancedb/prov-to-sql.sh 2010-11-23 19:35:18 UTC (rev 3713)
@@ -9,13 +9,12 @@
echo Generating SQL for $RUNID
-rm -f tmp-import.sql
+rm -f tmp-import.sql import.sql tmp-ds.sql
# this gives a distinction between the root process for a workflow and the
# workflow itself. perhaps better to model the workflow as a process
echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" >> tmp-import.sql
-
while read time duration thread localthread endstate tr_name scratch; do
echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$thread', 'execute', '$tr_name', '$WF');" >> tmp-import.sql
echo "INSERT INTO execute (id, start_time, duration, final_state, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$scratch');" >> tmp-import.sql
@@ -32,25 +31,16 @@
while read col1 col2 col3 col4 col5 threadst namest lhsst rhsst resultst; do
thread=`echo $threadst | awk 'BEGIN { FS = "=" }; {print $2}'`
- name=`echo $name | awk 'BEGIN { FS = "=" }; {print $2}'`
+ name=`echo $namest | awk 'BEGIN { FS = "=" }; {print $2}'`
lhs=`echo $lhsst | awk 'BEGIN { FS = "=" }; {print $2}'`
rhs=`echo $rhsst | awk 'BEGIN { FS = "=" }; {print $2}'`
result=`echo $resultst | awk 'BEGIN { FS = "=" }; {print $2}'`
operatorid="${WFID}operator:$thread"
- EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$lhs';")
- if [ "$EXISTING" -eq "0" ]; then
- echo "INSERT INTO dataset (id) VALUES ('$lhs');" >> tmp-import.sql
- fi
- EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$rhs';")
- if [ "$EXISTING" -eq "0" ]; then
- echo "INSERT INTO dataset (id) VALUES ('$rhs');" >> tmp-import.sql
- fi
- EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$result';")
- if [ "$EXISTING" -eq "0" ]; then
- echo "INSERT INTO dataset (id) VALUES ('$result');" >> tmp-import.sql
- fi
+ echo "INSERT INTO dataset (id) VALUES ('$lhs');" >> tmp-ds.sql
+ echo "INSERT INTO dataset (id) VALUES ('$rhs');" >> tmp-ds.sql
+ echo "INSERT INTO dataset (id) VALUES ('$result');" >> tmp-ds.sql
echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$operatorid', 'operator', '$name', '$WF');" >> tmp-import.sql
echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$lhs', 'lhs');" >> tmp-import.sql
echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$rhs', 'rhs');" >> tmp-import.sql
@@ -58,20 +48,14 @@
done < operators.txt
while read id name output; do
- EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$id';")
- if [ "$EXISTING" -eq "0" ]; then
- echo "INSERT INTO dataset (id) VALUES ('$id');" >> tmp-import.sql
- fi
+ echo "INSERT INTO dataset (id) VALUES ('$output');" >> tmp-ds.sql
echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$id', 'function', '$name', '$WF');" >> tmp-import.sql
echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'O', '$output', 'result');" >> tmp-import.sql
done < functions.txt
while read id value; do
# TODO need ordering/naming
- EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$id';")
- if [ "$EXISTING" -eq "0" ]; then
- echo "INSERT INTO dataset (id) VALUES ('$id');" >> tmp-import.sql
- fi
+ echo "INSERT INTO dataset (id) VALUES ('$value');" >> tmp-ds.sql
echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'I', '$value', 'undefined');" >> tmp-import.sql
done < function-inputs.txt
@@ -81,10 +65,7 @@
else
dir=O
fi
- EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$dataset';")
- if [ "$EXISTING" -eq "0" ]; then
- echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-import.sql
- fi
+ echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$thread', '$dir', '$dataset', '$variable');" >> tmp-import.sql
done < tie-data-invocs.txt
@@ -93,30 +74,18 @@
done < invocation-procedure-names.txt
while read outer inner; do
- EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$outer';")
- if [ "$EXISTING" -eq "0" ]; then
- echo "INSERT INTO dataset (id) VALUES ('$outer');" >> tmp-import.sql
- fi
- EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$inner';")
- if [ "$EXISTING" -eq "0" ]; then
- echo "INSERT INTO dataset (id) VALUES ('$inner');" >> tmp-import.sql
- fi
+ echo "INSERT INTO dataset (id) VALUES ('$outer');" >> tmp-ds.sql
+ echo "INSERT INTO dataset (id) VALUES ('$inner');" >> tmp-ds.sql
echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');" >> tmp-import.sql
done < tie-containers.txt
while read dataset filename; do
- EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$dataset';")
- if [ "$EXISTING" -eq "0" ]; then
- echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-import.sql
- fi
+ echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
echo "INSERT INTO file (id, filename) VALUES ('$dataset', '$filename');" >> tmp-import.sql
done < dataset-filenames.txt
while read dataset value; do
- EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$dataset';")
- if [ "$EXISTING" -eq "0" ]; then
- echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-import.sql
- fi
+ echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
echo "INSERT INTO variable (id, value) VALUES ('$dataset', '$value');" >> tmp-import.sql
done < dataset-values.txt
@@ -154,9 +123,13 @@
echo "INSERT INTO createarray_member (array_id, ix, member_id) VALUES ('$arrayid', '$index', '$memberid');" >> tmp-import.sql
done < createarray-members.txt
+# Assemble import.sql as a single transaction: the de-duplicated dataset
+# inserts come first, followed by all other generated statements.
+{
+  echo "BEGIN;"
+  sort tmp-ds.sql | uniq
+  cat tmp-import.sql
+  echo "COMMIT;"
+} > import.sql
echo Sending SQL to DB
-$SQLCMD < tmp-import.sql
+$SQLCMD < import.sql
echo Finished sending SQL to DB
More information about the Swift-commit
mailing list