[Swift-commit] r3713 - provenancedb

noreply at svn.ci.uchicago.edu noreply at svn.ci.uchicago.edu
Tue Nov 23 13:35:18 CST 2010


Author: lgadelha
Date: 2010-11-23 13:35:18 -0600 (Tue, 23 Nov 2010)
New Revision: 3713

Modified:
   provenancedb/prov-init.sql
   provenancedb/prov-to-sql.sh
Log:


Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql	2010-11-23 06:39:25 UTC (rev 3712)
+++ provenancedb/prov-init.sql	2010-11-23 19:35:18 UTC (rev 3713)
@@ -1,28 +1,28 @@
 -- this is the schema definition used for the main relational provenance
 -- implementation (in both sqlite3 and postgres)
    
-DROP TABLE dataset;
-DROP TABLE file;
-DROP TABLE variable;
-DROP TABLE ds_containment;
-DROP TABLE process;
-DROP TABLE execute;
-DROP TABLE execute2;
-DROP TABLE workflow;
-DROP TABLE ds_usage;
-DROP TABLE annot_ds_num;
-DROP TABLE annot_ds_txt;
-DROP TABLE annot_ds_bool;
-DROP TABLE annot_p_num;
-DROP TABLE annot_p_txt;
-DROP TABLE annot_p_bool;
-DROP TABLE annot_wf_num;
-DROP TABLE annot_wf_txt;
-DROP TABLE annot_wf_bool;
-DROP TABLE extrainfo;
-DROP TABLE createarray;
-DROP TABLE createarray_member;
-DROP TABLE array_range;
+DROP TABLE IF EXISTS dataset CASCADE;  -- IF EXISTS: no error when the table is absent (e.g. first load into an empty database)
+DROP TABLE IF EXISTS file CASCADE;
+DROP TABLE IF EXISTS variable CASCADE;
+DROP TABLE IF EXISTS ds_containment CASCADE;
+DROP TABLE IF EXISTS process CASCADE;
+DROP TABLE IF EXISTS execute CASCADE;
+DROP TABLE IF EXISTS execute2 CASCADE;
+DROP TABLE IF EXISTS workflow CASCADE;
+DROP TABLE IF EXISTS ds_usage CASCADE;
+DROP TABLE IF EXISTS annot_ds_num CASCADE;
+DROP TABLE IF EXISTS annot_ds_txt CASCADE;
+DROP TABLE IF EXISTS annot_ds_bool CASCADE;
+DROP TABLE IF EXISTS annot_p_num CASCADE;
+DROP TABLE IF EXISTS annot_p_txt CASCADE;
+DROP TABLE IF EXISTS annot_p_bool CASCADE;
+DROP TABLE IF EXISTS annot_wf_num CASCADE;
+DROP TABLE IF EXISTS annot_wf_txt CASCADE;
+DROP TABLE IF EXISTS annot_wf_bool CASCADE;
+DROP TABLE IF EXISTS extrainfo CASCADE;
+DROP TABLE IF EXISTS createarray CASCADE;
+DROP TABLE IF EXISTS createarray_member CASCADE;
+DROP TABLE IF EXISTS array_range CASCADE;
 
 -- workflow stores some information about each workflow log that has
 -- been seen by the importer: the log filename, swift version and import
@@ -231,6 +231,81 @@
       PRIMARY KEY (array_id,from_id,to_id,step_id)
     );
 
+
+-- process_names(workflow_id): the distinct process names recorded
+-- for the workflow whose id is passed as $1.
+CREATE OR REPLACE FUNCTION process_names(varchar) RETURNS SETOF varchar AS $$
+    SELECT DISTINCT p.name
+    FROM   process AS p
+    WHERE  p.workflow_id = $1;
+$$ LANGUAGE SQL;
+
+
+-- OOPS-specific functions
+-- list_oops_runs(): every distinct value found in annot_wf_txt rows named 'oops_run_id'.
+CREATE OR REPLACE FUNCTION list_oops_runs() RETURNS SETOF varchar AS $$
+    SELECT DISTINCT a.value FROM annot_wf_txt AS a WHERE a.name = 'oops_run_id';
+$$ LANGUAGE SQL;
+
+CREATE TYPE oops_summary AS (oops_run_id varchar, start_time timestamp with time zone, duration_sec numeric, swift_version varchar);
+-- oops_run_summary(run_id): run id, start time, duration and Swift version of each workflow annotated with oops_run_id = $1.
+CREATE OR REPLACE FUNCTION oops_run_summary(varchar) RETURNS SETOF oops_summary AS $$
+    SELECT a.value AS oops_run_id, to_timestamp(w.start_time) AS start_time, w.duration AS duration_sec, w.swift_version AS swift_version
+    FROM   workflow AS w
+    INNER JOIN annot_wf_txt AS a ON a.id = w.id
+    WHERE  a.name = 'oops_run_id' AND a.value = $1;
+$$ LANGUAGE SQL;
+-- oops_process_names(run_id): distinct names of the processes whose workflow is annotated with oops_run_id = $1.
+CREATE OR REPLACE FUNCTION oops_process_names(varchar) RETURNS SETOF varchar AS $$
+    SELECT DISTINCT p.name
+    FROM   process AS p
+    INNER JOIN annot_wf_txt AS a ON a.id = p.workflow_id
+    WHERE  a.name = 'oops_run_id' AND a.value = $1;
+$$ LANGUAGE SQL;
+
+CREATE TYPE oops_variable_summary AS (oops_run_id varchar, param_name varchar, value varchar);
+-- oops_variable_summary(run_id): (run id, parameter name, variable value) for every variable dataset used by a process of a workflow annotated with oops_run_id = $1.
+CREATE OR REPLACE FUNCTION oops_variable_summary(varchar) RETURNS SETOF oops_variable_summary AS $$
+	SELECT annot_wf_txt.value,ds_usage.param_name,variable.value
+	FROM   variable,ds_usage,process,annot_wf_txt
+	WHERE  variable.id=ds_usage.dataset_id AND ds_usage.process_id=process.id AND process.workflow_id=annot_wf_txt.id AND annot_wf_txt.name='oops_run_id' AND annot_wf_txt.value=$1;
+$$ LANGUAGE SQL;
+
+-- oops_science_summary(run_id): like oops_variable_summary, restricted to the parameters proteinId, targetId, seqFile, prot, prepTarFile and nSim.
+CREATE OR REPLACE FUNCTION oops_science_summary(varchar) RETURNS SETOF oops_variable_summary AS $$
+	SELECT annot_wf_txt.value,ds_usage.param_name,variable.value  -- three columns, matching the oops_variable_summary row type
+	FROM   variable,ds_usage,process,annot_wf_txt
+	WHERE  variable.id=ds_usage.dataset_id AND ds_usage.process_id=process.id AND process.workflow_id=annot_wf_txt.id AND
+	       ds_usage.param_name IN ('proteinId','targetId','seqFile','prot','prepTarFile','nSim') AND
+	       annot_wf_txt.name='oops_run_id' AND annot_wf_txt.value=$1;  -- run id comes from the argument, not a fixed literal
+$$ LANGUAGE SQL;
+
+
+
+
+
+-- annotation(entity, name): unfinished stub. The body only branches on entity
+-- ('workflow', 'process', 'dataset', anything else) and every branch is empty.
+-- NOTE(review): a PL/pgSQL body must be a BEGIN ... END block, and a polymorphic
+-- result type (anyelement) requires a polymorphic argument, so PostgreSQL is
+-- expected to reject this definition as written; confirm and complete or remove.
+CREATE OR REPLACE FUNCTION annotation(entity varchar, name varchar) RETURNS anyelement AS $$
+	IF entity = 'workflow' THEN
+		
+	ELSE
+		IF entity = 'process' THEN
+		
+		ELSE
+			IF entity = 'dataset' THEN
+			
+			ELSE
+			
+			END IF;
+		END IF;
+	END IF;	
+$$ LANGUAGE 'plpgsql';
+
+
 -- this GRANT does not work for sqlite; you'll get a syntax error but
 -- ignore it, as it is not needed in sqlite
 grant all on  

Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh	2010-11-23 06:39:25 UTC (rev 3712)
+++ provenancedb/prov-to-sql.sh	2010-11-23 19:35:18 UTC (rev 3713)
@@ -9,13 +9,12 @@
 
 echo Generating SQL for $RUNID
 
-rm -f tmp-import.sql
+rm -f tmp-import.sql import.sql tmp-ds.sql
 
 # this gives a distinction between the root process for a workflow and the
 # workflow itself. perhaps better to model the workflow as a process
 echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" >> tmp-import.sql
 
-
 while read time duration thread localthread endstate tr_name scratch; do
     echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$thread', 'execute', '$tr_name', '$WF');" >> tmp-import.sql
     echo "INSERT INTO execute (id, start_time, duration, final_state, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$scratch');" >> tmp-import.sql
@@ -32,25 +31,16 @@
 
 while read col1 col2 col3 col4 col5 threadst namest lhsst rhsst resultst; do
     thread=`echo $threadst | awk 'BEGIN { FS = "=" }; {print $2}'`
-    name=`echo $name | awk 'BEGIN { FS = "=" }; {print $2}'`
+    name=`echo $namest | awk 'BEGIN { FS = "=" }; {print $2}'`
     lhs=`echo $lhsst | awk 'BEGIN { FS = "=" }; {print $2}'`
     rhs=`echo $rhsst | awk 'BEGIN { FS = "=" }; {print $2}'`
     result=`echo $resultst | awk 'BEGIN { FS = "=" }; {print $2}'`
     
     operatorid="${WFID}operator:$thread"
     
-    EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$lhs';")
-    if [ "$EXISTING" -eq "0" ];  then
-	echo "INSERT INTO dataset (id) VALUES ('$lhs');" >> tmp-import.sql
-    fi
-    EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$rhs';")
-    if [ "$EXISTING" -eq "0" ];  then
-	echo "INSERT INTO dataset (id) VALUES ('$rhs');" >> tmp-import.sql
-    fi    
-    EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$result';")
-    if [ "$EXISTING" -eq "0" ];  then
-	echo "INSERT INTO dataset (id) VALUES ('$result');" >> tmp-import.sql
-    fi
+	echo "INSERT INTO dataset (id) VALUES ('$lhs');" >> tmp-ds.sql
+ 	echo "INSERT INTO dataset (id) VALUES ('$rhs');" >> tmp-ds.sql
+	echo "INSERT INTO dataset (id) VALUES ('$result');" >> tmp-ds.sql
     echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$operatorid', 'operator', '$name', '$WF');" >> tmp-import.sql
     echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$lhs', 'lhs');" >> tmp-import.sql
     echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$rhs', 'rhs');" >> tmp-import.sql
@@ -58,20 +48,14 @@
 done < operators.txt
 
 while read id name output; do
-    EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$id';")
-    if [ "$EXISTING" -eq "0" ];  then
-	echo "INSERT INTO dataset (id) VALUES ('$id');" >> tmp-import.sql
-    fi    
+	echo "INSERT INTO dataset (id) VALUES ('$output');" >> tmp-ds.sql
     echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$id', 'function', '$name', '$WF');" >> tmp-import.sql
     echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'O', '$output', 'result');" >> tmp-import.sql
 done < functions.txt
 
 while read id value; do
     # TODO need ordering/naming
-    EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$id';")
-    if [ "$EXISTING" -eq "0" ];  then
-	echo "INSERT INTO dataset (id) VALUES ('$id');" >> tmp-import.sql
-    fi       
+	echo "INSERT INTO dataset (id) VALUES ('$value');" >> tmp-ds.sql
     echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'I', '$value', 'undefined');" >> tmp-import.sql
 done < function-inputs.txt
 
@@ -81,10 +65,7 @@
     else
 	dir=O
     fi
-    EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$dataset';")
-    if [ "$EXISTING" -eq "0" ];  then
-	echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-import.sql
-    fi       
+	echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
     echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$thread', '$dir', '$dataset', '$variable');" >> tmp-import.sql
 done < tie-data-invocs.txt
 
@@ -93,30 +74,18 @@
 done < invocation-procedure-names.txt
 
 while read outer inner; do
-    EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$outer';")
-    if [ "$EXISTING" -eq "0" ];  then
-	echo "INSERT INTO dataset (id) VALUES ('$outer');" >> tmp-import.sql
-    fi       
-    EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$inner';")
-    if [ "$EXISTING" -eq "0" ];  then
-	echo "INSERT INTO dataset (id) VALUES ('$inner');" >> tmp-import.sql
-    fi       
+	echo "INSERT INTO dataset (id) VALUES ('$outer');" >> tmp-ds.sql
+	echo "INSERT INTO dataset (id) VALUES ('$inner');" >> tmp-ds.sql
     echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');" >> tmp-import.sql
 done < tie-containers.txt
 
 while read dataset filename; do
-    EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$dataset';")
-    if [ "$EXISTING" -eq "0" ];  then
-	echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-import.sql
-    fi           
+	echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
     echo "INSERT INTO file (id, filename) VALUES ('$dataset', '$filename');" >> tmp-import.sql
 done < dataset-filenames.txt
 
 while read dataset value; do
-    EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$dataset';")
-    if [ "$EXISTING" -eq "0" ];  then
-	echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-import.sql
-    fi           
+	echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
     echo "INSERT INTO variable (id, value) VALUES ('$dataset', '$value');" >> tmp-import.sql
 done < dataset-values.txt
 
@@ -154,9 +123,13 @@
     echo "INSERT INTO createarray_member (array_id, ix, member_id) VALUES ('$arrayid', '$index', '$memberid');" >> tmp-import.sql
 done < createarray-members.txt
 
+echo "BEGIN;" > import.sql            # start import.sql afresh; the whole import runs as one transaction
+sort -u tmp-ds.sql >> import.sql      # dataset INSERTs go first, each distinct statement only once
+cat tmp-import.sql >> import.sql      # then every other generated statement, in generation order
+echo "COMMIT;" >> import.sql
 echo Sending SQL to DB
 
-$SQLCMD < tmp-import.sql
+$SQLCMD < import.sql
 
 echo Finished sending SQL to DB
 




More information about the Swift-commit mailing list