[Swift-commit] r3714 - provenancedb

noreply at svn.ci.uchicago.edu noreply at svn.ci.uchicago.edu
Tue Nov 23 15:36:41 CST 2010


Author: lgadelha
Date: 2010-11-23 15:36:40 -0600 (Tue, 23 Nov 2010)
New Revision: 3714

Modified:
   provenancedb/prov-init.sql
   provenancedb/prov-to-sql.sh
Log:


Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql	2010-11-23 19:35:18 UTC (rev 3713)
+++ provenancedb/prov-init.sql	2010-11-23 21:36:40 UTC (rev 3714)
@@ -281,31 +281,6 @@
 $$ LANGUAGE SQL;
 
 
-
-
-
-
-
-
-
-
-CREATE OR REPLACE FUNCTION annotation(entity varchar, name varchar) RETURNS anyelement AS $$
-	IF entity = 'workflow' THEN
-		
-	ELSE
-		IF entity = 'process' THEN
-		
-		ELSE
-			IF entity = 'dataset' THEN
-			
-			ELSE
-			
-			END IF;
-		END IF;
-	END IF;	
-$$ LANGUAGE 'plpgsql';
-
-
 -- this GRANT does not work for sqlite; you'll get a syntax error but
 -- ignore it, as it is not needed in sqlite
 grant all on  

Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh	2010-11-23 19:35:18 UTC (rev 3713)
+++ provenancedb/prov-to-sql.sh	2010-11-23 21:36:40 UTC (rev 3714)
@@ -9,22 +9,22 @@
 
 echo Generating SQL for $RUNID
 
-rm -f tmp-import.sql import.sql tmp-ds.sql
+rm -f tmp-ds.sql tmp-p.sql tmp-e.sql tmp-e2.sql tmp-dsu.sql tmp-dsc.sql tmp-f.sql tmp-v.sql tmp-u.sql tmp-import.sql import.sql
 
 # this gives a distinction between the root process for a workflow and the
 # workflow itself. perhaps better to model the workflow as a process
-echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" >> tmp-import.sql
+echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" >> tmp-p.sql
 
 while read time duration thread localthread endstate tr_name scratch; do
-    echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$thread', 'execute', '$tr_name', '$WF');" >> tmp-import.sql
-    echo "INSERT INTO execute (id, start_time, duration, final_state, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$scratch');" >> tmp-import.sql
+    echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$thread', 'execute', '$tr_name', '$WF');" >> tmp-p.sql
+    echo "INSERT INTO execute (id, start_time, duration, final_state, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$scratch');" >> tmp-e.sql
 done < execute.global.event
 
 while read start_time duration globalid id endstate thread site scratch; do
     # cut off the last component of the thread, so that we end up at the
     # parent thread id which should correspond with the execute-level ID
     inv_id="$WFID$(echo $thread | sed 's/-[^-]*$//')"
-    echo "INSERT INTO execute2 (id, execute_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');" >> tmp-import.sql
+    echo "INSERT INTO execute2 (id, execute_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');" >> tmp-e2.sql
 done < execute2.global.event
 
 
@@ -41,57 +41,48 @@
 	echo "INSERT INTO dataset (id) VALUES ('$lhs');" >> tmp-ds.sql
  	echo "INSERT INTO dataset (id) VALUES ('$rhs');" >> tmp-ds.sql
 	echo "INSERT INTO dataset (id) VALUES ('$result');" >> tmp-ds.sql
-    echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$operatorid', 'operator', '$name', '$WF');" >> tmp-import.sql
-    echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$lhs', 'lhs');" >> tmp-import.sql
-    echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$rhs', 'rhs');" >> tmp-import.sql
-    echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'O', '$result', 'result');" >> tmp-import.sql
+    echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$operatorid', 'operator', '$name', '$WF');" >> tmp-p.sql
+    echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$lhs', 'lhs');" >> tmp-dsu.sql
+    echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$rhs', 'rhs');" >> tmp-dsu.sql
+    echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'O', '$result', 'result');" >> tmp-dsu.sql
 done < operators.txt
 
 while read id name output; do
 	echo "INSERT INTO dataset (id) VALUES ('$output');" >> tmp-ds.sql
-    echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$id', 'function', '$name', '$WF');" >> tmp-import.sql
-    echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'O', '$output', 'result');" >> tmp-import.sql
+    echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$id', 'function', '$name', '$WF');" >> tmp-p.sql
+    echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'O', '$output', 'result');" >> tmp-dsu.sql
 done < functions.txt
 
 while read id value; do
     # TODO need ordering/naming
 	echo "INSERT INTO dataset (id) VALUES ('$value');" >> tmp-ds.sql
-    echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'I', '$value', 'undefined');" >> tmp-import.sql
+    echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'I', '$value', 'undefined');" >> tmp-dsu.sql
 done < function-inputs.txt
 
-while read thread direction dataset variable rest; do 
-    if [ "$direction" == "input" ] ; then
-	dir=I
-    else
-	dir=O
-    fi
-	echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
-    echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$thread', '$dir', '$dataset', '$variable');" >> tmp-import.sql
-done < tie-data-invocs.txt
 
 while read thread appname; do
-    echo "UPDATE execute SET procedure_name='$appname' WHERE id='$thread';" >> tmp-import.sql
+    echo "UPDATE execute SET procedure_name='$appname' WHERE id='$thread';" >> tmp-e.sql
 done < invocation-procedure-names.txt
 
 while read outer inner; do
 	echo "INSERT INTO dataset (id) VALUES ('$outer');" >> tmp-ds.sql
 	echo "INSERT INTO dataset (id) VALUES ('$inner');" >> tmp-ds.sql
-    echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');" >> tmp-import.sql
+    echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');" >> tmp-dsc.sql
 done < tie-containers.txt
 
 while read dataset filename; do
 	echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
-    echo "INSERT INTO file (id, filename) VALUES ('$dataset', '$filename');" >> tmp-import.sql
+    echo "INSERT INTO file (id, filename) VALUES ('$dataset', '$filename');" >> tmp-f.sql
 done < dataset-filenames.txt
 
 while read dataset value; do
 	echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
-    echo "INSERT INTO variable (id, value) VALUES ('$dataset', '$value');" >> tmp-import.sql
+    echo "INSERT INTO variable (id, value) VALUES ('$dataset', '$value');" >> tmp-v.sql
 done < dataset-values.txt
 
 while read start duration wfid rest; do
-    echo "UPDATE workflow SET start_time=$start WHERE id='$WF';" >> tmp-import.sql
-    echo "UPDATE workflow SET duration=$duration WHERE id='$WF';" >> tmp-import.sql
+    echo "UPDATE workflow SET start_time=$start WHERE id='$WF';" >> tmp-u.sql
+    echo "UPDATE workflow SET duration=$duration WHERE id='$WF';" >> tmp-u.sql
 done < workflow.event
 
 #while read id extrainfo ; do
@@ -102,19 +93,29 @@
 # TODO this could merge with other naming tables
 while read start duration thread final_state procname ; do
     compoundid=$WFID$thread
-    echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$compoundid', 'compound', '$procname', '$WF');" >> tmp-import.sql
+    echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$compoundid', 'compound', '$procname', '$WF');" >> tmp-p.sql
 done < compound.event
 
 while read start duration thread final_state procname ; do
     fqid=$WFID$thread
-    echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$fqid', 'internal', '$procname', '$WF');" >> tmp-import.sql
+    echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$fqid', 'internal', '$procname', '$WF');" >> tmp-p.sql
 done < internalproc.event
 
 while read t ; do 
     thread="${WFID}$t"
-    echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$thread', 'scope', 'scope', '$WF');" >> tmp-import.sql
+    echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$thread', 'scope', 'scope', '$WF');" >> tmp-p.sql
 done < scopes.txt
 
+while read thread direction dataset variable rest; do 
+    if [ "$direction" == "input" ] ; then
+	dir=I
+    else
+	dir=O
+    fi
+	echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
+    echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$thread', '$dir', '$dataset', '$variable');" >> tmp-dsu.sql
+done < tie-data-invocs.txt
+
 while read id ; do
     echo "INSERT INTO createarray (array_id) VALUES ('$id');" >> tmp-import.sql
 done < createarray.txt
@@ -123,10 +124,17 @@
     echo "INSERT INTO createarray_member (array_id, ix, member_id) VALUES ('$arrayid', '$index', '$memberid');" >> tmp-import.sql
 done < createarray-members.txt
 
-echo "BEGIN;" > import.sql
-cat tmp-ds.sql | sort | uniq >> import.sql
-cat tmp-import.sql >> import.sql
-echo "COMMIT;" >> import.sql 
+cat tmp-ds.sql | sort | uniq > import.sql
+cat tmp-f.sql | sort | uniq >> import.sql
+cat tmp-v.sql | sort | uniq >> import.sql
+cat tmp-p.sql | sort | uniq >> import.sql
+cat tmp-e.sql | sort | uniq >> import.sql
+cat tmp-e2.sql | sort | uniq >> import.sql
+cat tmp-dsu.sql | sort | uniq >> import.sql
+cat tmp-dsc.sql | sort | uniq >> import.sql
+cat tmp-import.sql | sort | uniq >> import.sql
+cat tmp-u.sql | sort | uniq >> import.sql
+
 echo Sending SQL to DB
 
 $SQLCMD < import.sql




More information about the Swift-commit mailing list