[Swift-commit] r3966 - provenancedb

noreply at svn.ci.uchicago.edu noreply at svn.ci.uchicago.edu
Fri Jan 14 17:27:10 CST 2011


Author: lgadelha
Date: 2011-01-14 17:27:10 -0600 (Fri, 14 Jan 2011)
New Revision: 3966

Modified:
   provenancedb/pql_functions.sql
   provenancedb/prov-to-sql.sh
Log:
Added generic annotation process using SWIFT_EXTRA_INFO from env profile.


Modified: provenancedb/pql_functions.sql
===================================================================
--- provenancedb/pql_functions.sql	2011-01-12 19:35:11 UTC (rev 3965)
+++ provenancedb/pql_functions.sql	2011-01-14 23:27:10 UTC (rev 3966)
@@ -152,15 +152,17 @@
 
 -- recursive query to find ancestor entities in a provenance graph
 
-create or replace function ancestors(varchar) returns setof varchar as $$
-       with recursive anc(ancestor,descendant) as
+CREATE OR REPLACE FUNCTION ancestors(varchar) RETURNS SETOF varchar AS $$
+       WITH RECURSIVE anc(ancestor,descendant) AS
          (    
-              select parent as ancestor, child as descendant from parent_of where child=$1
-              union all
-              select parent_of.parent as ancestor, anc.descendant as descendant
-              from   anc,parent_of
-              where  anc.ancestor=parent_of.child
+              SELECT parent AS ancestor, child AS descendant FROM parent_of WHERE child=$1
+              UNION
+              SELECT parent_of.parent AS ancestor, anc.descendant AS descendant
+              FROM   anc,parent_of
+              WHERE  anc.ancestor=parent_of.child
          )
-       select ancestor from anc
-$$ language sql;
+       SELECT ancestor FROM anc
+$$ LANGUAGE SQL;
 
+
+ 

Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh	2011-01-12 19:35:11 UTC (rev 3965)
+++ provenancedb/prov-to-sql.sh	2011-01-14 23:27:10 UTC (rev 3966)
@@ -9,22 +9,21 @@
 
 echo Generating SQL for $RUNID
 
-rm -f tmp-u.sql tmp-ds.sql tmp-p.sql tmp-e.sql tmp-e2.sql tmp-dsu.sql tmp-dsc.sql tmp-f.sql tmp-v.sql tmp-import.sql import.sql
 
 # this gives a distinction between the root process for a workflow and the
 # workflow itself. perhaps better to model the workflow as a process
-echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" >> tmp-p.sql
+echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" > tmp-p.sql
 
 while read time duration thread localthread endstate tr_name scratch; do
     echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$thread', 'execute', '$tr_name', '$WF');" >> tmp-p.sql
-    echo "INSERT INTO execute (id, start_time, duration, final_state, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$scratch');" >> tmp-e.sql
+    echo "INSERT INTO execute (id, start_time, duration, final_state, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$scratch');" > tmp-e.sql
 done < execute.global.event
 
 while read start_time duration globalid id endstate thread site scratch; do
     # cut off the last component of the thread, so that we end up at the
     # parent thread id which should correspond with the execute-level ID
     inv_id="$WFID$(echo $thread | sed 's/-[^-]*$//')"
-    echo "INSERT INTO execute2 (id, execute_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');" >> tmp-e2.sql
+    echo "INSERT INTO execute2 (id, execute_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');" > tmp-e2.sql
 done < execute2.global.event
 
 while read col1 col2 col3 col4 col5 thread name lhs rhs result; do
@@ -42,11 +41,11 @@
 	result=$(echo $result | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g')
     fi
     
-    echo "INSERT INTO dataset (id) VALUES ('$lhs');" >> tmp-ds.sql
+    echo "INSERT INTO dataset (id) VALUES ('$lhs');" > tmp-ds.sql
     echo "INSERT INTO dataset (id) VALUES ('$rhs');" >> tmp-ds.sql
     echo "INSERT INTO dataset (id) VALUES ('$result');" >> tmp-ds.sql
     echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$operatorid', 'operator', '$name', '$WF');" >> tmp-p.sql
-    echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$lhs', 'lhs');" >> tmp-dsu.sql
+    echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$lhs', 'lhs');" > tmp-dsu.sql
     echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$rhs', 'rhs');" >> tmp-dsu.sql
     echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'O', '$result', 'result');" >> tmp-dsu.sql
 done < operators.txt
@@ -85,7 +84,7 @@
 
     echo "INSERT INTO dataset (id) VALUES ('$outer');" >> tmp-ds.sql
     echo "INSERT INTO dataset (id) VALUES ('$inner');" >> tmp-ds.sql
-    echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');" >> tmp-dsc.sql
+    echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');" > tmp-dsc.sql
 done < tie-containers.txt
 
 while read dataset filename; do
@@ -95,7 +94,7 @@
     fi
 
     echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
-    echo "INSERT INTO file (id, filename) VALUES ('$dataset', '$filename');" >> tmp-f.sql
+    echo "INSERT INTO file (id, filename) VALUES ('$dataset', '$filename');" > tmp-f.sql
 done < dataset-filenames.txt
 
 while read dataset value; do
@@ -105,19 +104,39 @@
     fi
 
     echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
-    echo "INSERT INTO variable (id, value) VALUES ('$dataset', '$value');" >> tmp-v.sql
+    echo "INSERT INTO variable (id, value) VALUES ('$dataset', '$value');" > tmp-v.sql
 done < dataset-values.txt
 
 while read start duration wfid rest; do
-    echo "UPDATE workflow SET start_time=$start WHERE id='$WF';" >> tmp-u.sql
+    echo "UPDATE workflow SET start_time=$start WHERE id='$WF';" > tmp-u.sql
     echo "UPDATE workflow SET duration=$duration WHERE id='$WF';" >> tmp-u.sql
 done < workflow.event
 
-#while read id extrainfo ; do
-# TODO this will not like quotes and things like that in extrainfo
-#    echo "INSERT INTO extrainfo (id, extrainfo) VALUES ('$id', '$extrainfo');" >> tmp-import.sql
-#done < extrainfo.txt
 
+
+while read id extrainfo; do
+    echo $extrainfo | awk -F ";"  '{ for (i = 1; i <= NF; i++)
+                                             print $i
+                                         }' | awk -F "=" '{ print $1 " " $2 }' | awk -F ":" '{ print $1 " " $2 }' > fields.txt
+    while read name type value; do
+	if [ "$type" = "num" ]; then
+	    echo "INSERT INTO annot_p_num (id, name, value) VALUES ('$id', '$name', $value);" > tmp-import.sql
+	fi 
+	if [ "$type" = "txt" ]; then
+	    echo "INSERT INTO annot_p_txt (id, name, value) VALUES ('$id', '$name', '$value');" >> tmp-import.sql
+	fi
+	if [ "$type" = "bool" ]; then
+	    echo "INSERT INTO annot_p_bool (id, name, value) VALUES ('$id', '$name', $value);" >> tmp-import.sql
+	fi
+    done < fields.txt
+done < extrainfo.txt
+
+
+while read id extrainfo ; do
+    
+    echo "INSERT INTO extrainfo (id, extrainfo) VALUES ('$id', '$extrainfo');" >> tmp-import.sql
+done < extrainfo.txt
+
 # TODO this could merge with other naming tables
 while read start duration thread final_state procname ; do
     if [ "$duration" != "last-event-line" ]; then
@@ -174,6 +193,7 @@
 echo Sending SQL to DB
 
 $SQLCMD < import.sql
+rm -f tmp-u.sql tmp-ds.sql tmp-p.sql tmp-e.sql tmp-e2.sql tmp-dsu.sql tmp-dsc.sql tmp-f.sql tmp-v.sql tmp-import.sql import.sql fields.txt
 
 echo Finished sending SQL to DB
 




More information about the Swift-commit mailing list