[Swift-commit] r7568 - provenancedb

lgadelha at ci.uchicago.edu lgadelha at ci.uchicago.edu
Tue Feb 4 15:45:35 CST 2014


Author: lgadelha
Date: 2014-02-04 15:45:35 -0600 (Tue, 04 Feb 2014)
New Revision: 7568

Modified:
   provenancedb/pql_functions.sql
   provenancedb/prov-init.sql
   provenancedb/prov-to-sql.sh
Log:
Added recursive functions for traversing the coarse provenance graph


Modified: provenancedb/pql_functions.sql
===================================================================
--- provenancedb/pql_functions.sql	2014-02-04 20:38:15 UTC (rev 7567)
+++ provenancedb/pql_functions.sql	2014-02-04 21:45:35 UTC (rev 7568)
@@ -235,9 +235,9 @@
     value VARCHAR
 ) 
 AS $$
-	SELECT script_run.id,script_run.start_time,script_run.duration,dataset_io.parameter,dataset.value
-	FROM   dataset,dataset_io,fun_call,script_run
-	WHERE  dataset.id=dataset_io.dataset_id AND dataset_io.function_call_id=fun_call.id AND 
+	SELECT script_run.id,script_run.start_time,script_run.duration,dataset_io.parameter,dataset_all.dataset_value
+	FROM   dataset_all,dataset_io,fun_call,script_run
+	WHERE  dataset_all.dataset_id=dataset_io.dataset_id AND dataset_io.function_call_id=fun_call.id AND 
 	       fun_call.run_id=script_run.id AND dataset_io.parameter=$1 
 $$ LANGUAGE SQL;
 
@@ -259,6 +259,25 @@
 $$ LANGUAGE SQL;
 
 
+-- Recursive query: returns every direct and transitive ancestor of entity $1 in provenance_graph_edge_coarse
+CREATE OR REPLACE FUNCTION ancestors_coarse(varchar) 
+RETURNS SETOF varchar AS $$
+  WITH RECURSIVE anc(ancestor,descendant) AS
+  (    
+       SELECT parent AS ancestor, child AS descendant  -- base case: direct parents of $1
+       FROM   provenance_graph_edge_coarse
+       WHERE child=$1
+     UNION  -- UNION (not UNION ALL) discards duplicate rows, so the recursion stops once no new ancestor appears
+       SELECT provenance_graph_edge_coarse.parent AS ancestor, 
+              anc.descendant AS descendant
+       FROM   anc, provenance_graph_edge_coarse
+       WHERE  anc.ancestor=provenance_graph_edge_coarse.child  -- recursive step: parents of the ancestors found so far
+  )
+  SELECT ancestor FROM anc
+$$ LANGUAGE SQL;
+
+
+
 -- compare(<entity>, <list of parameter_names, annotations keys>
 CREATE OR REPLACE FUNCTION compare_run(VARIADIC args VARCHAR[])
 RETURNS SETOF RECORD AS $$

Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql	2014-02-04 20:38:15 UTC (rev 7567)
+++ provenancedb/prov-init.sql	2014-02-04 21:45:35 UTC (rev 7568)
@@ -30,8 +30,8 @@
 drop table primitive cascade;
 drop table dataset_containment cascade;
 drop table ds cascade;
-drop table stage_in;
-drop table stage_out;
+drop table stage_in cascade;
+drop table stage_out cascade;
 drop view script_and_fun_call;
 drop view script_to_app_fun_call;
 drop view script_to_app_exec;
@@ -45,6 +45,10 @@
 drop view function_call_dataflow_with_dataset_details;
 drop view provenance_all;
 drop view provenance_summary;
+drop view app_exec_stage_in;
+drop view app_exec_stage_out;
+drop view app_exec_staging;
+drop view app_exec_staging_all;
 
 -- application_catalog stores tc.file
 create table tc_file (
@@ -536,7 +540,7 @@
        minor_pgfaults as app_exec_minor_pgfaults, swaps as app_exec_swaps, invol_context_switches as app_exec_invol_context_switches,
        vol_waits as app_exec_vol_waits, fs_reads as app_exec_fs_reads, fs_writes as app_exec_fs_writes, sock_recv as app_exec_sock_recv,
        sock_send as app_exec_sock_send, signals as app_exec_signals, exit_status as app_exec_exit_status
-from   stage_out,app_exec where stage_in.app_exec_id=app_exec.id;
+from   stage_out,app_exec where stage_out.app_exec_id=app_exec.id;
 
 create view app_exec_staging as
 select stage_in.filename as staged_in_filename, id as app_exec_id, app_fun_call_id, start_time as app_exec_start_time, duration as app_exec_duration,
@@ -552,4 +556,15 @@
 create view app_exec_staging_all as
 select run_id as script_run_id, fun_call.name as app_fun_call_name,  app_exec_staging.*
 from   fun_call, app_exec_staging
-where  fun_call.id=app_exec_staging.app_fun_call_id;
\ No newline at end of file
+where  fun_call.id=app_exec_staging.app_fun_call_id;
+
+create view provenance_graph_edge_coarse as  -- coarse provenance edges as (parent, child) pairs
+       select app_exec_id as parent, filename as child  -- app execution -> file it staged out
+       from   stage_out 
+union all
+       select filename as parent, app_exec_id as child  -- staged-in file -> app execution that consumed it
+       from   stage_in;
+
+create view provenance_summary_coarse as  -- column projection of app_exec_staging_all: run id, staged-in file, app function call name, staged-out file
+select script_run_id, staged_in_filename, app_fun_call_name, staged_out_filename 
+from   app_exec_staging_all;

Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh	2014-02-04 20:38:15 UTC (rev 7567)
+++ provenancedb/prov-to-sql.sh	2014-02-04 21:45:35 UTC (rev 7568)
@@ -210,7 +210,7 @@
 
 while read stage_out desthost execute2_id; do
     app_exec_id=$WFID$execute2_id
-    filename="filename://"$desthost"/"$(echo $stage_out | sed "s/\/\.\//\//g"); 
+    filename="file://"$desthost"/"$(echo $stage_out | sed "s/\/\.\//\//g"); 
     echo "INSERT INTO stage_out VALUES ('$app_exec_id', '$filename');"  >> /tmp/$RUNID.sql
 done < stage-outs.txt 
 




More information about the Swift-commit mailing list