[Swift-commit] r7384 - provenancedb

lgadelha at ci.uchicago.edu lgadelha at ci.uchicago.edu
Sat Dec 7 18:20:44 CST 2013


Author: lgadelha
Date: 2013-12-07 18:20:43 -0600 (Sat, 07 Dec 2013)
New Revision: 7384

Modified:
   provenancedb/prov-init.sql
Log:
New provenance database views to facilitate querying


Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql	2013-12-06 22:29:39 UTC (rev 7383)
+++ provenancedb/prov-init.sql	2013-12-08 00:20:43 UTC (rev 7384)
@@ -30,6 +30,19 @@
 drop table primitive cascade;
 drop table dataset_containment cascade;
 drop table ds cascade;
+drop view script_and_fun_call;
+drop view script_to_app_fun_call;
+drop view script_to_app_exec;
+drop view script_to_app_exec_with_runtime_stats;
+drop view dataset_primitive_and_mapped; 
+drop view dataset_all;
+drop view dataset_all_with_annotations;
+drop view dataset_all_with_annotations_and_parameter_names;
+drop view function_call_dataflow;
+drop view function_call_dataflow_with_input_details;
+drop view function_call_dataflow_with_dataset_details;
+drop view provenance_all;
+drop view provenance_summary;
 
 -- application_catalog stores tc.file
 create table tc_file (
@@ -74,6 +87,12 @@
 	 script_hash, tc_file_hash, sites_file_hash
   from   run;
 
+	
+create view script_run_summary as 
+       select id,swift_version,cog_version,final_state,
+       	      start_time,duration,script_filename 
+       from script_run;
+
 -- process gives information about each process (in the OPM sense)
 -- it is augmented by information in other tables
 -- specifies the type of process. for any type, it
@@ -225,16 +244,6 @@
        primary key (out_id,in_id)
 );
 
-create view dataset as 
-    select mapped.id, 'mapped' as type, mapped.filename, null as value
-    from mapped
-  union all 
-    select primitive.id, 'primitive' as type, null as filename, primitive.value
-    from primitive
-  union all
-    select dataset_containment.out_id as id, 'composite' as type, null as filename, null as value 
-    from dataset_containment;
-
 -- dataset_usage records usage relationships between processes and datasets;
 -- in SwiftScript terms, the input and output parameters for each
 -- application procedure invocation; in OPM terms, the artificts which are
@@ -325,33 +334,191 @@
        union all
        	    select dataset_id as parent, function_call_id as child 
 	    from dataset_in;
-	
-create view script_run_summary as 
-       select id,swift_version,cog_version,final_state,
-       	      start_time,duration,script_filename 
-       from script_run;
 
 create view annotation_text as 
-       select dataset_id as entity_id, name, value as text_value, 'dataset' as entity_type from annot_dataset_text
+       select dataset_id as entity_id, name as key, value as text_value, 'dataset' as entity_type from annot_dataset_text
        union all
-       select function_call_id as entity_id, value as text_value, 'function_call' as entity_type from annot_function_call_text
+       select function_call_id as entity_id, name as key, value as text_value, 'function_call' as entity_type from annot_function_call_text
        union all
-       select app_exec_id as entity_id, value as text_value, 'app_exec' as entity_type from annot_app_exec_text
+       select app_exec_id as entity_id, name as key, value as text_value, 'app_exec' as entity_type from annot_app_exec_text
        union all
-       select script_run_id as entity_id, value as text_value, 'script_run' as entity from annot_script_run_text;
+       select script_run_id as entity_id, name as key, value as text_value, 'script_run' as entity from annot_script_run_text;
 
 create view annotation_numeric as 
-       select dataset_id as entity_id, name, value as numeric_value, 'dataset' as entity_type from annot_dataset_num
+       select dataset_id as entity_id, name as key, value as numeric_value, 'dataset' as entity_type from annot_dataset_num
        union all
-       select function_call_id as entity_id, value as numeric_value, 'function_call' as entity_type from annot_function_call_num
+       select function_call_id as entity_id, name as key, value as numeric_value, 'function_call' as entity_type from annot_function_call_num
        union all
-       select app_exec_id as entity_id, value as numeric_value, 'app_exec' as entity_type from annot_app_exec_num
+       select app_exec_id as entity_id, name as key, value as numeric_value, 'app_exec' as entity_type from annot_app_exec_num
        union all
-       select script_run_id as entity_id, value as numeric_value, 'script_run' as entity from annot_script_run_num;
+       select script_run_id as entity_id, name as key, value as numeric_value, 'script_run' as entity from annot_script_run_num;
 
 
 create view annotation as
-       select entity_id, entity_type, NULL as numeric_value, text_value from annotation_text
+       select entity_id, entity_type, key, NULL as numeric_value, text_value from annotation_text
        union all
-       select entity_id, entity_type, numeric_value, NULL as text_value from annotation_numeric;
-       
\ No newline at end of file
+       select entity_id, entity_type, key, numeric_value, NULL as text_value from annotation_numeric;
+       
+create view script_and_fun_call as
+       select script_run_summary.id as script_run_id, log_filename, script_filename, swift_version, cog_version,
+       	      script_run_summary.final_state as script_run_final_state, 
+       	      script_run_summary.start_time as script_run_start_time,
+       	      script_run_summary.duration as script_run_duration, 
+      	      script_filename, fun_call.id as function_call_id, 
+       	      fun_call.type as function_call_type, 
+       	      fun_call.name as function_call_name 
+       from   script_run_summary,fun_call 
+       where  fun_call.run_id=script_run_summary.id;
+
+
+create view script_to_app_fun_call as
+       select script_and_fun_call.*, app_fun_call.name as app_fun_call_name, app_fun_call.start_time as app_fun_call_start_time,
+       	      app_fun_call.duration as app_fun_call_duration, app_fun_call.final_state as app_fun_call_final_state, 
+	      app_fun_call.scratch as app_fun_call_scratch 
+       from script_and_fun_call 
+       	    left outer join 
+       	    app_fun_call 
+	    on (app_fun_call.id=script_and_fun_call.function_call_id);
+
+create view script_to_app_exec as
+       select script_to_app_fun_call.*, app_exec.id as app_exec_id, app_exec.start_time as app_exec_start_time,
+       	      app_exec.duration as app_exec_duration, app_exec.final_state as app_exec_final_state, app_exec.site as app_exec_site 
+       from script_to_app_fun_call 
+       	    left outer join 
+	    app_exec
+	    on (script_to_app_fun_call.function_call_id=app_exec.app_fun_call_id);
+
+
+create view script_to_app_exec_with_runtime_stats as
+       select script_to_app_fun_call.*, app_exec.id as app_exec_id, app_exec.start_time as app_exec_start_time,
+       	      app_exec.duration as app_exec_duration, app_exec.final_state as app_exec_final_state, app_exec.site as app_exec_site,
+	      app_exec.real_secs as app_exec_real_secs, app_exec.kernel_secs as app_exec_kernel_secs, 
+	      app_exec.user_secs as app_exec_user_secs, app_exec.percent_cpu as app_exec_percent_cpu, app_exec.max_rss as app_exec_max_rss,
+	      app_exec.avg_rss as app_exec_avg_rss, app_exec.avg_tot_vm as app_exec_avg_tot_vm, app_exec.avg_priv_data as app_exec_avg_priv_data, 
+	      app_exec.avg_priv_stack as app_exec_avg_priv_stack, app_exec.avg_shared_text as app_exec_avg_shared_text,
+	      app_exec.page_size as app_exec_page_size, app_exec.major_pgfaults as  app_exec_major_pgfaults, app_exec.minor_pgfaults as app_exec_minor_pgfaults,
+	      app_exec.swaps as app_exec_swaps, app_exec.invol_context_switches as app_exec_invol_context_switches, 
+	      app_exec.vol_waits as app_exec_vol_waits, app_exec.fs_reads as app_exec_fs_reads, app_exec.fs_writes as app_exec_fs_writes, 
+	      app_exec.sock_recv as app_exec_sock_recv, app_exec.sock_send as app_exec_sock_send, app_exec.signals as app_exec_signals, 
+	      app_exec.exit_status as app_exec_exit_status
+       from script_to_app_fun_call 
+       	    left outer join 
+	    app_exec
+	    on (script_to_app_fun_call.function_call_id=app_exec.app_fun_call_id);
+
+
+create view dataset_primitive_and_mapped as 
+       select mapped.id as dataset_id, 'mapped' as dataset_type, NULL as dataset_value, mapped.filename as dataset_filename
+       from mapped
+       union all
+       select primitive.id as dataset_id, 'primitive' as dataset_type, primitive.value as dataset_value, NULL as dataset_filename
+       from primitive;
+
+create view dataset_all as
+       select * from dataset_primitive_and_mapped
+       union all
+       select distinct dataset_containment.out_id as dataset_id, 'collection' as dataset_type, NULL as dataset_value, NULL as dataset_filename 
+       from dataset_containment
+       union all
+       select collection.id as dataset_id, 'other' as dataset_type, NULL as dataset_value, NULL as dataset_filename
+       from (
+       	    select distinct * 
+       	    from ds 
+       	    where id 
+	    not in (
+	     select dataset_id 
+	     from dataset_primitive_and_mapped 
+	     union 
+	     select dataset_containment.out_id as dataset_id 
+	     from dataset_containment)) as collection;
+
+create view dataset_all_with_annotations as
+       select dataset_all.*, annotation.key as annotation_key, annotation.numeric_value as annotation_numeric_value, 
+              annotation.text_value as annotation_text_value 
+       from   dataset_all 
+              left outer join 
+	      annotation 
+       	      on (dataset_id=entity_id);
+
+create view dataset_all_with_annotations_and_parameter_names as
+       select distinct dataset_all_with_annotations.dataset_id, dataset_all_with_annotations.dataset_type, dataset_io.parameter AS script_variable, 
+       	      	       dataset_all_with_annotations.dataset_value, dataset_all_with_annotations.dataset_filename,
+		       dataset_all_with_annotations.annotation_key, dataset_all_with_annotations.annotation_numeric_value, 
+		       dataset_all_with_annotations.annotation_text_value
+       from dataset_io
+            natural join
+	    dataset_all_with_annotations;
+
+create view function_call_dataflow as
+select dataset_in.dataset_id as input_dataset_id, dataset_in.parameter as input_parameter_name, dataset_in.function_call_id, 
+       dataset_out.dataset_id as output_dataset_id, dataset_out.parameter as output_parameter_name
+from   dataset_in, dataset_out 
+where  dataset_in.function_call_id=dataset_out.function_call_id
+union all
+select NULL as input_dataset_id, NULL as input_parameter_name, dataset_out.function_call_id,
+       dataset_out.dataset_id as output_dataset_id, dataset_out.parameter as output_parameter_name
+from   dataset_out
+where  dataset_out.function_call_id not in (select function_call_id from dataset_in)
+union all
+select dataset_in.dataset_id as input_dataset_id, dataset_in.parameter as input_parameter_name, dataset_in.function_call_id,
+       NULL as output_dataset_id, NULL as output_parameter_name 
+from   dataset_in
+where  dataset_in.function_call_id not in (select function_call_id from dataset_out);
+
+
+create view function_call_dataflow_with_input_details as
+select dataset_all_with_annotations.dataset_id as input_dataset_id, dataset_all_with_annotations.dataset_type as input_dataset_type,
+       function_call_dataflow.input_parameter_name, dataset_all_with_annotations.dataset_value as input_dataset_value,  
+       dataset_all_with_annotations.dataset_filename as input_dataset_filename,
+       dataset_all_with_annotations.annotation_key as input_dataset_annotation_key, 
+       dataset_all_with_annotations.annotation_numeric_value as input_dataset_annotation_numeric_value,
+       dataset_all_with_annotations.annotation_text_value as input_dataset_annotation_text_value, function_call_dataflow.function_call_id,
+       function_call_dataflow.output_dataset_id, function_call_dataflow.output_parameter_name 
+from 
+dataset_all_with_annotations 
+right outer join 
+function_call_dataflow 
+on (dataset_all_with_annotations.dataset_id=function_call_dataflow.input_dataset_id);
+
+create view function_call_dataflow_with_dataset_details as
+select function_call_dataflow_with_input_details.*,  dataset_all_with_annotations.dataset_type as output_dataset_type,
+       dataset_all_with_annotations.dataset_value as output_dataset_value,  
+       dataset_all_with_annotations.dataset_filename as output_dataset_filename,
+       dataset_all_with_annotations.annotation_key as output_dataset_annotation_key, 
+       dataset_all_with_annotations.annotation_numeric_value as output_dataset_annotation_numeric_value,
+       dataset_all_with_annotations.annotation_text_value as output_dataset_annotation_text_value
+from   function_call_dataflow_with_input_details
+       left outer join 
+       dataset_all_with_annotations 
+       on (dataset_all_with_annotations.dataset_id=function_call_dataflow_with_input_details.output_dataset_id);
+         
+create view provenance_all as
+select script_run_id, swift_version, cog_version, script_run_final_state, script_run_start_time, script_run_duration, script_filename,
+       input_dataset_id, input_dataset_type, input_parameter_name, input_dataset_value, input_dataset_filename, 
+       input_dataset_annotation_key, input_dataset_annotation_numeric_value, input_dataset_annotation_text_value,
+       script_to_app_exec.function_call_id, function_call_type, function_call_name, app_fun_call_name, app_fun_call_start_time,
+       app_fun_call_duration, app_fun_call_final_state, app_fun_call_scratch, app_exec_id, app_exec_start_time, app_exec_duration, 
+       app_exec_final_state, app_exec_site, output_dataset_id, output_parameter_name, output_dataset_type, output_dataset_value, 
+       output_dataset_filename, output_dataset_annotation_key, output_dataset_annotation_numeric_value, output_dataset_annotation_text_value 
+from   function_call_dataflow_with_dataset_details
+       left outer join
+       script_to_app_exec on (function_call_dataflow_with_dataset_details.function_call_id=script_to_app_exec.function_call_id);
+
+
+
+select script_run_id, swift_version, cog_version, script_run_final_state, script_run_start_time, script_run_duration, script_filename,
+       input_dataset_id, input_dataset_type, input_parameter_name, input_dataset_value, input_dataset_filename, 
+       input_dataset_annotation_key, input_dataset_annotation_numeric_value, input_dataset_annotation_text_value,
+       script_to_app_exec.function_call_id, function_call_type, function_call_name, app_fun_call_name, app_fun_call_start_time,
+       app_fun_call_duration, app_fun_call_final_state, app_fun_call_scratch, app_exec_id, app_exec_start_time, app_exec_duration, 
+       app_exec_final_state, app_exec_site, output_dataset_id, output_parameter_name, output_dataset_type, output_dataset_value, 
+       output_dataset_filename, output_dataset_annotation_key, output_dataset_annotation_numeric_value, output_dataset_annotation_text_value 
+from   function_call_dataflow_with_dataset_details
+       right outer join
+       script_to_app_exec on (function_call_dataflow_with_dataset_details.function_call_id=script_to_app_exec.function_call_id);
+
+create view provenance_summary as
+select script_run_id, script_filename, input_dataset_id, input_dataset_type, input_parameter_name, input_dataset_value, 
+       input_dataset_filename, function_call_id, function_call_type, function_call_name, output_dataset_id, output_parameter_name, 
+       output_dataset_type, output_dataset_value, output_dataset_filename 
+from   provenance_all;
\ No newline at end of file




More information about the Swift-commit mailing list