[Swift-commit] r7384 - provenancedb
lgadelha at ci.uchicago.edu
lgadelha at ci.uchicago.edu
Sat Dec 7 18:20:44 CST 2013
Author: lgadelha
Date: 2013-12-07 18:20:43 -0600 (Sat, 07 Dec 2013)
New Revision: 7384
Modified:
provenancedb/prov-init.sql
Log:
New provenance database views to facilitate querying
Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql 2013-12-06 22:29:39 UTC (rev 7383)
+++ provenancedb/prov-init.sql 2013-12-08 00:20:43 UTC (rev 7384)
@@ -30,6 +30,19 @@
drop table primitive cascade;
drop table dataset_containment cascade;
drop table ds cascade;
+drop view script_and_fun_call;
+drop view script_to_app_fun_call;
+drop view script_to_app_exec;
+drop view script_to_app_exec_with_runtime_stats;
+drop view dataset_primitive_and_mapped;
+drop view dataset_all;
+drop view dataset_all_with_annotations;
+drop view dataset_all_with_annotations_and_parameter_names;
+drop view function_call_dataflow;
+drop view function_call_dataflow_with_input_details;
+drop view function_call_dataflow_with_dataset_details;
+drop view provenance_all;
+drop view provenance_summary;
-- application_catalog stores tc.file
create table tc_file (
@@ -74,6 +87,12 @@
script_hash, tc_file_hash, sites_file_hash
from run;
+
+create view script_run_summary as
+ select id,swift_version,cog_version,final_state,
+ start_time,duration,script_filename
+ from script_run;
+
-- process gives information about each process (in the OPM sense)
-- it is augmented by information in other tables
-- specifies the type of process. for any type, it
@@ -225,16 +244,6 @@
primary key (out_id,in_id)
);
-create view dataset as
- select mapped.id, 'mapped' as type, mapped.filename, null as value
- from mapped
- union all
- select primitive.id, 'primitive' as type, null as filename, primitive.value
- from primitive
- union all
- select dataset_containment.out_id as id, 'composite' as type, null as filename, null as value
- from dataset_containment;
-
-- dataset_usage records usage relationships between processes and datasets;
-- in SwiftScript terms, the input and output parameters for each
-- application procedure invocation; in OPM terms, the artificts which are
@@ -325,33 +334,191 @@
union all
select dataset_id as parent, function_call_id as child
from dataset_in;
-
-create view script_run_summary as
- select id,swift_version,cog_version,final_state,
- start_time,duration,script_filename
- from script_run;
create view annotation_text as
- select dataset_id as entity_id, name, value as text_value, 'dataset' as entity_type from annot_dataset_text
+ select dataset_id as entity_id, name as key, value as text_value, 'dataset' as entity_type from annot_dataset_text
union all
- select function_call_id as entity_id, value as text_value, 'function_call' as entity_type from annot_function_call_text
+ select function_call_id as entity_id, name as key, value as text_value, 'function_call' as entity_type from annot_function_call_text
union all
- select app_exec_id as entity_id, value as text_value, 'app_exec' as entity_type from annot_app_exec_text
+ select app_exec_id as entity_id, name as key, value as text_value, 'app_exec' as entity_type from annot_app_exec_text
union all
- select script_run_id as entity_id, value as text_value, 'script_run' as entity from annot_script_run_text;
+ select script_run_id as entity_id, name as key, value as text_value, 'script_run' as entity from annot_script_run_text;
create view annotation_numeric as
- select dataset_id as entity_id, name, value as numeric_value, 'dataset' as entity_type from annot_dataset_num
+ select dataset_id as entity_id, name as key, value as numeric_value, 'dataset' as entity_type from annot_dataset_num
union all
- select function_call_id as entity_id, value as numeric_value, 'function_call' as entity_type from annot_function_call_num
+ select function_call_id as entity_id, name as key, value as numeric_value, 'function_call' as entity_type from annot_function_call_num
union all
- select app_exec_id as entity_id, value as numeric_value, 'app_exec' as entity_type from annot_app_exec_num
+ select app_exec_id as entity_id, name as key, value as numeric_value, 'app_exec' as entity_type from annot_app_exec_num
union all
- select script_run_id as entity_id, value as numeric_value, 'script_run' as entity from annot_script_run_num;
+ select script_run_id as entity_id, name as key, value as numeric_value, 'script_run' as entity from annot_script_run_num;
create view annotation as
- select entity_id, entity_type, NULL as numeric_value, text_value from annotation_text
+ select entity_id, entity_type, key, NULL as numeric_value, text_value from annotation_text
union all
- select entity_id, entity_type, numeric_value, NULL as text_value from annotation_numeric;
-
\ No newline at end of file
+ select entity_id, entity_type, key, numeric_value, NULL as text_value from annotation_numeric;
+
+create view script_and_fun_call as
+ select script_run_summary.id as script_run_id, log_filename, script_filename, swift_version, cog_version,
+ script_run_summary.final_state as script_run_final_state,
+ script_run_summary.start_time as script_run_start_time,
+ script_run_summary.duration as script_run_duration,
+ script_filename, fun_call.id as function_call_id,
+ fun_call.type as function_call_type,
+ fun_call.name as function_call_name
+ from script_run_summary,fun_call
+ where fun_call.run_id=script_run_summary.id;
+
+
+create view script_to_app_fun_call as
+ select script_and_fun_call.*, app_fun_call.name as app_fun_call_name, app_fun_call.start_time as app_fun_call_start_time,
+ app_fun_call.duration as app_fun_call_duration, app_fun_call.final_state as app_fun_call_final_state,
+ app_fun_call.scratch as app_fun_call_scratch
+ from script_and_fun_call
+ left outer join
+ app_fun_call
+ on (app_fun_call.id=script_and_fun_call.function_call_id);
+
+create view script_to_app_exec as
+ select script_to_app_fun_call.*, app_exec.id as app_exec_id, app_exec.start_time as app_exec_start_time,
+ app_exec.duration as app_exec_duration, app_exec.final_state as app_exec_final_state, app_exec.site as app_exec_site
+ from script_to_app_fun_call
+ left outer join
+ app_exec
+ on (script_to_app_fun_call.function_call_id=app_exec.app_fun_call_id);
+
+
+create view script_to_app_exec_with_runtime_stats as
+ select script_to_app_fun_call.*, app_exec.id as app_exec_id, app_exec.start_time as app_exec_start_time,
+ app_exec.duration as app_exec_duration, app_exec.final_state as app_exec_final_state, app_exec.site as app_exec_site,
+ app_exec.real_secs as app_exec_real_secs, app_exec.kernel_secs as app_exec_kernel_secs,
+ app_exec.user_secs as app_exec_user_secs, app_exec.percent_cpu as app_exec_percent_cpu, app_exec.max_rss as app_exec_max_rss,
+ app_exec.avg_rss as app_exec_avg_rss, app_exec.avg_tot_vm as app_exec_avg_tot_vm, app_exec.avg_priv_data as app_exec_avg_priv_data,
+ app_exec.avg_priv_stack as app_exec_avg_priv_stack, app_exec.avg_shared_text as app_exec_avg_shared_text,
+ app_exec.page_size as app_exec_page_size, app_exec.major_pgfaults as app_exec_major_pgfaults, app_exec.minor_pgfaults as app_exec_minor_pgfaults,
+ app_exec.swaps as app_exec_swaps, app_exec.invol_context_switches as app_exec_invol_context_switches,
+ app_exec.vol_waits as app_exec_vol_waits, app_exec.fs_reads as app_exec_fs_reads, app_exec.fs_writes as app_exec_fs_writes,
+ app_exec.sock_recv as app_exec_sock_recv, app_exec.sock_send as app_exec_sock_send, app_exec.signals as app_exec_signals,
+ app_exec.exit_status as app_exec_exit_status
+ from script_to_app_fun_call
+ left outer join
+ app_exec
+ on (script_to_app_fun_call.function_call_id=app_exec.app_fun_call_id);
+
+
+create view dataset_primitive_and_mapped as
+ select mapped.id as dataset_id, 'mapped' as dataset_type, NULL as dataset_value, mapped.filename as dataset_filename
+ from mapped
+ union all
+ select primitive.id as dataset_id, 'primitive' as dataset_type, primitive.value as dataset_value, NULL as dataset_filename
+ from primitive;
+
+create view dataset_all as
+ select * from dataset_primitive_and_mapped
+ union all
+ select distinct dataset_containment.out_id as dataset_id, 'collection' as dataset_type, NULL as dataset_value, NULL as dataset_filename
+ from dataset_containment
+ union all
+ select collection.id as dataset_id, 'other' as dataset_type, NULL as dataset_value, NULL as dataset_filename
+ from (
+ select distinct *
+ from ds
+ where id
+ not in (
+ select dataset_id
+ from dataset_primitive_and_mapped
+ union
+ select dataset_containment.out_id as dataset_id
+ from dataset_containment)) as collection;
+
+create view dataset_all_with_annotations as
+ select dataset_all.*, annotation.key as annotation_key, annotation.numeric_value as annotation_numeric_value,
+ annotation.text_value as annotation_text_value
+ from dataset_all
+ left outer join
+ annotation
+ on (dataset_id=entity_id);
+
+create view dataset_all_with_annotations_and_parameter_names as
+ select distinct dataset_all_with_annotations.dataset_id, dataset_all_with_annotations.dataset_type, dataset_io.parameter AS script_variable,
+ dataset_all_with_annotations.dataset_value, dataset_all_with_annotations.dataset_filename,
+ dataset_all_with_annotations.annotation_key, dataset_all_with_annotations.annotation_numeric_value,
+ dataset_all_with_annotations.annotation_text_value
+ from dataset_io
+ natural join
+ dataset_all_with_annotations;
+
+create view function_call_dataflow as
+select dataset_in.dataset_id as input_dataset_id, dataset_in.parameter as input_parameter_name, dataset_in.function_call_id,
+ dataset_out.dataset_id as output_dataset_id, dataset_out.parameter as output_parameter_name
+from dataset_in, dataset_out
+where dataset_in.function_call_id=dataset_out.function_call_id
+union all
+select NULL as input_dataset_id, NULL as input_parameter_name, dataset_out.function_call_id,
+ dataset_out.dataset_id as output_dataset_id, dataset_out.parameter as output_parameter_name
+from dataset_out
+where dataset_out.function_call_id not in (select function_call_id from dataset_in)
+union all
+select dataset_in.dataset_id as input_dataset_id, dataset_in.parameter as input_parameter_name, dataset_in.function_call_id,
+ NULL as output_dataset_id, NULL as output_parameter_name
+from dataset_in
+where dataset_in.function_call_id not in (select function_call_id from dataset_out);
+
+
+create view function_call_dataflow_with_input_details as
+select dataset_all_with_annotations.dataset_id as input_dataset_id, dataset_all_with_annotations.dataset_type as input_dataset_type,
+ function_call_dataflow.input_parameter_name, dataset_all_with_annotations.dataset_value as input_dataset_value,
+ dataset_all_with_annotations.dataset_filename as input_dataset_filename,
+ dataset_all_with_annotations.annotation_key as input_dataset_annotation_key,
+ dataset_all_with_annotations.annotation_numeric_value as input_dataset_annotation_numeric_value,
+ dataset_all_with_annotations.annotation_text_value as input_dataset_annotation_text_value, function_call_dataflow.function_call_id,
+ function_call_dataflow.output_dataset_id, function_call_dataflow.output_parameter_name
+from
+dataset_all_with_annotations
+right outer join
+function_call_dataflow
+on (dataset_all_with_annotations.dataset_id=function_call_dataflow.input_dataset_id);
+
+create view function_call_dataflow_with_dataset_details as
+select function_call_dataflow_with_input_details.*, dataset_all_with_annotations.dataset_type as output_dataset_type,
+ dataset_all_with_annotations.dataset_value as output_dataset_value,
+ dataset_all_with_annotations.dataset_filename as output_dataset_filename,
+ dataset_all_with_annotations.annotation_key as output_dataset_annotation_key,
+ dataset_all_with_annotations.annotation_numeric_value as output_dataset_annotation_numeric_value,
+ dataset_all_with_annotations.annotation_text_value as output_dataset_annotation_text_value
+from function_call_dataflow_with_input_details
+ left outer join
+ dataset_all_with_annotations
+ on (dataset_all_with_annotations.dataset_id=function_call_dataflow_with_input_details.output_dataset_id);
+
+create view provenance_all as
+select script_run_id, swift_version, cog_version, script_run_final_state, script_run_start_time, script_run_duration, script_filename,
+ input_dataset_id, input_dataset_type, input_parameter_name, input_dataset_value, input_dataset_filename,
+ input_dataset_annotation_key, input_dataset_annotation_numeric_value, input_dataset_annotation_text_value,
+ script_to_app_exec.function_call_id, function_call_type, function_call_name, app_fun_call_name, app_fun_call_start_time,
+ app_fun_call_duration, app_fun_call_final_state, app_fun_call_scratch, app_exec_id, app_exec_start_time, app_exec_duration,
+ app_exec_final_state, app_exec_site, output_dataset_id, output_parameter_name, output_dataset_type, output_dataset_value,
+ output_dataset_filename, output_dataset_annotation_key, output_dataset_annotation_numeric_value, output_dataset_annotation_text_value
+from function_call_dataflow_with_dataset_details
+ left outer join
+ script_to_app_exec on (function_call_dataflow_with_dataset_details.function_call_id=script_to_app_exec.function_call_id);
+
+
+
+select script_run_id, swift_version, cog_version, script_run_final_state, script_run_start_time, script_run_duration, script_filename,
+ input_dataset_id, input_dataset_type, input_parameter_name, input_dataset_value, input_dataset_filename,
+ input_dataset_annotation_key, input_dataset_annotation_numeric_value, input_dataset_annotation_text_value,
+ script_to_app_exec.function_call_id, function_call_type, function_call_name, app_fun_call_name, app_fun_call_start_time,
+ app_fun_call_duration, app_fun_call_final_state, app_fun_call_scratch, app_exec_id, app_exec_start_time, app_exec_duration,
+ app_exec_final_state, app_exec_site, output_dataset_id, output_parameter_name, output_dataset_type, output_dataset_value,
+ output_dataset_filename, output_dataset_annotation_key, output_dataset_annotation_numeric_value, output_dataset_annotation_text_value
+from function_call_dataflow_with_dataset_details
+ right outer join
+ script_to_app_exec on (function_call_dataflow_with_dataset_details.function_call_id=script_to_app_exec.function_call_id);
+
+create view provenance_summary as
+select script_run_id, script_filename, input_dataset_id, input_dataset_type, input_parameter_name, input_dataset_value,
+ input_dataset_filename, function_call_id, function_call_type, function_call_name, output_dataset_id, output_parameter_name,
+ output_dataset_type, output_dataset_value, output_dataset_filename
+from provenance_all;
\ No newline at end of file
More information about the Swift-commit
mailing list