[Swift-commit] r5754 - provenancedb

lgadelha at ci.uchicago.edu lgadelha at ci.uchicago.edu
Tue Apr 24 06:58:20 CDT 2012


Author: lgadelha
Date: 2012-04-24 06:58:19 -0500 (Tue, 24 Apr 2012)
New Revision: 5754

Removed:
   provenancedb/compare_run.sh
Modified:
   provenancedb/pql_functions.sql
   provenancedb/prov-init.sql
   provenancedb/prov-to-sql.sh
Log:
New tables for storing prospective provenance.


Deleted: provenancedb/compare_run.sh
===================================================================
--- provenancedb/compare_run.sh	2012-04-23 13:41:59 UTC (rev 5753)
+++ provenancedb/compare_run.sh	2012-04-24 11:58:19 UTC (rev 5754)
@@ -1,15 +0,0 @@
-#!/bin/bash
-
-PROVDIR=$(dirname $0)
-pushd $PROVDIR
-PROVDIR=$(pwd)
-popd
-
-source $PROVDIR/etc/provenance.config
-export PATH=$PROVDIR:$PATH
-
-# TODO: Check python's version, should be >=2.6
-echo "DROP VIEW temp;" | $SQLCMD
-temp_view=$(python $PROVDIR/compare_run.py $@)
-echo "CREATE VIEW temp AS " $temp_view | $SQLCMD
-echo "SELECT * FROM temp;" | $SQLCMD
\ No newline at end of file

Modified: provenancedb/pql_functions.sql
===================================================================
--- provenancedb/pql_functions.sql	2012-04-23 13:41:59 UTC (rev 5753)
+++ provenancedb/pql_functions.sql	2012-04-24 11:58:19 UTC (rev 5754)
@@ -5,18 +5,15 @@
 
 -- lists variations in a parameter's value across workflows, for parameters that are in-memory variables
 
-
-
-
 drop type compare_run_by_parameter_type cascade;
-create type compare_run_by_parameter_type as (run_id varchar, param varchar, value varchar);
+create type compare_run_by_parameter_type as (run_id varchar, parameter varchar, value varchar);
 
-create or replace function compare_run_by_parameter(param_name varchar)
+create or replace function compare_run_by_parameter(parameter_name varchar)
 returns setof compare_run_by_parameter_type
 as $$
-   select run_id, param, value
-   from   ds_use,proc,in_mem
-   where  proc.id=ds_use.proc_id and ds_use.ds_id=in_mem.id and param=$1;
+   select run_id, parameter, value -- run_id from fun_call, parameter from ds_io, value from primitive
+   from   ds_io,fun_call,primitive -- the ds_io view exposes function_call_id / variable_id (not fun_call_id / ds_id)
+   where  fun_call.id=ds_io.function_call_id and ds_io.variable_id=primitive.id and parameter=$1;
 $$ language sql;
 
 -- PostgreSQL >= 9.0
@@ -27,26 +24,26 @@
 --   value VARCHAR
 -- )
 -- AS $$
---    SELECT   proc.run_id, ds_out.param, in_mem.value
---    FROM     in_mem, ds_out, proc
---    WHERE    in_mem.id=ds_out.ds_id AND ds_out.proc_id=proc.id AND 
---             ds_out.param=$1 
---    GROUP BY proc.run_id, ds_out.param, in_mem.value
+--    SELECT   fun_call.run_id, ds_out.parameter, primitive.value
+--    FROM     primitive, ds_out, fun_call
+--    WHERE    primitive.id=ds_out.ds_id AND ds_out.fun_call_id=fun_call.id AND 
+--             ds_out.parameter=$1 
+--    GROUP BY fun_call.run_id, ds_out.parameter, primitive.value
 --  UNION
---    SELECT   proc.run_id, ds_in.param, in_mem.value
---    FROM     in_mem, ds_in, proc
---    WHERE    in_mem.id=ds_in.ds_id AND ds_in.proc_id=proc.id AND 
---             ds_in.param=$1 
---    GROUP BY proc.run_id, ds_in.param, in_mem.value	
+--    SELECT   fun_call.run_id, ds_in.parameter, primitive.value
+--    FROM     primitive, ds_in, fun_call
+--    WHERE    primitive.id=ds_in.ds_id AND ds_in.fun_call_id=fun_call.id AND 
+--             ds_in.parameter=$1 
+--    GROUP BY fun_call.run_id, ds_in.parameter, primitive.value	
 --$$ LANGUAGE SQL;
 
 
---CREATE OR REPLACE FUNCTION compare_run_by_parametereter(param_name1 VARCHAR, param_name2 VARCHAR) 
+--CREATE OR REPLACE FUNCTION compare_run_by_parameter(parameter_name1 VARCHAR, parameter_name2 VARCHAR) 
 --RETURNS TABLE (
 --  workflow_id VARCHAR, 
---  param_name1 VARCHAR, 
+--  parameter_name1 VARCHAR, 
 --  value1 VARCHAR, 
---  param_name2 VARCHAR, 
+--  parameter_name2 VARCHAR, 
 --  value2 VARCHAR
 --) 
 --AS $$
@@ -57,30 +54,29 @@
 --         USING (workflow_id); 
 --$$ LANGUAGE SQL;
 
-
 DROP TYPE compare_run_by_annot_num_type;
 CREATE TYPE compare_run_by_annot_num_type as (run_id VARCHAR, name VARCHAR, value NUMERIC);
 
 CREATE OR REPLACE FUNCTION compare_run_by_annot_num(name VARCHAR)
 RETURNS SETOF compare_run_by_annot_num_type
 AS $$
-    SELECT proc.run_id, a_ds_n.name, a_ds_n.value
-    FROM   a_ds_n,ds_use,ds_cont,proc
-    WHERE  a_ds_n.ds_id=ds_cont.in_id AND ds_cont.out_id=ds_use.ds_id AND
-           ds_use.proc_id=proc.id AND a_ds_n.name=$1
+    SELECT fun_call.run_id, annot_ds_num.name, annot_ds_num.value -- annotated dataset is a member of a container that is an input/output of the call
+    FROM   annot_ds_num,ds_io,ds_containment,fun_call
+    WHERE  annot_ds_num.ds_id=ds_containment.in_id AND ds_containment.out_id=ds_io.variable_id AND
+           ds_io.function_call_id=fun_call.id AND annot_ds_num.name=$1
   UNION
-    SELECT proc.run_id, a_ds_n.name, a_ds_n.value 
-    FROM   proc, ds_use, a_ds_n
-    WHERE  proc.id=ds_use.proc_id and ds_use.ds_id=a_ds_n.ds_id and
-           a_ds_n.name=$1
+    SELECT fun_call.run_id, annot_ds_num.name, annot_ds_num.value -- annotated dataset is itself an input/output of the call
+    FROM   fun_call, ds_io, annot_ds_num
+    WHERE  fun_call.id=ds_io.function_call_id and ds_io.variable_id=annot_ds_num.ds_id and
+           annot_ds_num.name=$1
   UNION
-    SELECT proc.run_id, a_proc_n.name, a_proc_n.value 
-    FROM   proc, a_proc_n
-    WHERE  proc.id=a_proc_n.proc_id and a_proc_n.name=$1
+    SELECT fun_call.run_id, annot_fun_call_num.name, annot_fun_call_num.value 
+    FROM   fun_call, annot_fun_call_num
+    WHERE  fun_call.id=annot_fun_call_num.fun_call_id and annot_fun_call_num.name=$1
   UNION
-    SELECT run.id as run_id, a_run_n.name, a_run_n.value 
-    FROM   run, a_run_n
-    WHERE  run.id=a_run_n.run_id and a_run_n.name=$1
+    SELECT run.id as run_id, annot_run_num.name, annot_run_num.value 
+    FROM   run, annot_run_num
+    WHERE  run.id=annot_run_num.run_id and annot_run_num.name=$1
 $$ LANGUAGE SQL;
 
 DROP TYPE compare_run_by_key_numeric_type;
@@ -89,23 +85,23 @@
 CREATE OR REPLACE FUNCTION compare_run_by_key_numeric(name VARCHAR)
 RETURNS SETOF compare_run_by_key_numeric_type
 AS $$
-    SELECT proc.run_id, a_ds_n.name, a_ds_n.value
-    FROM   a_ds_n,ds_use,ds_cont,proc
-    WHERE  a_ds_n.ds_id=ds_cont.in_id AND ds_cont.out_id=ds_use.ds_id AND
-           ds_use.proc_id=proc.id AND a_ds_n.name=$1
+    SELECT fun_call.run_id, annot_ds_num.name, annot_ds_num.value -- annotated dataset is a member of a container that is an input/output of the call
+    FROM   annot_ds_num,ds_io,ds_containment,fun_call
+    WHERE  annot_ds_num.ds_id=ds_containment.in_id AND ds_containment.out_id=ds_io.variable_id AND
+           ds_io.function_call_id=fun_call.id AND annot_ds_num.name=$1
   UNION
-    SELECT proc.run_id, a_ds_n.name, a_ds_n.value 
-    FROM   proc, ds_use, a_ds_n
-    WHERE  proc.id=ds_use.proc_id and ds_use.ds_id=a_ds_n.ds_id and
-           a_ds_n.name=$1
+    SELECT fun_call.run_id, annot_ds_num.name, annot_ds_num.value -- annotated dataset is itself an input/output of the call
+    FROM   fun_call, ds_io, annot_ds_num
+    WHERE  fun_call.id=ds_io.function_call_id and ds_io.variable_id=annot_ds_num.ds_id and
+           annot_ds_num.name=$1
   UNION
-    SELECT proc.run_id, a_proc_n.name, a_proc_n.value 
-    FROM   proc, a_proc_n
-    WHERE  proc.id=a_proc_n.proc_id and a_proc_n.name=$1
+    SELECT fun_call.run_id, annot_fun_call_num.name, annot_fun_call_num.value -- numeric annotations live in the annot_*_num tables
+    FROM   fun_call, annot_fun_call_num
+    WHERE  fun_call.id=annot_fun_call_num.fun_call_id and annot_fun_call_num.name=$1
   UNION
-    SELECT run.id as run_id, a_run_n.name, a_run_n.value 
-    FROM   run, a_run_n
-    WHERE  run.id=a_run_n.run_id and a_run_n.name=$1
+    SELECT run.id as run_id, annot_run_num.name, annot_run_num.value 
+    FROM   run, annot_run_num
+    WHERE  run.id=annot_run_num.run_id and annot_run_num.name=$1
 $$ LANGUAGE SQL;
 
 
@@ -115,23 +111,23 @@
 CREATE OR REPLACE FUNCTION compare_run_by_annot_txt(name VARCHAR)
 RETURNS SETOF compare_run_by_annot_txt_type
 AS $$
-    SELECT proc.run_id, a_ds_t.name, a_ds_t.value
-    FROM   a_ds_t,ds_use,ds_cont,proc
-    WHERE  a_ds_t.ds_id=ds_cont.in_id AND ds_cont.out_id=ds_use.ds_id AND
-           ds_use.proc_id=proc.id AND a_ds_t.name=$1
+    SELECT fun_call.run_id, annot_ds_text.name, annot_ds_text.value -- annotated dataset is a member of a container that is an input/output of the call
+    FROM   annot_ds_text,ds_io,ds_containment,fun_call
+    WHERE  annot_ds_text.ds_id=ds_containment.in_id AND ds_containment.out_id=ds_io.variable_id AND
+           ds_io.function_call_id=fun_call.id AND annot_ds_text.name=$1
   UNION
-    SELECT proc.run_id, a_ds_t.name, a_ds_t.value 
-    FROM   proc, ds_use, a_ds_t
-    WHERE  proc.id=ds_use.proc_id and ds_use.ds_id=a_ds_t.ds_id and
-           a_ds_t.name=$1
+    SELECT fun_call.run_id, annot_ds_text.name, annot_ds_text.value -- annotated dataset is itself an input/output of the call
+    FROM   fun_call, ds_io, annot_ds_text
+    WHERE  fun_call.id=ds_io.function_call_id and ds_io.variable_id=annot_ds_text.ds_id and
+           annot_ds_text.name=$1
   UNION
-    SELECT proc.run_id, a_proc_t.name, a_proc_t.value 
-    FROM   proc, a_proc_t
-    WHERE  proc.id=a_proc_t.proc_id and a_proc_t.name=$1
+    SELECT fun_call.run_id, annot_fun_call_text.name, annot_fun_call_text.value 
+    FROM   fun_call, annot_fun_call_text
+    WHERE  fun_call.id=annot_fun_call_text.fun_call_id and annot_fun_call_text.name=$1
   UNION
-    SELECT run.id as run_id, a_run_t.name, a_run_t.value 
-    FROM   run, a_run_t
-    WHERE  run.id=a_run_t.run_id and a_run_t.name=$1
+    SELECT run.id as run_id, annot_run_text.name, annot_run_text.value 
+    FROM   run, annot_run_text
+    WHERE  run.id=annot_run_text.run_id and annot_run_text.name=$1
 $$ LANGUAGE SQL;
 
 DROP TYPE compare_run_by_key_text_type;
@@ -140,23 +136,23 @@
 CREATE OR REPLACE FUNCTION compare_run_by_key_text(name VARCHAR)
 RETURNS SETOF compare_run_by_key_text_type
 AS $$
-    SELECT proc.run_id, a_ds_t.name, a_ds_t.value
-    FROM   a_ds_t,ds_use,ds_cont,proc
-    WHERE  a_ds_t.ds_id=ds_cont.in_id AND ds_cont.out_id=ds_use.ds_id AND
-           ds_use.proc_id=proc.id AND a_ds_t.name=$1
+    SELECT fun_call.run_id, annot_ds_text.name, annot_ds_text.value -- annotated dataset is a member of a container that is an input/output of the call
+    FROM   annot_ds_text,ds_io,ds_containment,fun_call
+    WHERE  annot_ds_text.ds_id=ds_containment.in_id AND ds_containment.out_id=ds_io.variable_id AND
+           ds_io.function_call_id=fun_call.id AND annot_ds_text.name=$1
   UNION
-    SELECT proc.run_id, a_ds_t.name, a_ds_t.value 
-    FROM   proc, ds_use, a_ds_t
-    WHERE  proc.id=ds_use.proc_id and ds_use.ds_id=a_ds_t.ds_id and
-           a_ds_t.name=$1
+    SELECT fun_call.run_id, annot_ds_text.name, annot_ds_text.value -- annotated dataset is itself an input/output of the call
+    FROM   fun_call, ds_io, annot_ds_text
+    WHERE  fun_call.id=ds_io.function_call_id and ds_io.variable_id=annot_ds_text.ds_id and
+           annot_ds_text.name=$1
   UNION
-    SELECT proc.run_id, a_proc_t.name, a_proc_t.value 
-    FROM   proc, a_proc_t
-    WHERE  proc.id=a_proc_t.proc_id and a_proc_t.name=$1
+    SELECT fun_call.run_id, annot_fun_call_text.name, annot_fun_call_text.value 
+    FROM   fun_call, annot_fun_call_text
+    WHERE  fun_call.id=annot_fun_call_text.fun_call_id and annot_fun_call_text.name=$1
   UNION
-    SELECT run.id as run_id, a_run_t.name, a_run_t.value 
-    FROM   run, a_run_t
-    WHERE  run.id=a_run_t.run_id and a_run_t.name=$1
+    SELECT run.id as run_id, annot_run_text.name, annot_run_text.value 
+    FROM   run, annot_run_text
+    WHERE  run.id=annot_run_text.run_id and annot_run_text.name=$1
 $$ LANGUAGE SQL;
 
 -- CREATE OR REPLACE FUNCTION compare_run_by_annot_num(name VARCHAR)
@@ -166,19 +162,19 @@
 --   value NUMERIC
 -- )
 -- AS $$
---     SELECT process.workflow_id, annot_ds_num.name, annot_ds_num.value
---     FROM   annot_ds_num,ds_usage,ds_containment,process
+--     SELECT fun_call.workflow_id, annot_ds_num.name, annot_ds_num.value
+--     FROM   annot_ds_num,ds_usage,ds_containment,fun_call
 --     WHERE  annot_ds_num.id=ds_containment.in_id AND ds_containment.out_id=ds_usage.dataset_id AND
---            ds_usage.process_id=process.id AND annot_ds_num.name=$1
+--            ds_usage.fun_call_id=fun_call.id AND annot_ds_num.name=$1
 --   UNION
---     SELECT process.workflow_id, annot_ds_num.name, annot_ds_num.value 
---     FROM   process, ds_usage, annot_ds_num
---     WHERE  process.id=ds_usage.process_id and ds_usage.dataset_id=annot_ds_num.id and
+--     SELECT fun_call.workflow_id, annot_ds_num.name, annot_ds_num.value 
+--     FROM   fun_call, ds_usage, annot_ds_num
+--     WHERE  fun_call.id=ds_usage.fun_call_id and ds_usage.dataset_id=annot_ds_num.id and
 --            annot_ds_num.name=$1
 --   UNION
---     SELECT process.workflow_id, annot_p_num.name, annot_p_num.value 
---     FROM   process, annot_p_num
---     WHERE  process.id=annot_p_num.id and annot_p_num.name=$1
+--     SELECT fun_call.workflow_id, annot_p_num.name, annot_p_num.value 
+--     FROM   fun_call, annot_p_num
+--     WHERE  fun_call.id=annot_p_num.id and annot_p_num.name=$1
 --   UNION
 --     SELECT workflow.id as workflow_id, annot_wf_num.name, annot_wf_num.value 
 --     FROM   workflow, annot_wf_num
@@ -192,14 +188,14 @@
 --   name VARCHAR, 
 --   value VARCHAR) 
 -- AS $$
---     SELECT   process.workflow_id, annot_ds_txt.name, annot_ds_txt.value 
---     FROM     process, ds_usage, annot_ds_txt
---     WHERE    process.id=ds_usage.process_id and ds_usage.dataset_id=annot_ds_txt.id and
+--     SELECT   fun_call.workflow_id, annot_ds_txt.name, annot_ds_txt.value 
+--     FROM     fun_call, ds_usage, annot_ds_txt
+--     WHERE    fun_call.id=ds_usage.fun_call_id and ds_usage.dataset_id=annot_ds_txt.id and
 --              annot_ds_txt.name=$1
 --   UNION
---     SELECT   process.workflow_id, annot_p_txt.name, annot_p_txt.value 
---     FROM     process, annot_p_txt
---     WHERE    process.id=annot_p_txt.id and annot_p_txt.name=$1
+--     SELECT   fun_call.workflow_id, annot_p_txt.name, annot_p_txt.value 
+--     FROM     fun_call, annot_p_txt
+--     WHERE    fun_call.id=annot_p_txt.id and annot_p_txt.name=$1
 --   UNION
 --     SELECT   workflow.id as workflow_id, annot_wf_txt.name, annot_wf_txt.value 
 --     FROM     workflow, annot_wf_txt
@@ -214,14 +210,14 @@
   value BOOLEAN
 ) 
 AS $$
-    SELECT   process.workflow_id, annot_ds_bool.name, annot_ds_bool.value 
-    FROM     process, ds_usage, annot_ds_bool
-    WHERE    process.id=ds_usage.process_id and ds_usage.dataset_id=annot_ds_bool.id and
+    SELECT   fun_call.workflow_id, annot_ds_bool.name, annot_ds_bool.value 
+    FROM     fun_call, ds_usage, annot_ds_bool
+    WHERE    fun_call.id=ds_usage.fun_call_id and ds_usage.dataset_id=annot_ds_bool.id and
              annot_ds_bool.name=$1
   UNION
-    SELECT   process.workflow_id, annot_p_bool.name, annot_p_bool.value 
-    FROM     process, annot_p_bool
-    WHERE    process.id=annot_p_bool.id and annot_p_bool.name=$1
+    SELECT   fun_call.workflow_id, annot_p_bool.name, annot_p_bool.value 
+    FROM     fun_call, annot_p_bool
+    WHERE    fun_call.id=annot_p_bool.id and annot_p_bool.name=$1
   UNION
     SELECT   workflow.id as workflow_id, annot_wf_bool.name, annot_wf_bool.value 
     FROM     workflow, annot_wf_bool
@@ -230,7 +226,7 @@
 
 
 -- correlate a parameter with workflow runtime statistics
-CREATE OR REPLACE FUNCTION correlate_param_runtime(param_name VARCHAR) 
+CREATE OR REPLACE FUNCTION correlate_parameter_runtime(parameter_name VARCHAR) 
 RETURNS TABLE (
     workflow VARCHAR,  
     workflow_starttime TIMESTAMP WITH TIME ZONE, 
@@ -239,10 +235,10 @@
     parameter_value VARCHAR
 ) 
 AS $$
-	SELECT workflow.id,to_timestamp(workflow.start_time),workflow.duration,ds_usage.param_name,variable.value
-	FROM   variable,ds_usage,process,workflow
-	WHERE  variable.id=ds_usage.dataset_id AND ds_usage.process_id=process.id AND 
-	       process.workflow_id=workflow.id AND ds_usage.param_name=$1 
+	SELECT workflow.id,to_timestamp(workflow.start_time),workflow.duration,ds_usage.parameter_name,variable.value
+	FROM   variable,ds_usage,fun_call,workflow
+	WHERE  variable.id=ds_usage.dataset_id AND ds_usage.fun_call_id=fun_call.id AND 
+	       fun_call.workflow_id=workflow.id AND ds_usage.param_name=$1 
 $$ LANGUAGE SQL;
 
 -- recursive query to find ancestor entities in a provenance graph
@@ -263,7 +259,7 @@
 $$ LANGUAGE SQL;
 
 
--- compare(<entity>, <list of param_names, annotations keys>
+-- compare(<entity>, <list of parameter_names, annotation keys>)
 CREATE OR REPLACE FUNCTION compare_run(VARIADIC args VARCHAR[])
 RETURNS SETOF RECORD AS $$
 DECLARE 
@@ -280,7 +276,7 @@
     property_type := split_part(args[i], '=', 1);
     property := split_part(args[i], '=', 2);
     CASE property_type
-    WHEN 'param_name' THEN
+    WHEN 'parameter_name' THEN
       function_name := 'compare_run_by_parameter';
     WHEN 'annot_num' THEN
       function_name := 'compare_run_by_annot_num';

Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql	2012-04-23 13:41:59 UTC (rev 5753)
+++ provenancedb/prov-init.sql	2012-04-24 11:58:19 UTC (rev 5754)
@@ -1,42 +1,67 @@
 -- this is the schema definition used for the main relational provenance
 -- implementation (in both sqlite3 and postgres)
 
+-- base relations
+drop table app_catalog cascade;
+drop table site_catalog cascade;
+drop table script cascade;
 drop table run cascade;
-drop table proc cascade;
-drop table app_inv cascade;
+drop table fun_call cascade;
+drop table app_fun_call cascade;
 drop table app_exec cascade;
 drop table rt_info cascade;
 drop table ds cascade;
-drop table file cascade;
-drop table in_mem cascade;
-drop table ds_cont cascade;
+drop table mapped cascade;
+drop table primitive cascade;
+drop table ds_containment cascade;
 drop table ds_in cascade;
 drop table ds_out cascade;
-drop table a_ds_n cascade;
-drop table a_ds_t cascade;
-drop table a_proc_n cascade;
-drop table a_proc_t cascade;
-drop table a_run_n cascade;
-drop table a_run_t cascade;
+drop table annot_run_num cascade;
+drop table annot_run_text cascade;
+drop table annot_fun_call_num cascade;
+drop table annot_fun_call_text cascade;
+drop table annot_app_exec_num cascade;
+drop table annot_app_exec_text cascade;
+drop table annot_ds_num cascade;
+drop table annot_ds_text cascade;
 
+-- app_catalog stores the application catalog (tc.file)
+create table app_catalog (
+       hash_value		 varchar(256) primary key,
+       content			 text
+);
 
+-- site_catalog stores the site catalog (sites file)
+create table site_catalog (
+	hash_value		varchar(256) primary key,
+	content			text
+);
+	
+-- script stores Swift script source codes
+create table script (
+	hash_value		varchar(256) primary key,
+	content			text
+);
+
 -- run stores information about each script run log that has
 -- been seen by the importer: the log filename, swift version and import
 -- status.
 -- Might be interesting to store xml translation of the Swift script
 -- here for prospective provenance and versioning.
 create table run
-    (id            varchar(256) primary key,
-     log_filename  varchar(2048),
-     swift_version varchar(16),
-     cog_version   varchar(16),
-     final_state   varchar(32),
-     start_time    numeric,
-     duration      numeric,
-     script_source text,
-     tc_file	   text,
-     sites_file	   text
-    );
+    (
+     id				varchar(256) primary key,
+     log_filename 		varchar(2048),
+     swift_version 		varchar(16),
+     cog_version   		varchar(16),
+     final_state   		varchar(32),
+     start_time    		numeric,
+     duration      		numeric,
+     script_filename		varchar(2048),
+     script_hash		varchar(256) references script (hash_value),
+     application_catalog_hash	varchar(256) references app_catalog (hash_value),
+     site_catalog_hash	   	varchar(256) references site_catalog (hash_value)
+);
 
 -- process gives information about each process (in the OPM sense)
 -- it is augmented by information in other tables
@@ -44,32 +69,35 @@
 -- must be the case that the specific type table
 -- has an entry for this process.
 -- process types: internal, rootthread, execute, function, compound, scope, operator
-create table proc
-    (id     varchar(256) primary key, 
+create table fun_call
+    (
+     id     varchar(256) primary key, 
      type   varchar(16),
      name   varchar(256), -- in the case of an execute this refers to the transformation name in tc.data
      run_id varchar(256) references run (id) on delete cascade   -- normalize: workflow_id of sub-procedure determined
           	 	      		 	       	  	 	   -- by compound procedure 
-    );
+);
 
 -- this gives information about each execute.
 -- each execute is identified by a unique URI. other information from
 -- swift logs is also stored here. an execute is an OPM process.
-create table app_inv
-    (id             varchar(256) primary key references proc (id) on delete cascade,  
-     proc_name      varchar(256), -- name of the app procedure that invokes the transformation
-     start_time     numeric,
-     duration       numeric,
-     final_state    varchar(32),
-     scratch        varchar(2048)
-    );
+create table app_fun_call
+    (
+     id			varchar(256) primary key references fun_call (id) on delete cascade,  
+     name      		varchar(256), -- name of the app procedure that invokes the transformation
+     start_time	    	numeric,
+     duration		numeric,
+     final_state	varchar(32),
+     scratch		varchar(2048)
+);
 
--- this gives information about each execute2, which is an attempt to
--- perform an execution. the execute2 id is tied to per-execution-attempt
+-- this gives information about each application execution attempt, including
+-- aggregate resource consumption. the app_exec_id is tied to per-execution-attempt
 -- information such as wrapper logs
 create table app_exec
-    (id                varchar(256) primary key,
-     app_inv_id        varchar(256) references app_inv (id) on delete cascade, 
+    (
+     id                varchar(256) primary key,
+     app_fun_call_id   varchar(256) references app_fun_call (id) on delete cascade, 
      start_time        numeric,
      duration          numeric,
      final_state       varchar(32),
@@ -81,37 +109,40 @@
      cpu	       numeric,
      fsin	       numeric,
      fsout	       numeric,
-     timesswapped      numeric,
+     timesswpd         numeric,
      socketrecv	       numeric,
      socketsent	       numeric,
-     majorpagefaults   numeric,
-     minorpagefaults   numeric,
-     contextswitchesinv	numeric,
-     contextswitchesvol	numeric
-    );
+     majpfaults        numeric,
+     minpfaults        numeric,
+     ctxswinv	       numeric,
+     ctxswvol	       numeric
+);
 
 -- app execution runtime info extracted from the /proc filesystem (assumes the app executed
 -- in a Linux host) 
 create table rt_info
-   ( app_exec_id        varchar(256) references app_exec (id) on delete cascade, 
-     tstamp		numeric,
+   ( 
+     app_exec_id        varchar(256) references app_exec (id) on delete cascade, 
+     timestamp		numeric,
      cpu_usage          numeric,
      max_phys_mem	numeric,
      max_virt_mem	numeric,
      io_read		numeric,
      io_write		numeric,
-     primary key (app_exec_id, tstamp)
-   );
+     primary key (app_exec_id, timestamp)
+);
 
 -- ds stores all dataset identifiers.
 create table ds
-    (id varchar(256) primary key
+    (
+      id	 varchar(256) primary key
     );
 
 -- file stores the filename mapped to each dataset. 
-create table file
-    ( id         varchar(256) primary key references ds (id) on delete cascade,
-      name       varchar(2048)
+create table mapped
+    ( 
+      id	 varchar(256) primary key references ds (id) on delete cascade,
+      filename   varchar(2048)
     );
 
 -- dataset_values stores the value for each dataset which is known to have
@@ -119,7 +150,7 @@
 -- to expose that value as an SQL type other than a string, and so (for
 -- example) SQL numerical operations should not be expected to work, even
 -- though the user knows that a particular dataset stores a numeric value.
-create table in_mem
+create table primitive
     ( id    varchar(256) primary key references ds (id) on delete cascade,
       value varchar(2048)
     );
@@ -131,7 +162,7 @@
 -- constructors and accessors, rather than, or in addition to,
 -- a containment hierarchy. The relationship (such as array index or
 -- structure member name) should also be stored in this table.
-create table ds_cont
+create table ds_containment
     ( out_id varchar(256) references ds (id) on delete cascade,
       in_id  varchar(256) references ds (id) on delete cascade,
       primary key (out_id,in_id)
@@ -143,142 +174,157 @@
 -- application procedure invocation; in OPM terms, the artificts which are
 -- input to and output from each process that is a Swift execution
 create table ds_in
-    (proc_id varchar(256) references proc(id) on delete cascade, 
-     ds_id   varchar(256) references ds(id) on delete cascade,
-     param   varchar(256), -- the name of the parameter in this execute that
-                              -- this dataset was bound to. sometimes this must
-                              -- be contrived (for example, in positional varargs)
-     primary key (proc_id,ds_id,param)
+    (
+     fun_call_id	varchar(256) references fun_call (id) on delete cascade, 
+     ds_id   		varchar(256) references ds (id) on delete cascade,
+     parameter   	varchar(256), -- the name of the parameter in this execute that
+                             	      -- this dataset was bound to. sometimes this must
+                              	      -- be contrived (for example, in positional varargs)
+     primary key (fun_call_id,ds_id,parameter)
     );
 
 create table ds_out
-    (proc_id varchar(256) references proc(id) on delete cascade, 
-     ds_id   varchar(256) references ds(id) on delete cascade,
-     param   varchar(256), -- the name of the parameter in this execute that
-                              -- this dataset was bound to. sometimes this must
-                              -- be contrived (for example, in positional varargs)
-     primary key (proc_id,ds_id,param)
+    (
+     fun_call_id	varchar(256) references fun_call (id) on delete cascade, 
+     ds_id   		varchar(256) references ds (id) on delete cascade,
+     parameter   	varchar(256), -- the name of the parameter in this execute that
+                              	      -- this dataset was bound to. sometimes this must
+                              	      -- be contrived (for example, in positional varargs)
+     primary key (fun_call_id,ds_id,parameter)
     );
 
-drop view ds_use;
 
-create view ds_use as
- SELECT ds_in.proc_id as function_call_id, ds_in.ds_id as variable_id, ds_in.param as parameter
-   FROM ds_in
-UNION ALL 
- SELECT ds_out.proc_id as function_call_id, ds_out.ds_id as variable_id, ds_out.param as parameter
-   FROM ds_out;
-
-
--- annotations (_n: numeric, _t: text)
-create table a_ds_n
+-- annotations 
+create table annot_ds_num
    ( ds_id varchar(256) references ds (id) on delete cascade, 
      name  varchar(256),
      value numeric,
      primary key (ds_id, name)
    );
 
-create table a_ds_t
+create table annot_ds_text
    ( ds_id varchar(256) references ds (id) on delete cascade, 
      name  varchar(256),
      value varchar(2048),
      primary key (ds_id, name)
    );
 
-create table a_proc_n
-   ( proc_id    varchar(256) references proc (id) on delete cascade, 
+create table annot_fun_call_num
+   ( fun_call_id    varchar(256) references fun_call (id) on delete cascade, 
      name       varchar(256),
      value      numeric,
-     primary key (proc_id, name)
+     primary key (fun_call_id, name)
    );
 
-create table a_proc_t
-   ( proc_id    varchar(256) references proc (id) on delete cascade, 
+create table annot_fun_call_text
+   ( fun_call_id    varchar(256) references fun_call (id) on delete cascade, 
      name       varchar(256),
      value      varchar(2048),
-     primary key (proc_id, name)
+     primary key (fun_call_id, name)
    );
 
-create table a_run_n
+create table annot_run_num
    ( run_id    varchar(256) references run (id) on delete cascade, 
      name      varchar(256),
      value     numeric,
      primary key (run_id, name)
    );
 
-create table a_run_t
+create table annot_run_text
    ( run_id    varchar(256) references run (id) on delete cascade, 
      name      varchar(256),
      value     varchar(2048),
      primary key (run_id, name)
    );
 
-create table iq
-   ( idx      serial primary key,
-     q        varchar(2048)
+create table annot_app_exec_num
+   ( app_exec_id varchar(256) references app_exec (id) on delete cascade, -- the annotated application execution
+     name      varchar(256),
+     value     numeric,
+     primary key (app_exec_id, name)
    );
 
+create table annot_app_exec_text
+   ( app_exec_id varchar(256) references app_exec (id) on delete cascade, -- the annotated application execution
+     name      varchar(256),
+     value     varchar(2048),
+     primary key (app_exec_id, name)
+   );
 
+
+-- create table iq
+--    ( idx      serial primary key,
+--     q        varchar(2048)
+--   );
+drop view ds_io;
+create view ds_io as
+ select ds_in.fun_call_id as function_call_id, ds_in.ds_id as variable_id, ds_in.parameter
+ from   ds_in
+union all 
+ select ds_out.fun_call_id as function_call_id, ds_out.ds_id as variable_id, ds_out.parameter
+ from   ds_out;
+
 drop view pgraph_edge;
 create view pgraph_edge as 
-       select proc_id as parent,ds_id as child from ds_out
+       select fun_call_id as parent,ds_id as child from ds_out
        union all
-       select ds_id as parent,proc_id as child from ds_in
+       select ds_id as parent,fun_call_id as child from ds_in
        union all
-       select out_id as parent,in_id as child from ds_cont;
+       select out_id as parent,in_id as child from ds_containment;
 
-drop view a_t cascade;
-create view a_t as
+drop view annot_text cascade;
+create view annot_text as
     select *
-    from a_run_t 
+    from annot_run_text 
   union all
     select * 
-    from a_ds_t 
+    from annot_ds_text 
   union all 
     select * 
-    from a_proc_t;
+    from annot_fun_call_text;
 
-drop view a_n cascade;
-create view a_n as
+drop view annot_num cascade;
+create view annot_num as
     select *
-    from a_run_n 
+    from annot_run_num 
   union all
     select * 
-    from a_ds_n 
+    from annot_ds_num 
   union all 
     select * 
-    from a_proc_n;
+    from annot_fun_call_num;
 
 -- views used for queries based on the schema summary
 
 drop view function_call;
 
 create view function_call as 
-    select proc.id, proc.name as name, proc.type, app_inv.proc_name as tc_name, proc.run_id as script_run_id,  
-           to_timestamp(app_inv.start_time) as start_time, app_inv.duration, app_inv.final_state, app_inv.scratch 
-    from proc 
+    select fun_call.id, fun_call.name as name, fun_call.type, app_fun_call.name as app_catalog_name, fun_call.run_id as script_run_id,  
+           to_timestamp(app_fun_call.start_time) as start_time, app_fun_call.duration, app_fun_call.final_state, app_fun_call.scratch 
+    from fun_call 
     left outer join 
-    app_inv on proc.id=app_inv.id;
+    app_fun_call on fun_call.id=app_fun_call.id;
 
 drop view variable;
 
 create view variable as 
-    select file.id, 'mapped' as type, file.name as filename, null as value
-    from file
+    select mapped.id, 'mapped' as type, mapped.filename, null as value
+    from mapped
   union all 
-    select in_mem.id, 'primitive' as type, null as filename, in_mem.value
-    from in_mem
+    select primitive.id, 'primitive' as type, null as filename, primitive.value
+    from primitive
   union all
-    select ds_cont.out_id as id, 'composite' as type, null as filename, null as value from ds_cont;
+    select ds_containment.out_id as id, 'composite' as type, null as filename, null as value 
+    from ds_containment;
 
 drop view annotation;
 
 create view annotation as 
-    select a_t.run_id as id, a_t.name as key, a_t.value as string_value, null as numeric_value
-    from a_t
+    select annot_text.run_id as id, annot_text.name as key, annot_text.value as string_value, null as numeric_value
+    from annot_text
   union all
-    select a_n.run_id as id, a_n.name as key, null as string_value, a_n.value as numeric_value
-    from a_n;
+    select annot_num.run_id as id, annot_num.name as key, null as string_value, annot_num.value as numeric_value
+    from annot_num;
    
 drop view script_run;
 
@@ -290,28 +336,27 @@
 drop view application_execution;
 
 create view application_execution as
-  select id, app_inv_id as function_call_id, to_timestamp(start_time) as start_time, duration, final_state, site
+  select id, app_fun_call_id as function_call_id, to_timestamp(start_time) as start_time, duration, final_state, site
   from   app_exec;
 
-
 drop view runtime_info;
 
 create view runtime_info as
-  select app_exec_id as application_execution_id, to_timestamp(tstamp) as timestamp, cpu_usage, max_phys_mem, max_virt_mem, io_read, io_write
+  select app_exec_id as application_execution_id, to_timestamp(timestamp) as timestamp, cpu_usage, max_phys_mem, max_virt_mem, io_read, io_write
   from rt_info;
 
 drop view produces;
 
 create view produces as
-  select proc_id as function_call_id, ds_id as variable_id, param as parameter from ds_out;
+  select fun_call_id as function_call_id, ds_id as variable_id, parameter from ds_out;
 
 drop view consumes;
 
 create view consumes as
-  select proc_id as function_call_id, ds_id as variable_id, param as parameter from ds_in;
+  select fun_call_id as function_call_id, ds_id as variable_id, parameter from ds_in;
 
 drop view variable_containment;
 
 create view variable_containment as
   select out_id as container, in_id as containee
-  from   ds_cont;
+  from   ds_containment;

Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh	2012-04-23 13:41:59 UTC (rev 5753)
+++ provenancedb/prov-to-sql.sh	2012-04-24 11:58:19 UTC (rev 5754)
@@ -12,12 +12,12 @@
 # this gives a distinction between the root process for a workflow and the
 # workflow itself. perhaps better to model the workflow as a process
 echo "    - Root thread."
-echo "INSERT INTO proc (id, type, name, run_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" >> /tmp/$RUNID.sql
+echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" >> /tmp/$RUNID.sql
 
 echo "    - Function calls."
 while read time duration thread localthread endstate tr_name scratch; do
-    echo "INSERT INTO proc (id, type, run_id) VALUES ('$thread', 'execute', '$WF');"  >> /tmp/$RUNID-1.sql
-    echo "INSERT INTO app_inv (id, proc_name, start_time, duration, final_state, scratch) VALUES ('$thread', '$tr_name', $time, $duration, '$endstate', '$scratch');"   >> /tmp/$RUNID-2.sql
+    echo "INSERT INTO fun_call (id, type, run_id) VALUES ('$thread', 'execute', '$WF');"  >> /tmp/$RUNID.sql
+    echo "INSERT INTO app_fun_call (id, fun_call_name, start_time, duration, final_state, scratch) VALUES ('$thread', '$tr_name', $time, $duration, '$endstate', '$scratch');"   >> /tmp/$RUNID.sql
 done < execute.global.event
 
 echo "    - Application executions."
@@ -25,32 +25,32 @@
     # cut off the last component of the thread, so that we end up at the
     # parent thread id which should correspond with the execute-level ID
     inv_id="$WFID$(echo $thread | sed 's/-[^-]*$//')"
-    echo  "INSERT INTO app_exec (id, app_inv_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');"  >> /tmp/$RUNID-3.sql
+    echo  "INSERT INTO app_exec (id, app_fun_call_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');"  >> /tmp/$RUNID.sql
 done < execute2.global.event
 
 echo "    - Mapped variables."
 while read dataset filename; do
-    echo "INSERT INTO ds (id) VALUES ('$dataset');"  >> /tmp/$RUNID-4.sql
-    echo "INSERT INTO file (id, name) VALUES ('$dataset', '$filename');"  >> /tmp/$RUNID-5.sql
+    echo "INSERT INTO ds (id) VALUES ('$dataset');"  >> /tmp/$RUNID.sql
+    echo "INSERT INTO mapped (id, filename) VALUES ('$dataset', '$filename');"  >> /tmp/$RUNID.sql
 done < dataset-filenames.txt
 
 echo "    - Primitive variables."
 while read dataset idtype equal value rest; do
-    echo "INSERT INTO ds (id) VALUES ('$dataset');"  >> /tmp/$RUNID-4.sql
-    echo "INSERT INTO in_mem (id, value) VALUES ('$dataset', '$value');"  >> /tmp/$RUNID-5.sql
+    echo "INSERT INTO ds (id) VALUES ('$dataset');"  >> /tmp/$RUNID.sql
+    echo "INSERT INTO primitive (id, value) VALUES ('$dataset', '$value');"  >> /tmp/$RUNID.sql
 done < dataset-values.txt
 
 echo "    - Arrays and structures."
 while read outer inner; do
-    echo  "INSERT INTO ds (id) VALUES ('$outer');"  >> /tmp/$RUNID-4.sql
-    echo  "INSERT INTO ds (id) VALUES ('$inner');"  >> /tmp/$RUNID-4.sql
-    echo  "INSERT INTO ds_cont (out_id, in_id) VALUES ('$outer', '$inner');"  >> /tmp/$RUNID-5.sql
-    echo  "INSERT INTO proc (id, type, name, run_id) VALUES ('${WFID}constructor:$outer', 'constructor', 'constructor', '$WF');"  >> /tmp/$RUNID-1.sql
-    echo  "INSERT INTO ds_in (proc_id, ds_id, param) VALUES ('${WFID}constructor:$outer', '$inner', 'element');"  >> /tmp/$RUNID-5.sql
-    echo  "INSERT INTO ds_out (proc_id, ds_id, param) VALUES ('${WFID}constructor:$outer', '$outer', 'collection');"  >> /tmp/$RUNID-5.sql
+    echo  "INSERT INTO ds (id) VALUES ('$outer');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO ds (id) VALUES ('$inner');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO fun_call (id, type, name, run_id) VALUES ('${WFID}constructor:$outer', 'constructor', 'constructor', '$WF');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO ds_in (fun_call_id, ds_id, parameter) VALUES ('${WFID}constructor:$outer', '$inner', 'element');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO ds_out (fun_call_id, ds_id, parameter) VALUES ('${WFID}constructor:$outer', '$outer', 'collection');"  >> /tmp/$RUNID.sql
 done < tie-containers.txt
 
-echo "    - Operators."
+echo "    - Operator calls."
 while read col1 col2 col3 col4 col5 thread name lhs rhs result; do
     thread=$(echo $thread | awk 'BEGIN { FS = "=" }; {print $2}')
     name=$(echo $name | awk 'BEGIN { FS = "=" }; {print $2}')
@@ -60,36 +60,36 @@
     
     operatorid="${WFID}operator:$thread"
     
-    echo  "INSERT INTO ds (id) VALUES ('$lhs');" >> /tmp/$RUNID-4.sql
-    echo  "INSERT INTO ds (id) VALUES ('$rhs');" >> /tmp/$RUNID-4.sql
-    echo  "INSERT INTO ds (id) VALUES ('$result');" >> /tmp/$RUNID-4.sql
-    echo  "INSERT INTO proc (id, type, name, run_id) VALUES ('$operatorid', 'operator', '$name', '$WF');"  >> /tmp/$RUNID-1.sql
-    echo  "INSERT INTO ds_in (proc_id, ds_id, param) VALUES ('$operatorid', '$lhs', 'lhs');"  >> /tmp/$RUNID-5.sql
-    echo  "INSERT INTO ds_in (proc_id, ds_id, param) VALUES ('$operatorid', '$rhs', 'rhs');"  >> /tmp/$RUNID-5.sql
-    echo  "INSERT INTO ds_out (proc_id, ds_id, param) VALUES ('$operatorid', '$result', 'result');"  >> /tmp/$RUNID-5.sql
+    echo  "INSERT INTO ds (id) VALUES ('$lhs');" >> /tmp/$RUNID.sql
+    echo  "INSERT INTO ds (id) VALUES ('$rhs');" >> /tmp/$RUNID.sql
+    echo  "INSERT INTO ds (id) VALUES ('$result');" >> /tmp/$RUNID.sql
+    echo  "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$operatorid', 'operator', '$name', '$WF');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO ds_in (fun_call_id, ds_id, parameter) VALUES ('$operatorid', '$lhs', 'lhs');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO ds_in (fun_call_id, ds_id, parameter) VALUES ('$operatorid', '$rhs', 'rhs');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO ds_out (fun_call_id, ds_id, parameter) VALUES ('$operatorid', '$result', 'result');"  >> /tmp/$RUNID.sql
 done < operators.txt
 
-echo "    - Built-in functions."
+echo "    - Built-in function calls."
 while read id name output; do
-    echo  "INSERT INTO ds (id) VALUES ('$output');"  >> /tmp/$RUNID-4.sql
-    echo  "INSERT INTO proc (id, type, name, run_id) VALUES ('$id', 'function', '$name', '$WF');"  >> /tmp/$RUNID-1.sql
-    echo  "INSERT INTO ds_out (proc_id, ds_id, param) VALUES ('$id', '$output', 'result');"  >> /tmp/$RUNID-5.sql
+    echo  "INSERT INTO ds (id) VALUES ('$output');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$id', 'function', '$name', '$WF');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO ds_out (fun_call_id, ds_id, parameter) VALUES ('$id', '$output', 'result');"  >> /tmp/$RUNID.sql
 done < functions.txt
 
 while read id value; do
-    echo  "INSERT INTO ds (id) VALUES ('$value');" >> /tmp/$RUNID-4.sql
-    echo  "INSERT INTO ds_in (proc_id, ds_id, param) VALUES ('$id', '$value', 'undefined');"  >> /tmp/$RUNID-5.sql
+    echo  "INSERT INTO ds (id) VALUES ('$value');" >> /tmp/$RUNID.sql
+    echo  "INSERT INTO ds_in (fun_call_id, ds_id, parameter) VALUES ('$id', '$value', 'undefined');"  >> /tmp/$RUNID.sql
 done < function-inputs.txt
 
 echo "    - Function call names."
 while read thread appname; do
-    echo  "UPDATE proc SET name='$appname' WHERE id='$thread';"  >> /tmp/$RUNID-3.sql
+    echo  "UPDATE fun_call SET name='$appname' WHERE id='$thread';"  >> /tmp/$RUNID.sql
 done < invocation-procedure-names.txt
 
 echo "    - Script run events."
 while read start duration wfid rest; do
-    echo "UPDATE run SET start_time=$start WHERE id='$WF';"  >> /tmp/$RUNID-1.sql
-    echo "UPDATE run SET duration=$duration WHERE id='$WF';"  >> /tmp/$RUNID-1.sql
+    echo "UPDATE run SET start_time=$start WHERE id='$WF';"  >> /tmp/$RUNID.sql
+    echo "UPDATE run SET duration=$duration WHERE id='$WF';"  >> /tmp/$RUNID.sql
 done < workflow.event
 
 
@@ -98,20 +98,20 @@
 while read start duration thread final_state procname ; do
     if [ "$duration" != "last-event-line" ]; then
 	compoundid=$WFID$thread
-	echo "INSERT INTO proc (id, type, name, run_id) VALUES ('$compoundid', 'compound', '$procname', '$WF');"  >> /tmp/$RUNID-1.sql
+	echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$compoundid', 'compound', '$procname', '$WF');"  >> /tmp/$RUNID.sql
     fi
 done < compound.event
 
 while read start duration thread final_state procname ; do
     if [ "$duration" != "last-event-line" ]; then
 	fqid=$WFID$thread
-	echo "INSERT INTO proc (id, type, name, run_id) VALUES ('$fqid', 'internal', '$procname', '$WF');"  >> /tmp/$RUNID-1.sql
+	echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$fqid', 'internal', '$procname', '$WF');"  >> /tmp/$RUNID.sql
     fi	
 done < internalproc.event
 
 while read t ; do 
     thread="${WFID}$t"
-    echo "INSERT INTO proc (id, type, name, run_id) VALUES ('$thread', 'scope', 'scope', '$WF');"  >> /tmp/$RUNID-1.sql
+    echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$thread', 'scope', 'scope', '$WF');"  >> /tmp/$RUNID.sql
 done < scopes.txt
 
 echo "    - Variable consumption and production."
@@ -122,8 +122,8 @@
 	table=ds_out
     fi
     
-	echo "INSERT INTO ds (id) VALUES ('$dataset');"  >> /tmp/$RUNID-4.sql
-    echo "INSERT INTO $table (proc_id, ds_id, param) VALUES ('$thread', '$dataset', '$variable');"  >> /tmp/$RUNID-5.sql
+    echo "INSERT INTO ds (id) VALUES ('$dataset');"  >> /tmp/$RUNID.sql
+    echo "INSERT INTO $table (fun_call_id, ds_id, parameter) VALUES ('$thread', '$dataset', '$variable');"  >> /tmp/$RUNID.sql
 done < tie-data-invocs.txt
 
 echo "    - Wrapper log extra info."
@@ -132,13 +132,13 @@
 	echo $extrainfo | awk -F ";"  '{ for (i = 1; i <= NF; i++)
                                                print $i
                                          }' | awk -F "=" '{ print $1 " " $2 }' | awk -F ":" '{ print $1 " " $2 }' > fields.txt
-	id=$($SQLCMD --tuples-only -c "select app_inv_id from app_exec where id='$execute2_id';" | awk '{print $1}')
+	id=$($SQLCMD --tuples-only -c "select app_fun_call_id from app_exec where id='$execute2_id';" | awk '{print $1}')
 	while read name type value; do
 	    if [ "$type" = "num" ]; then
-		echo "INSERT INTO a_proc_n (id, name, value) VALUES ('$id', '$name', $value);"  >> /tmp/$RUNID-6.sql
+		echo "INSERT INTO annot_fun_call_num (id, name, value) VALUES ('$id', '$name', $value);"  >> /tmp/$RUNID.sql
 	    fi 
 	    if [ "$type" = "txt" ]; then
-		echo "INSERT INTO a_proc_t (id, name, value) VALUES ('$id', '$name', '$value');"  >> /tmp/$RUNID-6.sql
+		echo "INSERT INTO annot_fun_call_text (id, name, value) VALUES ('$id', '$name', '$value');"  >> /tmp/$RUNID.sql
 	    fi
 	done < fields.txt
     done < extrainfo.txt
@@ -153,7 +153,7 @@
 	#max_virtual_mem=$(echo $runtime | awk -F "," '{print $4}' | awk -F ":" '{print $2}')
 	#io_read_bytes=$(echo $runtime | awk -F "," '{print $5}' | awk -F ":" '{print $2}')
 	#io_write_bytes=$(echo $runtime | awk -F "," '{print $6}' | awk -F ":" '{print $2}')
-	#echo "INSERT INTO rt_info (app_exec_id, tstamp, cpu_usage, max_phys_mem, max_virt_mem, io_read, io_write) VALUES ('$execute2_id', $timestamp, $cpu_usage, $max_phys_mem, $max_virtual_mem, $io_read_bytes, $io_write_bytes);"  >> /tmp/$RUNID-6.sql
+	#echo "INSERT INTO rt_info (app_exec_id, tstamp, cpu_usage, max_phys_mem, max_virt_mem, io_read, io_write) VALUES ('$execute2_id', $timestamp, $cpu_usage, $max_phys_mem, $max_virtual_mem, $io_read_bytes, $io_write_bytes);"  >> /tmp/$RUNID.sql
 	maxrss=$(echo $runtime | awk -F "," '{print $1}' | awk -F ":" '{print $2}')
 	walltime=$(echo $runtime | awk -F "," '{print $2}' | awk -F ":" '{print $2}')
 	systime=$(echo $runtime | awk -F "," '{print $3}' | awk -F ":" '{print $2}')
@@ -168,18 +168,10 @@
 	minorpagefaults=$(echo $runtime | awk -F "," '{print $12}' | awk -F ":" '{print $2}')
 	contextswitchesinv=$(echo $runtime | awk -F "," '{print $13}' | awk -F ":" '{print $2}')
 	contextswitchesvol=$(echo $runtime | awk -F "," '{print $14}' | awk -F ":" '{print $2}')
-	echo "UPDATE app_exec SET maxrss=$maxrss, walltime=$walltime, systime=$systime, usertime=$usertime, cpu=$cpu, fsin=$fsin, fsout=$fsout, timesswapped=$timesswapped, socketrecv=$socketrecv, socketsent=$socketsent, majorpagefaults=$majorpagefaults, minorpagefaults=$minorpagefaults, contextswitchesinv=$contextswitchesinv, contextswitchesvol=$contextswitchesvol where id='$execute2_id';"  >> /tmp/$RUNID-6.sql
+	echo "UPDATE app_exec SET maxrss=$maxrss, walltime=$walltime, systime=$systime, usertime=$usertime, cpu=$cpu, fsin=$fsin, fsout=$fsout, timesswapped=$timesswapped, socketrecv=$socketrecv, socketsent=$socketsent, majorpagefaults=$majorpagefaults, minorpagefaults=$minorpagefaults, contextswitchesinv=$contextswitchesinv, contextswitchesvol=$contextswitchesvol where id='$execute2_id';"  >> /tmp/$RUNID.sql
     done < runtime.txt
 fi
 
-for i in `seq 1 6`
-do
-	cat /tmp/$RUNID-$i.sql | sort | uniq >> /tmp/$RUNID.sql
-	rm /tmp/$RUNID-$i.sql
-done
-
-#echo "COMMIT;" >> /tmp/$RUNID.sql
-
 echo "Finished SQL generation."
 echo "Exporting provenance to database..."
 $SQLCMD -f /tmp/$RUNID.sql 1> /tmp/$RUNID-provenancedb-import.log




More information about the Swift-commit mailing list