[Swift-commit] r2876 - provenancedb

noreply at svn.ci.uchicago.edu noreply at svn.ci.uchicago.edu
Thu Apr 23 10:46:50 CDT 2009


Author: benc
Date: 2009-04-23 10:46:50 -0500 (Thu, 23 Apr 2009)
New Revision: 2876

Modified:
   provenancedb/prov-init.sql
   provenancedb/prov-to-sql.sh
Log:
Add a processes table to generalise executes, operators, @functions,
compound procedures and internal procedures. The dataset_usage table now
refers to processes via process_id (previously execute_id).

Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql	2009-04-23 15:46:02 UTC (rev 2875)
+++ provenancedb/prov-init.sql	2009-04-23 15:46:50 UTC (rev 2876)
@@ -2,6 +2,7 @@
 -- this is the schema definition used for the main relational provenance
 -- implementation (in both sqlite3 and postgres)
 
+DROP TABLE processes;
 DROP TABLE executes;
 DROP TABLE execute2s;
 DROP TABLE dataset_usage;
@@ -21,12 +22,22 @@
      execute_id char(128)
     );
 
+-- processes gives information about each process (in the OPM sense)
+-- it is augmented by information in other tables
+CREATE TABLE processes
+    (id char(128) PRIMARY KEY, -- a uri
+     type char(16) -- specifies the type of process. for any type, it
+                   -- must be the case that the type-specific table (for
+                   -- example, executes) has an entry for this process.
+                   -- Having this type here seems poor normalisation, though?
+    );
 
+
 -- this gives information about each execute.
 -- each execute is identified by a unique URI. other information from
--- swift logs is also stored here
+-- swift logs is also stored here. an execute is an OPM process.
 CREATE TABLE executes
-    (id char(128) PRIMARY KEY,
+    (id char(128) PRIMARY KEY, -- actually foreign key to processes
      starttime numeric,
      duration numeric,
      finalstate char(128),
@@ -40,14 +51,14 @@
 
 CREATE TABLE execute2s
     (id char(128) PRIMARY KEY,
-     execute_id, -- secondary key to executes table
+     execute_id, -- secondary key to executes and processes tables
      starttime numeric,
      duration numeric,
      finalstate char(128),
      site char(128)
     );
 
--- dataset_usage records usage relationships between executes and datasets;
+-- dataset_usage records usage relationships between processes and datasets;
 -- in SwiftScript terms, the input and output parameters for each
 -- application procedure invocation; in OPM terms, the artificts which are
 -- input to and output from each process that is a Swift execution
@@ -56,12 +67,13 @@
 -- dataset_id for common queries? maybe add arbitrary ID for sake of it?
 
 CREATE TABLE dataset_usage
-    (execute_id char(128), -- foreign key but not enforced because maybe execute
-                           -- doesn't exist at time. same type as executes.id
+    (process_id char(128), -- foreign key but not enforced because maybe process
+                           -- doesn't exist at time. same type as processes.id
      direction char(1), -- I or O for input or output
      dataset_id char(128), -- this will perhaps key against dataset table
      param_name char(128) -- the name of the parameter in this execute that
-                           -- this dataset was bound to.
+                          -- this dataset was bound to. sometimes this must
+                          -- be contrived (for example, in positional varargs)
     );
 
 

Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh	2009-04-23 15:46:02 UTC (rev 2875)
+++ provenancedb/prov-to-sql.sh	2009-04-23 15:46:50 UTC (rev 2876)
@@ -10,6 +10,7 @@
 rm -f tmp-import.sql
 
 while read time duration thread endstate app scratch; do
+  echo "INSERT INTO processes (id, type) VALUES ('$thread', 'execute');" >> tmp-import.sql
   echo "INSERT INTO executes (id, starttime, duration, finalstate, app, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$app', '$scratch');" >> tmp-import.sql
 done < execute.global.event
 
@@ -22,14 +23,13 @@
   echo "INSERT INTO execute2s (id, execute_id, starttime, duration, finalstate, site) VALUES ('$globalid', '$execute_id', '$starttime', '$duration', '$endstate', '$site');" >> tmp-import.sql
 done < execute2.global.event
 
-
 while read thread direction dataset variable rest; do 
   if [ "$direction" == "input" ] ; then
     dir=I
   else
     dir=O
   fi
-  echo "INSERT INTO dataset_usage (execute_id, direction, dataset_id, param_name) VALUES ('$thread', '$dir', '$dataset', '$variable');" >> tmp-import.sql
+  echo "INSERT INTO dataset_usage (process_id, direction, dataset_id, param_name) VALUES ('$thread', '$dir', '$dataset', '$variable');" >> tmp-import.sql
 done < tie-data-invocs.txt
 
 while read thread appname; do




More information about the Swift-commit mailing list