[Swift-commit] r2700 - provenancedb

noreply at svn.ci.uchicago.edu noreply at svn.ci.uchicago.edu
Mon Mar 16 13:32:11 CDT 2009


Author: benc
Date: 2009-03-16 13:32:10 -0500 (Mon, 16 Mar 2009)
New Revision: 2700

Modified:
   provenancedb/prov-init.sql
Log:
work on the documentation of the SQL schema

Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql	2009-03-16 18:31:11 UTC (rev 2699)
+++ provenancedb/prov-init.sql	2009-03-16 18:32:10 UTC (rev 2700)
@@ -1,5 +1,6 @@
 
--- sqlite3 monkey < this file
+-- this is the schema definition used for the main relational provenance
+-- implementation (in both sqlite3 and postgres)
 
 DROP TABLE executes;
 DROP TABLE dataset_usage;
@@ -10,11 +11,18 @@
 DROP TABLE known_workflows;
 DROP TABLE workflow_events;
 
+
+-- executes_in_workflows is unused at the moment, but is intended to associate
+-- each execute with its containing workflow
 CREATE TABLE executes_in_workflows
     (workflow_id char(128),
      execute_id char(128)
     );
 
+
+-- this gives information about each execute.
+-- each execute is identified by a unique URI. other information from
+-- swift logs is also stored here
 CREATE TABLE executes
     (id char(128) PRIMARY KEY,
      starttime numeric,
@@ -24,7 +32,13 @@
      scratch char(128)
     );
 
--- no primary key here. should probably index both on execute_id and on
+
+-- dataset_usage records usage relationships between executes and datasets;
+-- in SwiftScript terms, the input and output parameters for each
+-- application procedure invocation; in OPM terms, the artifacts which are
+-- input to and output from each process that is a Swift execution
+
+-- TODO: no primary key here. should probably index both on execute_id and on
 -- dataset_id for common queries? maybe add arbitrary ID for sake of it?
 
 CREATE TABLE dataset_usage
@@ -36,39 +50,66 @@
                            -- this dataset was bound to.
     );
 
--- probably desirable that this is part of executes table
+
+-- invocation_procedure_names maps each execute ID to the name of its
+-- SwiftScript procedure
+
+-- TODO probably desirable that this is part of executes table
 -- but for now this is the easiest to pull data from logs.
+
+-- TODO primary key should be execute_id
 CREATE TABLE invocation_procedure_names
     (execute_id char(128),
      procedure_name char(128)
     );
 
 
---  outer_dataset_id contains inner_dataset_id
+-- dataset_containment stores the containment hierarchy between
+-- container datasets (arrays and structs) and their contents.
+
+-- outer_dataset_id contains inner_dataset_id
+
+-- TODO this should perhaps be replaced with a more OPM-like model of
+-- constructors and accessors, rather than, or in addition to,
+-- a containment hierarchy. The relationship (such as array index or
+-- structure member name) should also be stored in this table.
 CREATE TABLE dataset_containment
     ( outer_dataset_id char(128),
       inner_dataset_id char(128)
     );
 
--- dataset_filesnames (dataset_id, filename) 
+
+-- dataset_filenames stores the filename mapped to each dataset. As some
+-- datasets do not have filenames, it should not be expected that 
+-- every dataset will have a row in this table
+
+-- TODO dataset_id should be primary key
 CREATE TABLE dataset_filenames
     ( dataset_id char(128),
       filename char(128)
     );
 
+
+-- known_workflows stores some information about each workflow log that has
+-- been seen by the importer: the log filename, swift version and import
+-- status.
 CREATE TABLE known_workflows
     ( workflow_log_filename char(128),
       version char(128),
       importstatus char(128)
     );
 
+
+-- workflow_events stores the start time and duration for each workflow
+-- that has been successfully imported.
 CREATE TABLE workflow_events
     ( workflow_id char(128),
       starttime numeric,
       duration numeric
     );
 
--- this does not work for sqlite; you'll get a syntax error but
--- ignore it
+
+-- this GRANT does not work for sqlite; you'll get a syntax error but
+-- ignore it, as it is not needed in sqlite
 grant all on dataset_containment, dataset_filenames, dataset_usage, executes_in_workflows, invocation_procedure_names, known_workflows, workflow_events to public;
 




More information about the Swift-commit mailing list