[Swift-commit] r2700 - provenancedb
noreply at svn.ci.uchicago.edu
noreply at svn.ci.uchicago.edu
Mon Mar 16 13:32:11 CDT 2009
Author: benc
Date: 2009-03-16 13:32:10 -0500 (Mon, 16 Mar 2009)
New Revision: 2700
Modified:
provenancedb/prov-init.sql
Log:
work on the documentation of the SQL schema
Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql 2009-03-16 18:31:11 UTC (rev 2699)
+++ provenancedb/prov-init.sql 2009-03-16 18:32:10 UTC (rev 2700)
@@ -1,5 +1,6 @@
--- sqlite3 monkey < this file
+-- this is the schema definition used for the main relational provenance
+-- implementation (in both sqlite3 and postgres)
DROP TABLE executes;
DROP TABLE dataset_usage;
@@ -10,11 +11,18 @@
DROP TABLE known_workflows;
DROP TABLE workflow_events;
+
+-- executes_in_workflows is unused at the moment, but is intended to associate
+-- each execute with its containing workflow
CREATE TABLE executes_in_workflows (
    workflow_id CHAR(128),  -- the workflow that contains the execute
    execute_id  CHAR(128)   -- the execute being associated with that workflow
);
+
+-- this gives information about each execute.
+-- each execute is identified by a unique URI. other information from
+-- swift logs is also stored here
CREATE TABLE executes
(id char(128) PRIMARY KEY,
starttime numeric,
@@ -24,7 +32,13 @@
scratch char(128)
);
--- no primary key here. should probably index both on execute_id and on
+
+-- dataset_usage records usage relationships between executes and datasets;
+-- in SwiftScript terms, the input and output parameters for each
+-- application procedure invocation; in OPM terms, the artifacts which are
+-- input to and output from each process that is a Swift execution
+
+-- TODO: no primary key here. should probably index both on execute_id and on
-- dataset_id for common queries? maybe add arbitrary ID for sake of it?
CREATE TABLE dataset_usage
@@ -36,39 +50,66 @@
-- this dataset was bound to.
);
--- probably desirable that this is part of executes table
+
+-- invocation_procedure_name maps each execute ID to the name of its
+-- SwiftScript procedure
+
+-- TODO probably desirable that this is part of executes table
-- but for now this is the easiest to pull data from logs.
+
+-- TODO primary key should be execute_id
CREATE TABLE invocation_procedure_names (
    execute_id     CHAR(128),  -- ID of the execute
    procedure_name CHAR(128)   -- name of the SwiftScript procedure for that execute
);
--- outer_dataset_id contains inner_dataset_id
+-- dataset_containment stores the containment hierarchy between
+-- container datasets (arrays and structs) and their contents.
+
+-- outer_dataset_id contains inner_dataset_id
+
+-- TODO this should perhaps be replaced with a more OPM-like model of
+-- constructors and accessors, rather than, or in addition to,
+-- a containment hierarchy. The relationship (such as array index or
+-- structure member name) should also be stored in this table.
CREATE TABLE dataset_containment (
    outer_dataset_id CHAR(128),  -- the container dataset (array or struct)
    inner_dataset_id CHAR(128)   -- a dataset contained in outer_dataset_id
);
--- dataset_filesnames (dataset_id, filename)
+
+-- dataset_filenames stores the filename mapped to each dataset. As some
+-- datasets do not have filenames, it should not be expected that
+-- every dataset will have a row in this table
+
+-- TODO dataset_id should be primary key
CREATE TABLE dataset_filenames (
    dataset_id CHAR(128),  -- the dataset that has a mapped filename
    filename   CHAR(128)   -- the filename mapped to that dataset
);
+
+-- known_workflows stores some information about each workflow log that has
+-- been seen by the importer: the log filename, swift version and import
+-- status.
CREATE TABLE known_workflows (
    workflow_log_filename CHAR(128),  -- log file seen by the importer
    version               CHAR(128),  -- swift version recorded for that log
    importstatus          CHAR(128)   -- import status of that log
);
+
+-- workflow_events stores the start time and duration for each workflow
+-- that has been successfully imported.
CREATE TABLE workflow_events (
    workflow_id CHAR(128),  -- the imported workflow
    starttime   NUMERIC,    -- when the workflow started
    duration    NUMERIC     -- how long the workflow ran
);
--- this does not work for sqlite; you'll get a syntax error but
--- ignore it
+
+-- this GRANT does not work for sqlite; you'll get a syntax error but
+-- ignore it, as it is not needed in sqlite
-- Open every table created by this script to all database roles.
-- The executes table is included so that it is readable alongside the
-- tables that reference execute IDs; it was previously missing from this list.
GRANT ALL ON
    dataset_containment,
    dataset_filenames,
    dataset_usage,
    executes,
    executes_in_workflows,
    invocation_procedure_names,
    known_workflows,
    workflow_events
TO PUBLIC;
More information about the Swift-commit
mailing list