[Swift-commit] r2876 - provenancedb
noreply at svn.ci.uchicago.edu
noreply at svn.ci.uchicago.edu
Thu Apr 23 10:46:50 CDT 2009
Author: benc
Date: 2009-04-23 10:46:50 -0500 (Thu, 23 Apr 2009)
New Revision: 2876
Modified:
provenancedb/prov-init.sql
provenancedb/prov-to-sql.sh
Log:
processes table to generalise executes, operators, @functions, compound
procedures, internal procedures
Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql 2009-04-23 15:46:02 UTC (rev 2875)
+++ provenancedb/prov-init.sql 2009-04-23 15:46:50 UTC (rev 2876)
@@ -2,6 +2,7 @@
-- this is the schema definition used for the main relational provenance
-- implementation (in both sqlite3 and postgres)
+DROP TABLE processes;
DROP TABLE executes;
DROP TABLE execute2s;
DROP TABLE dataset_usage;
@@ -21,12 +22,22 @@
execute_id char(128)
);
+-- processes gives information about each process (in the OPM sense)
+-- it is augmented by information in other tables
+CREATE TABLE processes
+ (id char(128) PRIMARY KEY, -- a uri
+ type char(16) -- specifies the type of process. for any type, it
+ -- must be the case that the specific type table
+ -- has an entry for this process.
+ -- Having this type here seems poor normalisation, though?
+ );
+
-- this gives information about each execute.
-- each execute is identified by a unique URI. other information from
--- swift logs is also stored here
+-- swift logs is also stored here. an execute is an OPM process.
CREATE TABLE executes
- (id char(128) PRIMARY KEY,
+ (id char(128) PRIMARY KEY, -- actually foreign key to processes
starttime numeric,
duration numeric,
finalstate char(128),
@@ -40,14 +51,14 @@
CREATE TABLE execute2s
(id char(128) PRIMARY KEY,
- execute_id, -- secondary key to executes table
+ execute_id, -- secondary key to executes and processes tables
starttime numeric,
duration numeric,
finalstate char(128),
site char(128)
);
--- dataset_usage records usage relationships between executes and datasets;
+-- dataset_usage records usage relationships between processes and datasets;
-- in SwiftScript terms, the input and output parameters for each
-- application procedure invocation; in OPM terms, the artifacts which are
-- input to and output from each process that is a Swift execution
@@ -56,12 +67,13 @@
-- dataset_id for common queries? maybe add arbitrary ID for sake of it?
CREATE TABLE dataset_usage
- (execute_id char(128), -- foreign key but not enforced because maybe execute
- -- doesn't exist at time. same type as executes.id
+ (process_id char(128), -- foreign key but not enforced because maybe process
+ -- doesn't exist at time. same type as processes.id
direction char(1), -- I or O for input or output
dataset_id char(128), -- this will perhaps key against dataset table
param_name char(128) -- the name of the parameter in this execute that
- -- this dataset was bound to.
+ -- this dataset was bound to. sometimes this must
+ -- be contrived (for example, in positional varargs)
);
Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh 2009-04-23 15:46:02 UTC (rev 2875)
+++ provenancedb/prov-to-sql.sh 2009-04-23 15:46:50 UTC (rev 2876)
@@ -10,6 +10,7 @@
rm -f tmp-import.sql
while read time duration thread endstate app scratch; do
+ echo "INSERT INTO processes (id, type) VALUES ('$thread', 'execute');" >> tmp-import.sql
echo "INSERT INTO executes (id, starttime, duration, finalstate, app, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$app', '$scratch');" >> tmp-import.sql
done < execute.global.event
@@ -22,14 +23,13 @@
echo "INSERT INTO execute2s (id, execute_id, starttime, duration, finalstate, site) VALUES ('$globalid', '$execute_id', '$starttime', '$duration', '$endstate', '$site');" >> tmp-import.sql
done < execute2.global.event
-
while read thread direction dataset variable rest; do
if [ "$direction" == "input" ] ; then
dir=I
else
dir=O
fi
- echo "INSERT INTO dataset_usage (execute_id, direction, dataset_id, param_name) VALUES ('$thread', '$dir', '$dataset', '$variable');" >> tmp-import.sql
+ echo "INSERT INTO dataset_usage (process_id, direction, dataset_id, param_name) VALUES ('$thread', '$dir', '$dataset', '$variable');" >> tmp-import.sql
done < tie-data-invocs.txt
while read thread appname; do
More information about the Swift-commit mailing list