[Swift-commit] r2877 - provenancedb

noreply at svn.ci.uchicago.edu noreply at svn.ci.uchicago.edu
Thu Apr 23 10:47:39 CDT 2009


Author: benc
Date: 2009-04-23 10:47:39 -0500 (Thu, 23 Apr 2009)
New Revision: 2877

Modified:
   provenancedb/prepare-provenance-chart
   provenancedb/prov-init.sql
   provenancedb/prov-to-sql.sh
Log:
Imports the values of primitive-typed datasets.

Modified: provenancedb/prepare-provenance-chart
===================================================================
--- provenancedb/prepare-provenance-chart	2009-04-23 15:46:50 UTC (rev 2876)
+++ provenancedb/prepare-provenance-chart	2009-04-23 15:47:39 UTC (rev 2877)
@@ -21,6 +21,8 @@
 #AbstractDataNode dataset 3814442 has filename file://localhost/q.out
 cat $1 | grep ' FILENAME ' | sed 's/^.*dataset=\([^ ]*\) filename=\([^ ]*\).*$/\1 \2/' | sort | uniq > dataset-filenames.txt
 
+cat $1 | grep ' VALUE ' | sed 's/^.*dataset=\([^ ]*\) VALUE=\(.*\)$/\1 \2/' | sort | uniq > dataset-values.txt
+
 cat $1 | grep ' PROCEDURE ' | sed "s/^.*thread=\([^ ]*\) name=\([^ ]*\)\$/${WFID}\1 \2/" > invocation-procedure-names.txt
 
 info-to-extrainfo > extrainfo.txt

Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql	2009-04-23 15:46:50 UTC (rev 2876)
+++ provenancedb/prov-init.sql	2009-04-23 15:47:39 UTC (rev 2877)
@@ -10,6 +10,7 @@
 DROP TABLE dataset_containment;
 DROP TABLE dataset_filenames;
 DROP TABLE executes_in_workflows;
+DROP TABLE dataset_values;
 DROP TABLE known_workflows;
 DROP TABLE workflow_events;
 DROP TABLE extrainfo;
@@ -115,6 +116,15 @@
       filename char(128)
     );
 
+-- dataset_values stores the value of each dataset that is known to have
+-- one (that is, every assigned dataset of primitive type). No attempt is
+-- made here to expose that value as an SQL type other than a string, so
+-- (for example) SQL numerical operations should not be expected to work,
+-- even though the user knows that a particular dataset stores a numeric value.
+CREATE TABLE dataset_values
+    ( dataset_id char(128), -- should be primary key
+      value char(128)
+    );
 
 -- known_workflows stores some information about each workflow log that has
 -- been seen by the importer: the log filename, swift version and import

Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh	2009-04-23 15:46:50 UTC (rev 2876)
+++ provenancedb/prov-to-sql.sh	2009-04-23 15:47:39 UTC (rev 2877)
@@ -45,6 +45,10 @@
   echo "INSERT INTO dataset_filenames (dataset_id, filename) VALUES ('$dataset', '$filename');" >> tmp-import.sql
 done < dataset-filenames.txt
 
+while read dataset value; do
+  echo "INSERT INTO dataset_values (dataset_id, value) VALUES ('$dataset', '$value');" >> tmp-import.sql
+done < dataset-values.txt
+
 while read start duration wfid; do
   echo "INSERT INTO workflow_events (workflow_id,starttime, duration) VALUES ('$wfid', '$start', '$duration');" >> tmp-import.sql
 done < workflow.event




More information about the Swift-commit mailing list