[Swift-commit] r7021 - provenancedb

lgadelha at ci.uchicago.edu lgadelha at ci.uchicago.edu
Wed Aug 28 09:08:55 CDT 2013


Author: lgadelha
Date: 2013-08-28 09:08:54 -0500 (Wed, 28 Aug 2013)
New Revision: 7021

Modified:
   provenancedb/README.asciidoc
   provenancedb/SPQL.g
   provenancedb/prov-init.sql
Log:
Adjustment of SPQL to latest db schema, minor updates.


Modified: provenancedb/README.asciidoc
===================================================================
--- provenancedb/README.asciidoc	2013-08-28 04:24:30 UTC (rev 7020)
+++ provenancedb/README.asciidoc	2013-08-28 14:08:54 UTC (rev 7021)
@@ -164,7 +164,7 @@
 
 === Swift Configuration
 
-To enable the generation of provenance information in Swift's log files and to trasfer wrapper logs back to the submitting machine for runtimei behavior information extraction the options +provenance.log+ and wrapperlog.always.transfer=true should be set to true in +etc/swift.properties+:
+To enable the generation of provenance information in Swift's log files and to transfer wrapper logs back to the submitting machine for runtime behavior information extraction the options +provenance.log+ and wrapperlog.always.transfer=true should be set to true in +etc/swift.properties+:
 
 --------------------------------------
 provenance.log=true
@@ -219,7 +219,7 @@
  modis.swift     | 5483          | 3339        | SUCCESS     | 2012-10-26 11:44:59.909-02 |   85.050
 --------------------------------------
 
-List the 
+List the datasets and function calls from which the dataset dataset:20121026-1146-jng6bir4:720000001604 was derived:
 
 --------------------------------------
 select * from ancestors('dataset:20121026-1146-jng6bir4:720000001604');

Modified: provenancedb/SPQL.g
===================================================================
--- provenancedb/SPQL.g	2013-08-28 04:24:30 UTC (rev 7020)
+++ provenancedb/SPQL.g	2013-08-28 14:08:54 UTC (rev 7021)
@@ -28,26 +28,25 @@
 		schemaGraph.addVertex("annotation");
 		schemaGraph.addVertex("script_run");
 		schemaGraph.addVertex("function_call");
-		schemaGraph.addVertex("variable");
+		schemaGraph.addVertex("dataset");
 		schemaGraph.addVertex("application_execution");
 		schemaGraph.addVertex("runtime_info");
-		schemaGraph.addVertex("contains");
-		schemaGraph.addVertex("produces");
-		schemaGraph.addVertex("consumes");
+		schemaGraph.addVertex("dataset_containment");
+		schemaGraph.addVertex("dataset_out");
+		schemaGraph.addVertex("dataset_in");
 		schemaGraph.addVertex("compare_run");
-		schemaGraph.addVertex("variable_containment");
 		//schemaGraph.addEdge("annotation", "script_run");
 		//schemaGraph.addEdge("annotation", "function_call");
 		//schemaGraph.addEdge("annotation", "variable");
 		schemaGraph.addEdge("script_run", "function_call");
-		schemaGraph.addEdge("function_call", "consumes");
-		schemaGraph.addEdge("function_call", "produces");
+		schemaGraph.addEdge("function_call", "dataset_in");
+		schemaGraph.addEdge("function_call", "dataset_out");
 		schemaGraph.addEdge("function_call", "application_execution");
 		schemaGraph.addEdge("application_execution", "runtime_info");
-		schemaGraph.addEdge("variable", "variable_containment");
-		schemaGraph.addEdge("variable", "variable_containment");
-		schemaGraph.addEdge("variable", "consumes");
-		schemaGraph.addEdge("variable", "produces");
+		schemaGraph.addEdge("dataset", "dataset_containment");
+		schemaGraph.addEdge("dataset", "dataset_containment");
+		schemaGraph.addEdge("dataset", "dataset_in");
+		schemaGraph.addEdge("dataset", "dataset_out");
 
 		return schemaGraph;
 	}
@@ -83,17 +82,17 @@
 				// then one has to make sure that both consumed and produced datasets are considered in the join so there
 				// is no loss of information. One alternative, implemented here, is to replace these occurrences by the ds
 				// view, which is an union of ds_in and ds_out.
-				if(qrels.contains("produces") || qrels.contains("consumes")) {
+				if(qrels.contains("dataset_out") || qrels.contains("dataset_in")) {
 					fromRels.add(schemaGraph.getEdgeSource(aux));
 					fromRels.add(schemaGraph.getEdgeTarget(aux));				
 				}
 				else {
-					if(aux.equals(schemaGraph.getEdge("consumes","function_call")) || 
-							aux.equals(schemaGraph.getEdge("consumes","variable")) ||
-							aux.equals(schemaGraph.getEdge("produces","function_call")) ||
-							aux.equals(schemaGraph.getEdge("produces","variable"))) {
-						fromRels.add("variable");
-						fromRels.add("ds_use");
+					if(aux.equals(schemaGraph.getEdge("dataset_in","function_call")) || 
+							aux.equals(schemaGraph.getEdge("dataset_in","dataset")) ||
+							aux.equals(schemaGraph.getEdge("dataset_out","function_call")) ||
+							aux.equals(schemaGraph.getEdge("dataset_out","dataset"))) {
+						fromRels.add("dataset");
+						fromRels.add("dataset_io");
 						fromRels.add("function_call");
 					}
 					else {
@@ -124,27 +123,27 @@
 		//joinExpressions.put(schemaGraph.getEdge("annotation", "script_run"), "annotation.script_run_id=script_run.id");
 		joinExpressions.put(schemaGraph.getEdge("script_run", "function_call"), "script_run.id=function_call.script_run_id");
 		//joinExpressions.put(schemaGraph.getEdge("function_call", "annotation"), "function_call.id=annotation.function_call_id");
-		joinExpressions.put(schemaGraph.getEdge("function_call", "produces"), "function_call.id=produces.function_call_id");
-		joinExpressions.put(schemaGraph.getEdge("function_call", "consumes"), "function_call.id=consumes.function_call_id");
+		joinExpressions.put(schemaGraph.getEdge("function_call", "dataset_out"), "function_call.id=dataset_out.function_call_id");
+		joinExpressions.put(schemaGraph.getEdge("function_call", "dataset_in"), "function_call.id=dataset_in.function_call_id");
 		joinExpressions.put(schemaGraph.getEdge("function_call", "application_execution"), "function_call.id=application_execution.function_call_id");
 		joinExpressions.put(schemaGraph.getEdge("application_execution", "runtime_info"), "application_execution.id=runtime_info.application_execution_id");
-		joinExpressions.put(schemaGraph.getEdge("variable", "consumes"), "variable.id=consumes.variable_id");
-		joinExpressions.put(schemaGraph.getEdge("variable", "produces"), "variable.id=produces.variable_id");
-		//joinExpressions.put(schemaGraph.getEdge("variable", "annotation"), "variable.id=annotation.variable_id");
-		joinExpressions.put(schemaGraph.getEdge("variable", "containment"), "variable.id=containment.containee");
-		joinExpressions.put(schemaGraph.getEdge("variable", "containment"), "variable.id=containment.container");
+		joinExpressions.put(schemaGraph.getEdge("dataset", "dataset_in"), "dataset.id=dataset_in.dataset_id");
+		joinExpressions.put(schemaGraph.getEdge("dataset", "dataset_out"), "dataset.id=dataset_out.dataset_id");
+		//joinExpressions.put(schemaGraph.getEdge("dataset", "annotation"), "dataset.id=annotation.dataset_id");
+		joinExpressions.put(schemaGraph.getEdge("dataset", "containment"), "dataset.id=containment.containee");
+		joinExpressions.put(schemaGraph.getEdge("dataset", "containment"), "dataset.id=containment.container");
 
 		Iterator<DefaultEdge> i = jEdges.iterator();
 		if(i.hasNext()) {
 			DefaultEdge aux = i.next();
-			if(qrels.contains("consumes") || qrels.contains("produces")) {
+			if(qrels.contains("dataset_in") || qrels.contains("dataset_out")) {
 				joinExpressionsString = joinExpressions.get(aux);
 			}
 			else {
-				if(aux.equals(schemaGraph.getEdge("consumes","function_call")) || aux.equals(schemaGraph.getEdge("produces","function_call")))
-					joinExpressionsString = "ds_use.function_call_id=function_call.id";
-				else if(aux.equals(schemaGraph.getEdge("consumes","variable")) || aux.equals(schemaGraph.getEdge("produces","variable"))) 
-					joinExpressionsString = "ds_use.variable_id=variable.id";
+				if(aux.equals(schemaGraph.getEdge("dataset_in","function_call")) || aux.equals(schemaGraph.getEdge("dataset_out","function_call")))
+					joinExpressionsString = "dataset_io.function_call_id=function_call.id";
+				else if(aux.equals(schemaGraph.getEdge("dataset_in","dataset")) || aux.equals(schemaGraph.getEdge("dataset_out","dataset"))) 
+					joinExpressionsString = "dataset_io.dataset_id=dataset.id";
 				else {
 					joinExpressionsString = joinExpressions.get(aux);
 				}
@@ -155,14 +154,14 @@
 
 		while(i.hasNext()) {
 			DefaultEdge aux = i.next();
-			if(qrels.contains("consumes") || qrels.contains("produces")) {
+			if(qrels.contains("dataset_in") || qrels.contains("dataset_out")) {
 				joinExpressionsString += " AND " + joinExpressions.get(aux);
 			}
 			else {
-				if(aux.equals(schemaGraph.getEdge("consumes","function_call")) || aux.equals(schemaGraph.getEdge("produces","function_call")))
-					joinExpressionsString += " AND " + "ds_use.function_call_id=function_call.id";
-				else if(aux.equals(schemaGraph.getEdge("consumes","variable")) || aux.equals(schemaGraph.getEdge("produces","variable"))) 
-					joinExpressionsString += " AND " + "ds_use.variable_id=variable.id";
+				if(aux.equals(schemaGraph.getEdge("dataset_in","function_call")) || aux.equals(schemaGraph.getEdge("dataset_out","function_call")))
+					joinExpressionsString += " AND " + "dataset_io.function_call_id=function_call.id";
+				else if(aux.equals(schemaGraph.getEdge("dataset_in","dataset")) || aux.equals(schemaGraph.getEdge("dataset_out","dataset"))) 
+					joinExpressionsString += " AND " + "dataset_io.dataset_id=dataset.id";
 				else {
 					joinExpressionsString += " AND " + joinExpressions.get(aux);
 				}

Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql	2013-08-28 04:24:30 UTC (rev 7020)
+++ provenancedb/prov-init.sql	2013-08-28 14:08:54 UTC (rev 7021)
@@ -273,7 +273,7 @@
 
 create table annot_script_run_text ( script_run_id    varchar(256) references run (id) on delete cascade, 
      name      varchar(256),
-     value     varchar(2048),
+     value text,
      primary key (script_run_id, name)
 );
 
@@ -287,7 +287,7 @@
 create table annot_function_call_text ( 
        function_call_id	varchar(256) references fun_call (id) on delete cascade, 
        name       	varchar(256),
-       value      	varchar(2048),
+       value      	text,
        primary key (function_call_id, name)
 );
 
@@ -301,7 +301,7 @@
 create table annot_app_exec_text ( 
        app_exec_id            varchar(256) references app_exec (id) on delete cascade, 
        name      		    varchar(256),
-       value     		    varchar(2048),
+       value     		    text,
        primary key (app_exec_id, name)
 );
 
@@ -315,7 +315,7 @@
 create table annot_dataset_text( 
        dataset_id varchar(256) references ds (id) on delete cascade, 
        name  varchar(256),
-       value varchar(2048),
+       value text,
        primary key (dataset_id, name)
 );
 




More information about the Swift-commit mailing list