[Swift-commit] r7021 - provenancedb
lgadelha at ci.uchicago.edu
Wed Aug 28 09:08:55 CDT 2013
Author: lgadelha
Date: 2013-08-28 09:08:54 -0500 (Wed, 28 Aug 2013)
New Revision: 7021
Modified:
provenancedb/README.asciidoc
provenancedb/SPQL.g
provenancedb/prov-init.sql
Log:
Adjustment of SPQL to latest db schema, minor updates.
Modified: provenancedb/README.asciidoc
===================================================================
--- provenancedb/README.asciidoc 2013-08-28 04:24:30 UTC (rev 7020)
+++ provenancedb/README.asciidoc 2013-08-28 14:08:54 UTC (rev 7021)
@@ -164,7 +164,7 @@
=== Swift Configuration
-To enable the generation of provenance information in Swift's log files and to trasfer wrapper logs back to the submitting machine for runtimei behavior information extraction the options +provenance.log+ and wrapperlog.always.transfer=true should be set to true in +etc/swift.properties+:
+To enable the generation of provenance information in Swift's log files and to trasfer wrapper logs back to the submitting machine for runtime behavior information extraction the options +provenance.log+ and wrapperlog.always.transfer=true should be set to true in +etc/swift.properties+:
--------------------------------------
provenance.log=true
@@ -219,7 +219,7 @@
modis.swift | 5483 | 3339 | SUCCESS | 2012-10-26 11:44:59.909-02 | 85.050
--------------------------------------
-List the
+List the datasets and function calls from which the dataset dataset:20121026-1146-jng6bir4:720000001604 was derived:
--------------------------------------
select * from ancestors('dataset:20121026-1146-jng6bir4:720000001604');
Modified: provenancedb/SPQL.g
===================================================================
--- provenancedb/SPQL.g 2013-08-28 04:24:30 UTC (rev 7020)
+++ provenancedb/SPQL.g 2013-08-28 14:08:54 UTC (rev 7021)
@@ -28,26 +28,25 @@
schemaGraph.addVertex("annotation");
schemaGraph.addVertex("script_run");
schemaGraph.addVertex("function_call");
- schemaGraph.addVertex("variable");
+ schemaGraph.addVertex("dataset");
schemaGraph.addVertex("application_execution");
schemaGraph.addVertex("runtime_info");
- schemaGraph.addVertex("contains");
- schemaGraph.addVertex("produces");
- schemaGraph.addVertex("consumes");
+ schemaGraph.addVertex("dataset_containment");
+ schemaGraph.addVertex("dataset_out");
+ schemaGraph.addVertex("dataset_in");
schemaGraph.addVertex("compare_run");
- schemaGraph.addVertex("variable_containment");
//schemaGraph.addEdge("annotation", "script_run");
//schemaGraph.addEdge("annotation", "function_call");
//schemaGraph.addEdge("annotation", "variable");
schemaGraph.addEdge("script_run", "function_call");
- schemaGraph.addEdge("function_call", "consumes");
- schemaGraph.addEdge("function_call", "produces");
+ schemaGraph.addEdge("function_call", "dataset_in");
+ schemaGraph.addEdge("function_call", "dataset_out");
schemaGraph.addEdge("function_call", "application_execution");
schemaGraph.addEdge("application_execution", "runtime_info");
- schemaGraph.addEdge("variable", "variable_containment");
- schemaGraph.addEdge("variable", "variable_containment");
- schemaGraph.addEdge("variable", "consumes");
- schemaGraph.addEdge("variable", "produces");
+ schemaGraph.addEdge("dataset", "dataset_containment");
+ schemaGraph.addEdge("dataset", "dataset_containment");
+ schemaGraph.addEdge("dataset", "dataset_in");
+ schemaGraph.addEdge("dataset", "dataset_out");
return schemaGraph;
}
@@ -83,17 +82,17 @@
// then one has to make sure that both consumed and produced datasets are considered in the join so there
// is no loss of information. One alternative, implemented here, is to replace these occurrences by the ds
// view, which is an union of ds_in and ds_out.
- if(qrels.contains("produces") || qrels.contains("consumes")) {
+ if(qrels.contains("dataset_out") || qrels.contains("dataset_in")) {
fromRels.add(schemaGraph.getEdgeSource(aux));
fromRels.add(schemaGraph.getEdgeTarget(aux));
}
else {
- if(aux.equals(schemaGraph.getEdge("consumes","function_call")) ||
- aux.equals(schemaGraph.getEdge("consumes","variable")) ||
- aux.equals(schemaGraph.getEdge("produces","function_call")) ||
- aux.equals(schemaGraph.getEdge("produces","variable"))) {
- fromRels.add("variable");
- fromRels.add("ds_use");
+ if(aux.equals(schemaGraph.getEdge("dataset_in","function_call")) ||
+ aux.equals(schemaGraph.getEdge("dataset_in","dataset")) ||
+ aux.equals(schemaGraph.getEdge("dataset_out","function_call")) ||
+ aux.equals(schemaGraph.getEdge("dataset_out","dataset"))) {
+ fromRels.add("dataset");
+ fromRels.add("dataset_io");
fromRels.add("function_call");
}
else {
@@ -124,27 +123,27 @@
//joinExpressions.put(schemaGraph.getEdge("annotation", "script_run"), "annotation.script_run_id=script_run.id");
joinExpressions.put(schemaGraph.getEdge("script_run", "function_call"), "script_run.id=function_call.script_run_id");
//joinExpressions.put(schemaGraph.getEdge("function_call", "annotation"), "function_call.id=annotation.function_call_id");
- joinExpressions.put(schemaGraph.getEdge("function_call", "produces"), "function_call.id=produces.function_call_id");
- joinExpressions.put(schemaGraph.getEdge("function_call", "consumes"), "function_call.id=consumes.function_call_id");
+ joinExpressions.put(schemaGraph.getEdge("function_call", "dataset_out"), "function_call.id=dataset_out.function_call_id");
+ joinExpressions.put(schemaGraph.getEdge("function_call", "dataset_in"), "function_call.id=dataset_in.function_call_id");
joinExpressions.put(schemaGraph.getEdge("function_call", "application_execution"), "function_call.id=application_execution.function_call_id");
joinExpressions.put(schemaGraph.getEdge("application_execution", "runtime_info"), "application_execution.id=runtime_info.application_execution_id");
- joinExpressions.put(schemaGraph.getEdge("variable", "consumes"), "variable.id=consumes.variable_id");
- joinExpressions.put(schemaGraph.getEdge("variable", "produces"), "variable.id=produces.variable_id");
- //joinExpressions.put(schemaGraph.getEdge("variable", "annotation"), "variable.id=annotation.variable_id");
- joinExpressions.put(schemaGraph.getEdge("variable", "containment"), "variable.id=containment.containee");
- joinExpressions.put(schemaGraph.getEdge("variable", "containment"), "variable.id=containment.container");
+ joinExpressions.put(schemaGraph.getEdge("dataset", "dataset_in"), "dataset.id=dataset_in.dataset_id");
+ joinExpressions.put(schemaGraph.getEdge("dataset", "dataset_out"), "dataset.id=dataset_out.dataset_id");
+ //joinExpressions.put(schemaGraph.getEdge("dataset", "annotation"), "dataset.id=annotation.dataset_id");
+ joinExpressions.put(schemaGraph.getEdge("dataset", "containment"), "dataset.id=containment.containee");
+ joinExpressions.put(schemaGraph.getEdge("dataset", "containment"), "dataset.id=containment.container");
Iterator<DefaultEdge> i = jEdges.iterator();
if(i.hasNext()) {
DefaultEdge aux = i.next();
- if(qrels.contains("consumes") || qrels.contains("produces")) {
+ if(qrels.contains("dataset_in") || qrels.contains("dataset_out")) {
joinExpressionsString = joinExpressions.get(aux);
}
else {
- if(aux.equals(schemaGraph.getEdge("consumes","function_call")) || aux.equals(schemaGraph.getEdge("produces","function_call")))
- joinExpressionsString = "ds_use.function_call_id=function_call.id";
- else if(aux.equals(schemaGraph.getEdge("consumes","variable")) || aux.equals(schemaGraph.getEdge("produces","variable")))
- joinExpressionsString = "ds_use.variable_id=variable.id";
+ if(aux.equals(schemaGraph.getEdge("dataset_in","function_call")) || aux.equals(schemaGraph.getEdge("dataset_out","function_call")))
+ joinExpressionsString = "dataset_io.function_call_id=function_call.id";
+ else if(aux.equals(schemaGraph.getEdge("dataset_in","dataset")) || aux.equals(schemaGraph.getEdge("dataset_out","dataset")))
+ joinExpressionsString = "dataset_io.dataset_id=dataset.id";
else {
joinExpressionsString = joinExpressions.get(aux);
}
@@ -155,14 +154,14 @@
while(i.hasNext()) {
DefaultEdge aux = i.next();
- if(qrels.contains("consumes") || qrels.contains("produces")) {
+ if(qrels.contains("dataset_in") || qrels.contains("dataset_out")) {
joinExpressionsString += " AND " + joinExpressions.get(aux);
}
else {
- if(aux.equals(schemaGraph.getEdge("consumes","function_call")) || aux.equals(schemaGraph.getEdge("produces","function_call")))
- joinExpressionsString += " AND " + "ds_use.function_call_id=function_call.id";
- else if(aux.equals(schemaGraph.getEdge("consumes","variable")) || aux.equals(schemaGraph.getEdge("produces","variable")))
- joinExpressionsString += " AND " + "ds_use.variable_id=variable.id";
+ if(aux.equals(schemaGraph.getEdge("dataset_in","function_call")) || aux.equals(schemaGraph.getEdge("dataset_out","function_call")))
+ joinExpressionsString += " AND " + "dataset_io.function_call_id=function_call.id";
+ else if(aux.equals(schemaGraph.getEdge("dataset_in","dataset")) || aux.equals(schemaGraph.getEdge("dataset_out","dataset")))
+ joinExpressionsString += " AND " + "dataset_io.dataset_id=dataset.id";
else {
joinExpressionsString += " AND " + joinExpressions.get(aux);
}
Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql 2013-08-28 04:24:30 UTC (rev 7020)
+++ provenancedb/prov-init.sql 2013-08-28 14:08:54 UTC (rev 7021)
@@ -273,7 +273,7 @@
create table annot_script_run_text ( script_run_id varchar(256) references run (id) on delete cascade,
name varchar(256),
- value varchar(2048),
+ value text,
primary key (script_run_id, name)
);
@@ -287,7 +287,7 @@
create table annot_function_call_text (
function_call_id varchar(256) references fun_call (id) on delete cascade,
name varchar(256),
- value varchar(2048),
+ value text,
primary key (function_call_id, name)
);
@@ -301,7 +301,7 @@
create table annot_app_exec_text (
app_exec_id varchar(256) references app_exec (id) on delete cascade,
name varchar(256),
- value varchar(2048),
+ value text,
primary key (app_exec_id, name)
);
@@ -315,7 +315,7 @@
create table annot_dataset_text(
dataset_id varchar(256) references ds (id) on delete cascade,
name varchar(256),
- value varchar(2048),
+ value text,
primary key (dataset_id, name)
);
More information about the Swift-commit mailing list