[Swift-commit] r4968 - provenancedb
lgadelha at ci.uchicago.edu
Tue Aug 9 08:16:30 CDT 2011
Author: lgadelha
Date: 2011-08-09 08:16:29 -0500 (Tue, 09 Aug 2011)
New Revision: 4968
Added:
provenancedb/ProvSQL.g
provenancedb/compare_run.py
provenancedb/compare_run.sh
provenancedb/import-run-to-datalog
provenancedb/prov-to-datalog.sh
provenancedb/provenancedb-rules.datalog
provenancedb/swift-prov-import-all-logs-datalog
Removed:
provenancedb/prov-to-swipl.sh
provenancedb/xq.xq
Modified:
provenancedb/pql_functions.sql
provenancedb/prov-init.sql
provenancedb/prov-to-sql.sh
provenancedb/swift-prov-import-all-logs
Log:
Added ProvSQL grammar. Added query templates for comparison/correlation query patterns. Added Datalog import for experimentation.
Added: provenancedb/ProvSQL.g
===================================================================
--- provenancedb/ProvSQL.g (rev 0)
+++ provenancedb/ProvSQL.g 2011-08-09 13:16:29 UTC (rev 4968)
@@ -0,0 +1,309 @@
+grammar ProvSQL;
+
+@header {
+ import java.util.HashSet;
+ import java.util.HashMap;
+ import java.util.Iterator;
+ import org.jgrapht.*;
+ import org.jgrapht.alg.DijkstraShortestPath;
+ import org.jgrapht.graph.*;
+}
+
+@members {
+ String selectClause = new String();
+ String fromClause = new String();
+ String whereClauseJoinExpressions = new String();
+ String whereClause = new String();
+ boolean hasWhereJoinExpression;
+ HashSet<String> relations = new HashSet<String>();
+ UndirectedGraph<String,DefaultEdge> schemaGraph;
+ HashSet<DefaultEdge> joinEdges;
+
+ // Ideally this would receive a DB schema in SQL and build the graph automatically.
+ public static UndirectedGraph<String,DefaultEdge> buildGraph() {
+ UndirectedGraph<String,DefaultEdge> schemaGraph = new Multigraph<String,DefaultEdge>(DefaultEdge.class);
+
+ schemaGraph.addVertex("a_run_t");
+ schemaGraph.addVertex("a_run_n");
+ schemaGraph.addVertex("run");
+ schemaGraph.addVertex("proc");
+ schemaGraph.addVertex("a_proc_n");
+ schemaGraph.addVertex("a_proc_t");
+ schemaGraph.addVertex("app_inv");
+ schemaGraph.addVertex("app_exec");
+ schemaGraph.addVertex("rt_info");
+ schemaGraph.addVertex("ds_in");
+ schemaGraph.addVertex("ds_out");
+ schemaGraph.addVertex("ds");
+ schemaGraph.addVertex("file");
+ schemaGraph.addVertex("in_mem");
+ schemaGraph.addVertex("a_ds_t");
+ schemaGraph.addVertex("a_ds_n");
+ schemaGraph.addVertex("ds_cont");
+ schemaGraph.addEdge("a_run_t", "run");
+ schemaGraph.addEdge("a_run_n", "run");
+ schemaGraph.addEdge("run","proc");
+ schemaGraph.addEdge("proc", "a_proc_t");
+ schemaGraph.addEdge("proc", "a_proc_n");
+ schemaGraph.addEdge("proc", "ds_out");
+ schemaGraph.addEdge("proc", "ds_in");
+ schemaGraph.addEdge("proc", "app_inv");
+ schemaGraph.addEdge("app_inv", "app_exec");
+ schemaGraph.addEdge("app_exec", "rt_info");
+ schemaGraph.addEdge("ds", "ds_in");
+ schemaGraph.addEdge("ds", "ds_out");
+ schemaGraph.addEdge("ds", "a_ds_t");
+ schemaGraph.addEdge("ds", "a_ds_n");
+ schemaGraph.addEdge("ds", "file");
+ schemaGraph.addEdge("ds", "in_mem");
+ schemaGraph.addEdge("ds", "ds_cont");
+ schemaGraph.addEdge("ds", "ds_cont");
+
+ return schemaGraph;
+ }
+
+ private static HashSet<DefaultEdge> computeJoinEdges(
+ UndirectedGraph<String, DefaultEdge> schemaGraph,
+ HashSet<String> relations) {
+ HashSet<DefaultEdge> jEdges = new HashSet<DefaultEdge>();
+ Iterator<String> i = relations.iterator();
+ String first = new String();
+ if(i.hasNext())
+ first += i.next();
+ while(i.hasNext()) {
+ DijkstraShortestPath<String, DefaultEdge> sP = new DijkstraShortestPath<String, DefaultEdge>(schemaGraph, first, i.next());
+ Iterator<DefaultEdge> j = (sP.getPathEdgeList()).iterator();
+ while(j.hasNext())
+ jEdges.add(j.next());
+ }
+
+
+ return jEdges;
+ }
+
+
+ public static String computeFrom(UndirectedGraph<String,DefaultEdge> schemaGraph, HashSet<DefaultEdge> joinEdges, HashSet<String> qrels) {
+ HashSet<String> fromRels = new HashSet<String>();
+ String fromq = " FROM ";
+ Iterator<DefaultEdge> i = joinEdges.iterator();
+ while(i.hasNext()) {
+ DefaultEdge aux = i.next();
+ // If ds_in or ds_out are not among the select clause's relations but appear on the joinEdges,
+ // then one has to make sure that both consumed and produced datasets are considered in the join so there
+ // is no loss of information. The alternative implemented here replaces those occurrences with the
+ // ds_use view, which is a union of ds_in and ds_out.
+ if(qrels.contains("ds_in") || qrels.contains("ds_out")) {
+ fromRels.add(schemaGraph.getEdgeSource(aux));
+ fromRels.add(schemaGraph.getEdgeTarget(aux));
+ }
+ else {
+ if(aux.equals(schemaGraph.getEdge("ds_in","proc")) ||
+ aux.equals(schemaGraph.getEdge("ds_in","ds")) ||
+ aux.equals(schemaGraph.getEdge("ds_out","proc")) ||
+ aux.equals(schemaGraph.getEdge("ds_out","ds"))) {
+ fromRels.add("ds");
+ fromRels.add("ds_use");
+ fromRels.add("proc");
+ }
+ else {
+ fromRels.add(schemaGraph.getEdgeSource(aux));
+ fromRels.add(schemaGraph.getEdgeTarget(aux));
+ }
+
+ }
+ }
+ Iterator<String> j = fromRels.iterator();
+ if(j.hasNext())
+ fromq += j.next();
+ while(j.hasNext())
+ fromq+=","+j.next();
+
+ return fromq;
+ }
+
+
+ public static String computeJoinExpressions(UndirectedGraph<String,DefaultEdge> schemaGraph, HashSet<DefaultEdge> jEdges, HashSet<String> qrels) {
+
+ HashMap<DefaultEdge,String> joinExpressions = new HashMap<DefaultEdge, String>();
+ String joinExpressionsString = new String();
+
+ joinExpressions.put(schemaGraph.getEdge("a_run_t", "run"), "a_run_t.run_id=run.id");
+ joinExpressions.put(schemaGraph.getEdge("a_run_n", "run"), "a_run_n.run_id=run.id");
+ joinExpressions.put(schemaGraph.getEdge("run", "proc"), "run.id=proc.run_id");
+ joinExpressions.put(schemaGraph.getEdge("proc", "a_proc_t"), "proc.id=a_proc_t.proc_id");
+ joinExpressions.put(schemaGraph.getEdge("proc", "a_proc_n"), "proc.id=a_proc_n.proc_id");
+ joinExpressions.put(schemaGraph.getEdge("proc", "ds_out"), "proc.id=ds_out.proc_id");
+ joinExpressions.put(schemaGraph.getEdge("proc", "ds_in"), "proc.id=ds_in.proc_id");
+ joinExpressions.put(schemaGraph.getEdge("proc", "app_inv"), "proc.id=app_inv.id");
+ joinExpressions.put(schemaGraph.getEdge("app_inv", "app_exec"), "app_inv.id=app_exec.app_inv_id");
+ joinExpressions.put(schemaGraph.getEdge("app_exec", "rt_info"), "app_exec.id=rt_info.app_exec_id");
+ joinExpressions.put(schemaGraph.getEdge("ds", "ds_in"), "ds.id=ds_in.id");
+ joinExpressions.put(schemaGraph.getEdge("ds", "ds_out"), "ds.id=ds_out.id");
+ joinExpressions.put(schemaGraph.getEdge("ds", "a_ds_t"), "ds.id=a_ds_t.ds_id");
+ joinExpressions.put(schemaGraph.getEdge("ds", "a_ds_n"), "ds.id=a_ds_n.ds_id");
+ joinExpressions.put(schemaGraph.getEdge("ds", "file"), "ds.id=file.id");
+ joinExpressions.put(schemaGraph.getEdge("ds", "in_mem"), "ds.id=in_mem.id");
+ joinExpressions.put(schemaGraph.getEdge("ds", "ds_cont"), "ds.id=ds_cont.in_id");
+ joinExpressions.put(schemaGraph.getEdge("ds", "ds_cont"), "ds.id=ds_cont.out_id");
+
+ Iterator<DefaultEdge> i = jEdges.iterator();
+ if(i.hasNext()) {
+ DefaultEdge aux = i.next();
+ if(qrels.contains("ds_in") || qrels.contains("ds_out")) {
+ joinExpressionsString = joinExpressions.get(aux);
+ }
+ else {
+ if(aux.equals(schemaGraph.getEdge("ds_in","proc")) || aux.equals(schemaGraph.getEdge("ds_out","proc")))
+ joinExpressionsString = "ds_use.proc_id=proc.id";
+ else if(aux.equals(schemaGraph.getEdge("ds_in","ds")) || aux.equals(schemaGraph.getEdge("ds_out","ds")))
+ joinExpressionsString = "ds_use.ds_id=ds.id";
+ else {
+ joinExpressionsString = joinExpressions.get(aux);
+ }
+
+ }
+ }
+
+
+ while(i.hasNext()) {
+ DefaultEdge aux = i.next();
+ if(qrels.contains("ds_in") || qrels.contains("ds_out")) {
+ joinExpressionsString += " AND " + joinExpressions.get(aux);
+ }
+ else {
+ if(aux.equals(schemaGraph.getEdge("ds_in","proc")) || aux.equals(schemaGraph.getEdge("ds_out","proc")))
+ joinExpressionsString += " AND " + "ds_use.proc_id=proc.id";
+ else if(aux.equals(schemaGraph.getEdge("ds_in","ds")) || aux.equals(schemaGraph.getEdge("ds_out","ds")))
+ joinExpressionsString += " AND " + "ds_use.ds_id=ds.id";
+ else {
+ joinExpressionsString += " AND " + joinExpressions.get(aux);
+ }
+
+ }
+ }
+
+ return joinExpressionsString;
+ }
+
+}
+
+query : SELECT selectExpression
+ {
+ schemaGraph = buildGraph();
+ joinEdges = computeJoinEdges(schemaGraph, relations);
+ hasWhereJoinExpression=false;
+
+ System.out.print("SELECT " + selectClause);
+ fromClause += computeFrom(schemaGraph, joinEdges, relations);
+
+ System.out.print(fromClause);
+
+ whereClauseJoinExpressions += computeJoinExpressions(schemaGraph, joinEdges, relations);
+
+ if(!whereClauseJoinExpressions.isEmpty()) {
+ hasWhereJoinExpression=true;
+ System.out.print(" WHERE " + whereClauseJoinExpressions);
+ }
+
+ }
+ (WHERE whereExpression
+ {
+ if(hasWhereJoinExpression)
+ System.out.print(",");
+ else
+ System.out.print(" WHERE ");
+ System.out.print(whereClause);
+ }
+ )?
+ SEMICOLON
+ {
+ System.out.print(";");
+ }
+ ;
+
+selectExpression
+ : a=entityAttribute
+ {
+ selectClause += $a.text;
+ relations.add($a.text.split("\\.")[0]);
+ if($a.text.split("\\.").length == 1)
+ selectClause += ".*";
+ }
+ (COMMA b=entityAttribute
+ {
+ selectClause += "," + $b.text;
+ relations.add($b.text.split("\\.")[0]);
+ if($b.text.split("\\.").length == 1)
+ selectClause += ".*";
+ }
+ )*
+ ;
+
+whereExpression
+ : c=whereAtom
+ {
+ whereClause += $c.text;
+ }
+ (
+ (AND
+ {
+ whereClause += " AND ";
+ }
+ | OR
+ {
+ whereClause += " OR ";
+ }
+ ) d=whereAtom
+ {
+ whereClause += $d.text;
+ }
+ )*
+ ;
+
+whereAtom
+ : entityAttribute OP (STRING | INT | FLOAT)
+ | entityAttribute BETWEEN STRING AND STRING;
+
+entityAttribute : ID (DOT ID)?;
+
+OP : '=' | '>' | '>=' | '<' | '<=';
+
+SELECT : 'select';
+
+WHERE : 'where';
+
+AND : 'and';
+
+OR : 'or';
+
+DOT : '.';
+
+COMMA : ',';
+
+BETWEEN : 'between';
+
+SEMICOLON : ';';
+
+ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_'|'-')*
+ ;
+
+INT : '0'..'9'+
+ ;
+
+FLOAT
+ : ('0'..'9')+ '.' ('0'..'9')*
+ | '.' ('0'..'9')+
+ ;
+
+STRING
+ : '\'' ( 'a'..'z' | 'A'..'Z' | '_' | '-' | '0'..'9' | '.')* '\''
+ ;
+
+WS : (' ' |'\t' |'\n' |'\r' )+
+ {
+ skip();
+ }
+ ;
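As a sketch of the translation this grammar performs (the attribute names are hypothetical, and the exact FROM/WHERE ordering depends on HashSet iteration order), a ProvSQL query such as

    select file.name, a_run_t.value where a_run_t.name='experiment';

is expanded along the shortest schema-graph path between its relations, with the ds_in edge replaced by the ds_use view, into roughly:

    SELECT file.name,a_run_t.value
    FROM file,ds,ds_use,proc,run,a_run_t
    WHERE ds.id=file.id AND ds_use.ds_id=ds.id AND ds_use.proc_id=proc.id
      AND run.id=proc.run_id AND a_run_t.run_id=run.id
      AND a_run_t.name='experiment';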
Added: provenancedb/compare_run.py
===================================================================
--- provenancedb/compare_run.py (rev 0)
+++ provenancedb/compare_run.py 2011-08-09 13:16:29 UTC (rev 4968)
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+
+import sys
+selectClause = 'SELECT run_id'
+fromClause = 'FROM'
+nId = 0
+for arg in sys.argv:
+ argTokens = arg.partition('=')
+
+ if argTokens[0] == 'annot_num' or argTokens[0] == 'annot_txt' or argTokens[0] == 'param':
+ key = argTokens[2]
+ nId+=1
+ sId = 'j%s' % nId
+ selectClause += ', ' + sId + '.value as ' + key
+ if nId>1:
+ fromClause += ' INNER JOIN'
+ fromClause += ' compare_run_by_' + argTokens[0] + '(\'' + key + '\') as ' + sId
+ if nId>1:
+ fromClause += ' USING (run_id)'
+
+query = selectClause + ' ' + fromClause + ';'
+
+print query
+
+
+
+
Property changes on: provenancedb/compare_run.py
___________________________________________________________________
Added: svn:executable
+ *
Added: provenancedb/compare_run.sh
===================================================================
--- provenancedb/compare_run.sh (rev 0)
+++ provenancedb/compare_run.sh 2011-08-09 13:16:29 UTC (rev 4968)
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+PROVDIR=$(dirname $0)
+pushd $PROVDIR
+PROVDIR=$(pwd)
+popd
+
+source $PROVDIR/etc/provenance.config
+export PATH=$PROVDIR:$PATH
+
+# TODO: Check python's version, should be >=2.6
+echo "DROP VIEW temp;" | $SQLCMD
+temp_view=$(python $PROVDIR/compare_run.py "$@")
+echo "CREATE VIEW temp AS " $temp_view | $SQLCMD
+echo "SELECT * FROM temp;" | $SQLCMD
\ No newline at end of file
Property changes on: provenancedb/compare_run.sh
___________________________________________________________________
Added: svn:executable
+ *
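For example, a call such as (parameter and annotation names hypothetical)

    compare_run.sh param=nsim annot_num=quality

makes compare_run.py print

    SELECT run_id, j1.value as nsim, j2.value as quality FROM compare_run_by_param('nsim') as j1 INNER JOIN compare_run_by_annot_num('quality') as j2 USING (run_id);

which the shell wrapper then installs as the view temp and selects from, yielding one row per run with the compared values side by side.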
Added: provenancedb/import-run-to-datalog
===================================================================
--- provenancedb/import-run-to-datalog (rev 0)
+++ provenancedb/import-run-to-datalog 2011-08-09 13:16:29 UTC (rev 4968)
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# this should be the main driver script which can be run right after a
+# workflow has finished and will do everything necessary to import all
+# of the provenance information into the Datalog provenance database.
+
+# invoke with: import-run-to-datalog [logfile]
+# where [logfile] is the full path (not relative) to the log file,
+# with kickstart records expected to be in the same directory as the
+# log file.
+
+version=$version prov-to-datalog.sh $1
+
Property changes on: provenancedb/import-run-to-datalog
___________________________________________________________________
Added: svn:executable
+ *
Modified: provenancedb/pql_functions.sql
===================================================================
--- provenancedb/pql_functions.sql 2011-08-09 11:16:34 UTC (rev 4967)
+++ provenancedb/pql_functions.sql 2011-08-09 13:16:29 UTC (rev 4968)
@@ -5,7 +5,6 @@
-- lists variations in a parameter's value across workflows, for parameters that are in-memory variables
drop view in_mem_in cascade;
-
create view in_mem_in as
select proc.run_id, proc.id as proc_id,
proc.name as proc_name, ds_in.ds_id,
@@ -14,7 +13,6 @@
where proc.id=ds_in.proc_id and ds_in.ds_id=in_mem.id;
drop view in_mem_out cascade;
-
create view in_mem_out as
select proc.run_id, proc.id as proc_id,
proc.name as proc_name, ds_out.ds_id,
@@ -23,14 +21,12 @@
where proc.id=ds_out.proc_id and ds_out.ds_id=in_mem.id;
drop view in_mem_use cascade;
-
create view in_mem_use as
select * from in_mem_in
union
select * from in_mem_out;
drop view file_in cascade;
-
create view file_in as
select proc.run_id, proc.id as proc_id,
proc.name as proc_name, ds_in.ds_id,
@@ -39,7 +35,6 @@
where proc.id=ds_in.proc_id and ds_in.ds_id=file.id;
drop view file_out cascade;
-
create view file_out as
select proc.run_id, proc.id as proc_id,
proc.name as proc_name, ds_out.ds_id,
@@ -48,21 +43,18 @@
where proc.id=ds_out.proc_id and ds_out.ds_id=file.id;
drop view file_use cascade;
-
create view file_use as
select * from file_in
union
select * from file_out;
-drop view ds_use cascade;
-
-create view ds_use as
+drop view ds_param_value cascade;
+create view ds_param_value as
select * from in_mem_use
union
select * from file_use;
drop view a_t cascade;
-
create view a_t as
select run.id, a_run_t.name, a_run_t.value
from run, a_run_t
@@ -77,7 +69,6 @@
where ds_use.ds_id=a_ds_t.ds_id;
drop view a_n cascade;
-
create view a_n as
select run.id, a_run_n.name, a_run_n.value
from run, a_run_n
@@ -91,18 +82,23 @@
from ds_use, a_ds_n
where ds_use.ds_id=a_ds_n.ds_id;
+drop view ds_use cascade;
+create view ds_use as
+ select * from ds_in
+ union all
+ select * from ds_out;
-drop type compare_run_by_parameter_type cascade;
-create type compare_run_by_parameter_type as (run_id varchar, param varchar, value varchar);
+drop type compare_run_by_param_type cascade;
+create type compare_run_by_param_type as (run_id varchar, param varchar, value varchar);
-create or replace function compare_run_by_parameter(param_name varchar)
-returns setof compare_run_by_parameter_type
+create or replace function compare_run_by_param(param_name varchar)
+returns setof compare_run_by_param_type
as $$
select run_id, param, value
- from ds_use
- where param=$1;
+ from ds_use,proc,in_mem
+ where proc.id=ds_use.proc_id and ds_use.ds_id=in_mem.id and param=$1;
$$ language sql;
-- PostgreSQL >= 9.0
@@ -126,19 +122,7 @@
-- GROUP BY proc.run_id, ds_in.param, in_mem.value
--$$ LANGUAGE SQL;
-DROP TYPE compare_run_by_parameter_type2;
-CREATE TYPE compare_run_by_parameter_type2 AS (run_id VARCHAR, param1 VARCHAR, value1 VARCHAR, param2 VARCHAR, value2 VARCHAR);
-CREATE OR REPLACE FUNCTION compare_run_by_parameter(param_name1 VARCHAR, param_name2 VARCHAR)
-RETURNS SETOF compare_run_by_parameter_type2
-AS $$
- SELECT *
- FROM compare_run_by_parameter($1) as t
- INNER JOIN
- compare_run_by_parameter($2) as s
- USING (run_id);
-$$ LANGUAGE SQL;
-
--CREATE OR REPLACE FUNCTION compare_run_by_parameter(param_name1 VARCHAR, param_name2 VARCHAR)
--RETURNS TABLE (
-- workflow_id VARCHAR,
@@ -156,73 +140,105 @@
--$$ LANGUAGE SQL;
-CREATE OR REPLACE FUNCTION compare_run_by_parameter(param_name1 VARCHAR, param_name2 VARCHAR, param_name3 VARCHAR)
-RETURNS TABLE (
- workflow_id VARCHAR,
- param_name1 VARCHAR,
- value1 VARCHAR,
- param_name2 VARCHAR,
- value2 VARCHAR,
- param_name3 VARCHAR,
- value3 VARCHAR
-)
-AS $$
- SELECT *
- FROM compare_run_by_parameter($1, $2) as t
- INNER JOIN
- compare_run_by_parameter($3) as s
- USING (workflow_id);
-$$ LANGUAGE SQL;
+DROP TYPE compare_run_by_annot_num_type;
+CREATE TYPE compare_run_by_annot_num_type as (run_id VARCHAR, name VARCHAR, value NUMERIC);
-
CREATE OR REPLACE FUNCTION compare_run_by_annot_num(name VARCHAR)
-RETURNS TABLE (
- workflow_id VARCHAR,
- name VARCHAR,
- value NUMERIC
-)
+RETURNS SETOF compare_run_by_annot_num_type
AS $$
- SELECT process.workflow_id, annot_ds_num.name, annot_ds_num.value
- FROM annot_ds_num,ds_usage,ds_containment,process
- WHERE annot_ds_num.id=ds_containment.in_id AND ds_containment.out_id=ds_usage.dataset_id AND
- ds_usage.process_id=process.id AND annot_ds_num.name=$1
+ SELECT proc.run_id, a_ds_n.name, a_ds_n.value
+ FROM a_ds_n,ds_use,ds_cont,proc
+ WHERE a_ds_n.ds_id=ds_cont.in_id AND ds_cont.out_id=ds_use.ds_id AND
+ ds_use.proc_id=proc.id AND a_ds_n.name=$1
UNION
- SELECT process.workflow_id, annot_ds_num.name, annot_ds_num.value
- FROM process, ds_usage, annot_ds_num
- WHERE process.id=ds_usage.process_id and ds_usage.dataset_id=annot_ds_num.id and
- annot_ds_num.name=$1
+ SELECT proc.run_id, a_ds_n.name, a_ds_n.value
+ FROM proc, ds_use, a_ds_n
+ WHERE proc.id=ds_use.proc_id and ds_use.ds_id=a_ds_n.ds_id and
+ a_ds_n.name=$1
UNION
- SELECT process.workflow_id, annot_p_num.name, annot_p_num.value
- FROM process, annot_p_num
- WHERE process.id=annot_p_num.id and annot_p_num.name=$1
+ SELECT proc.run_id, a_proc_n.name, a_proc_n.value
+ FROM proc, a_proc_n
+ WHERE proc.id=a_proc_n.proc_id and a_proc_n.name=$1
UNION
- SELECT workflow.id as workflow_id, annot_wf_num.name, annot_wf_num.value
- FROM workflow, annot_wf_num
- WHERE workflow.id=annot_wf_num.id and annot_wf_num.name=$1
+ SELECT run.id as run_id, a_run_n.name, a_run_n.value
+ FROM run, a_run_n
+ WHERE run.id=a_run_n.run_id and a_run_n.name=$1
$$ LANGUAGE SQL;
+DROP TYPE compare_run_by_annot_txt_type;
+CREATE TYPE compare_run_by_annot_txt_type as (run_id VARCHAR, name VARCHAR, value VARCHAR);
CREATE OR REPLACE FUNCTION compare_run_by_annot_txt(name VARCHAR)
-RETURNS TABLE (
- workflow_id VARCHAR,
- name VARCHAR,
- value VARCHAR)
+RETURNS SETOF compare_run_by_annot_txt_type
AS $$
- SELECT process.workflow_id, annot_ds_txt.name, annot_ds_txt.value
- FROM process, ds_usage, annot_ds_txt
- WHERE process.id=ds_usage.process_id and ds_usage.dataset_id=annot_ds_txt.id and
- annot_ds_txt.name=$1
+ SELECT proc.run_id, a_ds_t.name, a_ds_t.value
+ FROM a_ds_t,ds_use,ds_cont,proc
+ WHERE a_ds_t.ds_id=ds_cont.in_id AND ds_cont.out_id=ds_use.ds_id AND
+ ds_use.proc_id=proc.id AND a_ds_t.name=$1
UNION
- SELECT process.workflow_id, annot_p_txt.name, annot_p_txt.value
- FROM process, annot_p_txt
- WHERE process.id=annot_p_txt.id and annot_p_txt.name=$1
+ SELECT proc.run_id, a_ds_t.name, a_ds_t.value
+ FROM proc, ds_use, a_ds_t
+ WHERE proc.id=ds_use.proc_id and ds_use.ds_id=a_ds_t.ds_id and
+ a_ds_t.name=$1
UNION
- SELECT workflow.id as workflow_id, annot_wf_txt.name, annot_wf_txt.value
- FROM workflow, annot_wf_txt
- WHERE workflow.id=annot_wf_txt.id and annot_wf_txt.name=$1
+ SELECT proc.run_id, a_proc_t.name, a_proc_t.value
+ FROM proc, a_proc_t
+ WHERE proc.id=a_proc_t.proc_id and a_proc_t.name=$1
+ UNION
+ SELECT run.id as run_id, a_run_t.name, a_run_t.value
+ FROM run, a_run_t
+ WHERE run.id=a_run_t.run_id and a_run_t.name=$1
$$ LANGUAGE SQL;
+-- CREATE OR REPLACE FUNCTION compare_run_by_annot_num(name VARCHAR)
+-- RETURNS TABLE (
+-- workflow_id VARCHAR,
+-- name VARCHAR,
+-- value NUMERIC
+-- )
+-- AS $$
+-- SELECT process.workflow_id, annot_ds_num.name, annot_ds_num.value
+-- FROM annot_ds_num,ds_usage,ds_containment,process
+-- WHERE annot_ds_num.id=ds_containment.in_id AND ds_containment.out_id=ds_usage.dataset_id AND
+-- ds_usage.process_id=process.id AND annot_ds_num.name=$1
+-- UNION
+-- SELECT process.workflow_id, annot_ds_num.name, annot_ds_num.value
+-- FROM process, ds_usage, annot_ds_num
+-- WHERE process.id=ds_usage.process_id and ds_usage.dataset_id=annot_ds_num.id and
+-- annot_ds_num.name=$1
+-- UNION
+-- SELECT process.workflow_id, annot_p_num.name, annot_p_num.value
+-- FROM process, annot_p_num
+-- WHERE process.id=annot_p_num.id and annot_p_num.name=$1
+-- UNION
+-- SELECT workflow.id as workflow_id, annot_wf_num.name, annot_wf_num.value
+-- FROM workflow, annot_wf_num
+-- WHERE workflow.id=annot_wf_num.id and annot_wf_num.name=$1
+-- $$ LANGUAGE SQL;
+
+
+-- CREATE OR REPLACE FUNCTION compare_run_by_annot_txt(name VARCHAR)
+-- RETURNS TABLE (
+-- workflow_id VARCHAR,
+-- name VARCHAR,
+-- value VARCHAR)
+-- AS $$
+-- SELECT process.workflow_id, annot_ds_txt.name, annot_ds_txt.value
+-- FROM process, ds_usage, annot_ds_txt
+-- WHERE process.id=ds_usage.process_id and ds_usage.dataset_id=annot_ds_txt.id and
+-- annot_ds_txt.name=$1
+-- UNION
+-- SELECT process.workflow_id, annot_p_txt.name, annot_p_txt.value
+-- FROM process, annot_p_txt
+-- WHERE process.id=annot_p_txt.id and annot_p_txt.name=$1
+-- UNION
+-- SELECT workflow.id as workflow_id, annot_wf_txt.name, annot_wf_txt.value
+-- FROM workflow, annot_wf_txt
+-- WHERE workflow.id=annot_wf_txt.id and annot_wf_txt.name=$1
+-- $$ LANGUAGE SQL;
+
+
CREATE OR REPLACE FUNCTION compare_run_by_annot_bool(name VARCHAR)
RETURNS TABLE (
workflow_id VARCHAR,
@@ -262,16 +278,20 @@
$$ LANGUAGE SQL;
-- recursive query to find ancestor entities in a provenance graph
-CREATE OR REPLACE FUNCTION ancestors(varchar) RETURNS SETOF varchar AS $$
- WITH RECURSIVE anc(ancestor,descendant) AS
- (
- SELECT parent AS ancestor, child AS descendant FROM parent_of WHERE child=$1
- UNION
- SELECT parent_of.parent AS ancestor, anc.descendant AS descendant
- FROM anc,parent_of
- WHERE anc.ancestor=parent_of.child
- )
- SELECT ancestor FROM anc
+CREATE OR REPLACE FUNCTION ancestors(varchar)
+RETURNS SETOF varchar AS $$
+ WITH RECURSIVE anc(ancestor,descendant) AS
+ (
+ SELECT parent AS ancestor, child AS descendant
+ FROM parent_of
+ WHERE child=$1
+ UNION
+ SELECT parent_of.parent AS ancestor,
+ anc.descendant AS descendant
+ FROM anc,parent_of
+ WHERE anc.ancestor=parent_of.child
+ )
+ SELECT ancestor FROM anc
$$ LANGUAGE SQL;
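A few hypothetical calls against the renamed functions, showing the intended comparison and lineage query patterns:

    -- values of an in-memory parameter across runs:
    select * from compare_run_by_param('nsim');

    -- numeric dataset/process/run annotations compared across runs:
    select * from compare_run_by_annot_num('quality');

    -- all transitive ancestors of a dataset in the provenance graph:
    select * from ancestors('ds-0001');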
Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql 2011-08-09 11:16:34 UTC (rev 4967)
+++ provenancedb/prov-init.sql 2011-08-09 13:16:29 UTC (rev 4968)
@@ -42,7 +42,7 @@
-- has an entry for this process.
-- process types: internal, rootthread, execute, function, compound, scope, operator
create table proc
- (id varchar(256) primary key,
+ (id varchar(256) primary key,
type varchar(16),
name varchar(256), -- in the case of an execute this refers to the transformation name in tc.data
run_id varchar(256) references run (id) on delete cascade -- normalize: workflow_id of sub-procedure determined
@@ -187,11 +187,17 @@
primary key (run_id, name)
);
+create table iq
+ ( idx serial primary key,
+ q varchar(2048)
+ );
+
+drop view pgraph_edge;
create view pgraph_edge as
select proc_id as parent,ds_id as child from ds_out
- union
+ union all
select ds_id as parent,proc_id as child from ds_in
- union
+ union all
select out_id as parent,in_id as child from ds_cont;
-- continue renaming from here
Added: provenancedb/prov-to-datalog.sh
===================================================================
--- provenancedb/prov-to-datalog.sh (rev 0)
+++ provenancedb/prov-to-datalog.sh 2011-08-09 13:16:29 UTC (rev 4968)
@@ -0,0 +1,186 @@
+#!/bin/bash
+
+export RUNID=$(basename $1 .log)
+
+export WFID="execute:${RUNID}:"
+
+# TODO is there already a URI form for identifying workflows?
+export WF="${RUNID}"
+
+echo Generating Datalog for $RUNID
+
+# this gives a distinction between the root process for a workflow and the
+# workflow itself. perhaps better to model the workflow as a process
+echo "isProcess('${WFID}0')." > tmp.datalog
+echo "hasType('${WFID}0', 'rootthread')." >> tmp.datalog
+echo "hasName('${WFID}0', 'name')." >> tmp.datalog
+echo "isInScriptRun('${WFID}0', '$WF')." >> tmp.datalog
+
+while read time duration thread localthread endstate tr_name scratch; do
+ echo "isProcess('$thread')." >> tmp.datalog
+ echo "hasType('$thread', 'execute')." >> tmp.datalog
+ echo "hasName('$thread', '$tr_name')." >> tmp.datalog
+ echo "hasStartTime('$thread', $time)." >> tmp.datalog
+ echo "hasDuration('$thread', $duration)." >> tmp.datalog
+ echo "hasFinalState('$thread', '$endstate')." >> tmp.datalog
+done < execute.global.event
+
+while read start_time duration globalid id endstate thread site scratch; do
+ # cut off the last component of the thread, so that we end up at the
+ # parent thread id which should correspond with the execute-level ID
+ inv_id="$WFID$(echo $thread | sed 's/-[^-]*$//')"
+ echo "isExecutionAttempt('$globalid')." >> tmp.datalog
+ echo "attemptsToExecute('$globalid', '$inv_id')." >> tmp.datalog
+ echo "hasStartTime('$globalid', $start_time)." >> tmp.datalog
+ echo "hasDuration('$globalid', $duration)." >> tmp.datalog
+ echo "hasFinalState('$globalid', '$endstate')." >> tmp.datalog
+ echo "executedAtSite('$globalid', '$site')." >> tmp.datalog
+done < execute2.global.event
+
+while read col1 col2 col3 col4 col5 thread name lhs rhs result; do
+ thread=$(echo $thread | awk 'BEGIN { FS = "=" }; {print $2}')
+ name=$(echo $name | awk 'BEGIN { FS = "=" }; {print $2}')
+ lhs=$(echo $lhs | awk 'BEGIN { FS = "=" }; {print $2}')
+ rhs=$(echo $rhs | awk 'BEGIN { FS = "=" }; {print $2}')
+ result=$(echo $result | awk 'BEGIN { FS = "=" }; {print $2}')
+
+ operatorid="${WFID}operator:$thread"
+
+ if [ $version -le 3726 ]; then
+ lhs=$(echo $lhs | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g')
+ rhs=$(echo $rhs | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g')
+ result=$(echo $result | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g')
+ fi
+
+ echo "isDataset('$lhs')." >> tmp.datalog
+ echo "isDataset('$rhs')." >> tmp.datalog
+ echo "isDataset('$result')." >> tmp.datalog
+ echo "isProcess('$operatorid')." >> tmp.datalog
+ echo "hasType('$operatorid', 'operator')." >> tmp.datalog
+ echo "hasName('$operatorid', '$name')." >> tmp.datalog
+ echo "used('$operatorid', '$lhs', 'lhs')." >> tmp.datalog
+ echo "used('$operatorid', '$rhs', 'rhs')." >> tmp.datalog
+ echo "wasGeneratedBy('$result', '$operatorid', 'result')." >> tmp.datalog
+done < operators.txt
+
+while read id name output; do
+ echo "isDataset('$output')." >> tmp.datalog
+ echo "isProcess('$id')." >> tmp.datalog
+ echo "hasType('$id', 'function')." >> tmp.datalog
+ echo "hasName('$id', '$name')." >> tmp.datalog
+ echo "isInScriptRun('$id', '$WF')." >> tmp.datalog
+ echo "wasGeneratedBy('$output', '$id', 'result')." >> tmp.datalog
+done < functions.txt
+
+while read id value; do
+ # TODO need ordering/naming
+ echo "isDataset('$value')." >> tmp.datalog
+ echo "wasGeneratedBy('$value', '$id', 'undefined')." >> tmp.datalog
+done < function-inputs.txt
+
+
+while read thread appname; do
+ echo "hasName('$thread', '$appname')." >> tmp.datalog
+done < invocation-procedure-names.txt
+
+while read outer inner; do
+ echo "isDataset('$outer')." >> tmp.datalog
+ echo "isDataset('$inner')." >> tmp.datalog
+ echo "isContainedIn('$inner', '$outer')." >> tmp.datalog
+done < tie-containers.txt
+
+while read dataset filename; do
+
+ if [ $version -le 3726 ]; then
+ dataset=$(echo $dataset | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g')
+ fi
+ echo "isDataset('$dataset')." >> tmp.datalog
+ echo "hasFileName('$dataset', '$filename')." >> tmp.datalog
+done < dataset-filenames.txt
+
+while read dataset idtype equal value rest; do
+
+ if [ $version -le 3726 ]; then
+ dataset=$(echo $dataset | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g')
+ fi
+
+ echo "isDataset('$dataset')." >> tmp.datalog
+ echo "hasValue('$dataset', '$value')." >> tmp.datalog
+done < dataset-values.txt
+
+while read start duration wfid rest; do
+ echo "hasStartTime('$WF', $start)." >> tmp.datalog
+ echo "hasDuration('$WF', $duration)." >> tmp.datalog
+done < workflow.event
+
+
+# TODO this could merge with other naming tables
+while read start duration thread final_state procname ; do
+ if [ "$duration" != "last-event-line" ]; then
+ compoundid=$WFID$thread
+ echo "isProcess('$compoundid')." >> tmp.datalog
+ echo "hasType('compound')." >> tmp.datalog
+ echo "hasName('$procname')." >> tmp.datalog
+ echo "isInScriptRun('$compoundid', '$WF')." >> tmp.datalog
+ fi
+done < compound.event
+
+while read start duration thread final_state procname ; do
+ if [ "$duration" != "last-event-line" ]; then
+ fqid=$WFID$thread
+ echo "isProcess('$fqid')." >> tmp.datalog
+ echo "hasType('internal')." >> tmp.datalog
+ echo "hasName('$procname')." >> tmp.datalog
+ echo "isInScriptRun('$fqid', '$WF')." >> tmp.datalog
+ fi
+done < internalproc.event
+
+while read t ; do
+ thread="${WFID}$t"
+ echo "isProcess('$thread')." >> tmp.datalog
+ echo "hasType('scope')." >> tmp.datalog
+ echo "hasName('scope')." >> tmp.datalog
+ echo "isInScriptRun('$thread', '$WF')." >> tmp.datalog
+done < scopes.txt
+
+while read thread direction dataset variable rest; do
+
+ dataset=$(echo $dataset | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g')
+ echo "isDataset('$dataset')." >> tmp.datalog
+
+ if [ "$direction" == "input" ] ; then
+ echo "used('$thread', '$dataset', '$variable')." >> tmp.datalog
+ else
+ echo "wasGeneratedBy('$dataset', '$thread', '$variable')." >> tmp.datalog
+ fi
+
+done < tie-data-invocs.txt
+
+if [ -f extrainfo.txt ]; then
+ while read execute2_id extrainfo; do
+ echo $extrainfo | awk -F ";" '{ for (i = 1; i <= NF; i++)
+ print $i
+ }' | awk -F "=" '{ print $1 " " $2 }' | awk -F ":" '{ print $1 " " $2 }' > fields.txt
+ id=$($SQLCMD --tuples-only -c "select app_inv_id from app_exec where id='$execute2_id';" | awk '{print $1}')
+ while read name type value; do
+ # TODO: check types
+ echo "hasAnnotation('$id', '$name', '$value')." >> tmp.datalog
+ done < fields.txt
+ done < extrainfo.txt
+fi
+
+if [ -f runtime.txt ]; then
+ while read execute2_id runtime; do
+ timestamp=$(echo $runtime | awk -F "," '{print $1}' | awk -F ":" '{print $2}')
+ cpu_usage=$(echo $runtime | awk -F "," '{print $2}' | awk -F ":" '{print $2}')
+ max_phys_mem=$(echo $runtime | awk -F "," '{print $3}' | awk -F ":" '{print $2}')
+ max_virtual_mem=$(echo $runtime | awk -F "," '{print $4}' | awk -F ":" '{print $2}')
+ io_read_bytes=$(echo $runtime | awk -F "," '{print $5}' | awk -F ":" '{print $2}')
+ io_write_bytes=$(echo $runtime | awk -F "," '{print $6}' | awk -F ":" '{print $2}')
+ echo "hasRuntimeInfo('$execute2_id', $timestamp, $cpu_usage, $max_phys_mem, $max_virtual_mem, $io_read_bytes, $io_write_bytes)." >> tmp.datalog
+ done < runtime.txt
+fi
+
+cat tmp.datalog | sort | uniq >> provenancedb.datalog
+
+echo Finished writing Datalog.
\ No newline at end of file
Property changes on: provenancedb/prov-to-datalog.sh
___________________________________________________________________
Added: svn:executable
+ *
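For a single line of execute.global.event this produces facts along the lines of (identifiers shortened and hypothetical):

    isProcess('execute:run-0001:0-1').
    hasType('execute:run-0001:0-1', 'execute').
    hasName('execute:run-0001:0-1', 'cat').
    hasStartTime('execute:run-0001:0-1', 1312896989).
    hasDuration('execute:run-0001:0-1', 12.3).
    hasFinalState('execute:run-0001:0-1', 'END_SUCCESS').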
Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh 2011-08-09 11:16:34 UTC (rev 4967)
+++ provenancedb/prov-to-sql.sh 2011-08-09 13:16:29 UTC (rev 4968)
@@ -39,7 +39,7 @@
rhs=$(echo $rhs | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g')
result=$(echo $result | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g')
fi
-
+
$SQLCMD -c "INSERT INTO ds (id) VALUES ('$lhs');"
$SQLCMD -c "INSERT INTO ds (id) VALUES ('$rhs');"
$SQLCMD -c "INSERT INTO ds (id) VALUES ('$result');"
@@ -96,7 +96,7 @@
$SQLCMD -c "INSERT INTO file (id, name) VALUES ('$dataset', '$filename');"
done < dataset-filenames.txt
-while read dataset value; do
+while read dataset idtype equal value rest; do
if [ $version -le 3726 ]; then
dataset=$(echo $dataset | sed -e 's/tag:benc at ci.uchicago.edu,2008:swift://g')
Deleted: provenancedb/prov-to-swipl.sh
===================================================================
--- provenancedb/prov-to-swipl.sh 2011-08-09 11:16:34 UTC (rev 4967)
+++ provenancedb/prov-to-swipl.sh 2011-08-09 13:16:29 UTC (rev 4968)
@@ -1,32 +0,0 @@
-#!/bin/bash
-
-rm -f tmp-import.pl
-
-while read time duration thread endstate app; do
- echo "execute('$thread', $time, $duration, '$endstate', '$app')." >> tmp-import.pl
-done < $LOGDIR/execute.event
-
-while read thread direction dataset variable value rest; do
- if [ "$direction" == "input" ] ; then
- dir=I
- else
- dir=O
- fi
- echo "dataset_usage('$thread', '$dir', '$dataset', '$variable', '$value')." >> tmp-import.pl
-done < $LOGDIR/tie-data-invocs.txt
-
-while read thread appname; do
- echo "invocation_procedure_names('$thread', '$appname')." >> tmp-import.pl
-
-done < $LOGDIR/invocation-procedure-names.txt
-
-while read outer inner; do
- echo "dataset_containment('$outer', '$inner')." >> tmp-import.pl
-done < $LOGDIR/tie-containers.txt
-
-while read dataset filename; do
- echo "dataset_filenames('$dataset', '$filename')." >> tmp-import.pl
-done < $LOGDIR/dataset-filenames.txt
-
-swipl -g "['tmp-import.pl']"
-
Added: provenancedb/provenancedb-rules.datalog
===================================================================
--- provenancedb/provenancedb-rules.datalog (rev 0)
+++ provenancedb/provenancedb-rules.datalog 2011-08-09 13:16:29 UTC (rev 4968)
@@ -0,0 +1,45 @@
+isArtifact(X) :- isDataset(X).
+
+%% used(Process, Dataset, Role).
+%% wasGeneratedBy(Dataset, Process, Role).
+isProcess(X) :- used(X,_,_).
+isProcess(Y) :- wasGeneratedBy(_,Y,_).
+
+isDataset(Y) :- used(_,Y,_).
+isDataset(X) :- wasGeneratedBy(X,_,_).
+
+:- table isAncestor/2.
+isAncestor(X,Y) :- used(Y,X,_).
+isAncestor(X,Y) :- used(Y,Z,_),isAncestor(X,Z).
+isAncestor(X,Y) :- wasGeneratedBy(Y,X,_).
+isAncestor(X,Y) :- wasGeneratedBy(Y,Z,_),isAncestor(X,Z).
+%%isAncestor(X,Y) :- isContainedIn(X,Y).
+%%isAncestor(X,Y) :- isContainedIn(Z,Y),isAncestor(X,Z).
+
+:- table wasDerivedFrom/2.
+wasDerivedFrom(X,Y) :- used(Z,Y,_),wasGeneratedBy(X,Z,_).
+
+:- table wasTriggeredBy/2.
+wasTriggeredBy(X,Y) :- wasGeneratedBy(Z,Y,_),used(X,Z,_).
+
+:- table isContainedInTC/2.
+isContainedInTC(X,Y) :- isContainedIn(X,Y).
+isContainedInTC(X,Y) :- isContainedIn(X,Z),isContainedInTC(Z,Y).
+
+:- table wasDerivedFromTC/2.
+wasDerivedFromTC(X,Y) :- wasDerivedFrom(X,Y).
+wasDerivedFromTC(X,Y) :- wasDerivedFrom(X,Z),wasDerivedFromTC(Z,Y).
+
+:- table wasTriggeredByTC/2.
+wasTriggeredByTC(X,Y) :- wasTriggeredBy(X,Y).
+wasTriggeredByTC(X,Y) :- wasTriggeredBy(X,Z),wasTriggeredByTC(Z,Y).
+
+%% base case: the importer emits these facts as isInScriptRun/2
+isInRun(P,X) :- isInScriptRun(P,X).
+isInRun(D,X) :- wasGeneratedBy(D,P,_),isInRun(P,X).
+isInRun(D,X) :- used(P,D,_),isInRun(P,X).
+
+runCorrelation(R,D,[N|L],V) :- isRun(R),isDataset(D),isInRun(D,R),hasAnnotation(D,N,V).
+runCorrelation(R,P,[N|L],V) :- isRun(R),isProcess(P),isInRun(P,R),hasAnnotation(P,N,V).
+runCorrelation(R,P,[N|L],V) :- isRun(R),isPro
+
+
+
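Loaded into a tabling engine such as XSB (the :- table directives are XSB syntax) together with the facts generated by prov-to-datalog.sh, the rules answer lineage queries as goals, e.g. (dataset and process ids hypothetical):

    ?- wasDerivedFromTC('ds-0002', Y).
    ?- isAncestor(X, 'execute:run-0001:0-1').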
Modified: provenancedb/swift-prov-import-all-logs
===================================================================
--- provenancedb/swift-prov-import-all-logs 2011-08-09 11:16:34 UTC (rev 4967)
+++ provenancedb/swift-prov-import-all-logs 2011-08-09 13:16:29 UTC (rev 4968)
@@ -13,6 +13,7 @@
# this generates a file with tuples like:
# <starttime> <swift version> <logfilename>
+# This is where Swift's version is collected.
swift-plot-log $LOGREPO everylog-vs-versions.data
if [ "$?" != "0" ]; then
Added: provenancedb/swift-prov-import-all-logs-datalog
===================================================================
--- provenancedb/swift-prov-import-all-logs-datalog (rev 0)
+++ provenancedb/swift-prov-import-all-logs-datalog 2011-08-09 13:16:29 UTC (rev 4968)
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+PROVDIR=$(dirname $0)
+pushd $PROVDIR
+PROVDIR=$(pwd)
+popd
+
+# we need to keep this out of the log-processing dir because import
+# of individual runs will clean other files.
+
+source $PROVDIR/etc/provenance.config
+export PATH=$PROVDIR:$PATH
+
+# this generates a file with tuples like:
+# <starttime> <swift version> <logfilename>
+# This is where Swift's version is collected.
+swift-plot-log $LOGREPO everylog-vs-versions.data
+
+if [ "$?" != "0" ]; then
+ echo swift-plot-log failed when building everylog-vs-versions.data
+ exit 1
+fi
+
+# TODO better tmp handling than always using the same name in a shared
+# directory
+cp everylog-vs-versions.data /tmp/
+
+echo first commandline param is $1
+if [ "$1" == "rebuild" ]; then
+ echo CLEANING DATABASE
+ cat $PROVDIR/provenancedb-rules.datalog > provenancedb.datalog
+fi
+
+while read start version filename; do
+
+ export IDIR=$(echo $filename | sed 's/\.log$/.d/')
+ echo IDIR=$IDIR
+ if [ $version -ge 1538 ]; then
+ echo -n "Log: $filename ... "
+
+ EXISTING=$(grep "hasLogFilename('.*', '$filename')." provenancedb.datalog)
+
+ if [ -z "$EXISTING" ]; then
+ PROV_ENABLED=$(grep provenanceid $filename | wc -l)
+ if [ $PROV_ENABLED -gt 0 ]; then
+ echo IMPORTING
+
+ if grep --silent "Loader Swift finished with no errors" $filename; then
+ wfstatus="SUCCESS"
+ else
+ wfstatus="FAIL"
+ fi
+
+ export RUNID=$(basename $filename .log)
+
+ export WF="${RUNID}"
+ echo "isRun('$WF')." >> provenancedb.datalog
+ echo "hasLogFilename('$WF', '$filename')." >> provenancedb.datalog
+ echo "hasSwiftVersion('$WF', '$version')." >> provenancedb.datalog
+ echo "hasFinalState('$WF', '$wfstatus')." >> provenancedb.datalog
+
+ echo version $version in log file $filename
+ echo ============= will import =============
+ prepare-for-import $filename
+ if [ "$?" != "0" ]; then
+ echo prepare-for-import failed
+ exit 2
+ fi
+ version=$version import-run-to-datalog $filename
+ if [ "$?" != "0" ]; then
+ echo import-run-to-datalog failed
+ exit 3
+ fi
+ else
+ echo SKIP: provenance.log not set to true in etc/swift.properties
+ fi
+ else
+ echo SKIP: already imported into provenancedb.datalog
+ fi
+ fi
+done < /tmp/everylog-vs-versions.data
Property changes on: provenancedb/swift-prov-import-all-logs-datalog
___________________________________________________________________
Added: svn:executable
+ *
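Typical invocations, assuming etc/provenance.config defines LOGREPO (and SQLCMD for the extrainfo pass in prov-to-datalog.sh):

    # incremental: import only logs not yet recorded in provenancedb.datalog
    swift-prov-import-all-logs-datalog

    # rebuild: reseed provenancedb.datalog from the rule file, then re-import everything
    swift-prov-import-all-logs-datalog rebuild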
Deleted: provenancedb/xq.xq
===================================================================
--- provenancedb/xq.xq 2011-08-09 11:16:34 UTC (rev 4967)
+++ provenancedb/xq.xq 2011-08-09 13:16:29 UTC (rev 4968)
@@ -1,6 +0,0 @@
-for $t in //tie
- let $dataset := //dataset[@identifier=$t/dataset]
- let $exec := //execute[thread=$t/thread]
- where $t/direction="input"
- return <r>An invocation of {$exec/trname} took input {$dataset/filename}</r>
-