[Swift-commit] r7347 - provenancedb
lgadelha at ci.uchicago.edu
lgadelha at ci.uchicago.edu
Fri Nov 29 07:26:07 CST 2013
Author: lgadelha
Date: 2013-11-29 07:26:05 -0600 (Fri, 29 Nov 2013)
New Revision: 7347
Removed:
provenancedb/ProvSQL.g
Modified:
provenancedb/prov-init.sql
Log:
Minor changes, added view to display annotations of any entity type and data type
Deleted: provenancedb/ProvSQL.g
===================================================================
--- provenancedb/ProvSQL.g 2013-11-29 08:52:22 UTC (rev 7346)
+++ provenancedb/ProvSQL.g 2013-11-29 13:26:05 UTC (rev 7347)
@@ -1,760 +0,0 @@
-grammar ProvSQL;
-
- at header {
- import java.util.HashSet;
- import java.util.HashMap;
- import java.util.Iterator;
- import org.jgrapht.*;
- import org.jgrapht.alg.DijkstraShortestPath;
- import org.jgrapht.graph.*;
-}
-
- at members {
- // Parser state shared by the rule actions below.
- // SELECT list text; appended to by selectAtom/builtInProcedureAttribute, printed by squery.
- String selectClause = new String();
- // " FROM ..." text; filled in squery from computeFrom().
- String fromClause = new String();
- // Join predicates derived from the schema graph; filled in squery from computeJoinExpressions().
- String whereClauseJoinExpressions = new String();
- // User-written predicates; appended to by whereExpression/whereAtom.
- String whereClause = new String();
- // Set in squery once the derived join predicates turn out to be non-empty.
- boolean hasWhereJoinExpression;
- // Set in squery when the query contained a WHERE clause.
- boolean hasWhereExpression = false;
- // Static because the static helper computeFrom() reads it; set by builtInProcedureAttribute.
- static boolean hasCompareRunCall = false;
- // Names of the relations referenced in the SELECT list and WHERE clause.
- HashSet<String> relations = new HashSet<String>();
- // Assigned in squery from buildGraph().
- UndirectedGraph<String,DefaultEdge> schemaGraph;
- // Assigned in squery from computeJoinEdges().
- HashSet<DefaultEdge> joinEdges;
- // Raw text of each compare_run(...) argument; static for the same reason as hasCompareRunCall.
- static HashSet<String> compareRunParams = new HashSet<String>();;
-
- // Builds the hard-coded schema graph used to derive joins: each vertex is a relation
- // name and each edge links two relations that computeJoinExpressions() knows how to join.
- // Ideally it could receive a DB schema in SQL and build the graph automatically
- public static UndirectedGraph<String,DefaultEdge> buildGraph() {
- UndirectedGraph<String,DefaultEdge> schemaGraph = new Multigraph<String,DefaultEdge>(DefaultEdge.class);
- schemaGraph.addVertex("a_run_t");
- schemaGraph.addVertex("a_run_n");
- schemaGraph.addVertex("run");
- schemaGraph.addVertex("proc");
- schemaGraph.addVertex("a_proc_n");
- schemaGraph.addVertex("a_proc_t");
- schemaGraph.addVertex("app_inv");
- schemaGraph.addVertex("app_exec");
- schemaGraph.addVertex("rt_info");
- schemaGraph.addVertex("ds_in");
- schemaGraph.addVertex("ds_out");
- schemaGraph.addVertex("ds");
- schemaGraph.addVertex("file");
- schemaGraph.addVertex("in_mem");
- schemaGraph.addVertex("a_ds_t");
- schemaGraph.addVertex("a_ds_n");
- schemaGraph.addVertex("ds_cont");
- // compare_run is added as a vertex but no edge below connects it to any other relation.
- schemaGraph.addVertex("compare_run");
- schemaGraph.addEdge("a_run_t", "run");
- schemaGraph.addEdge("a_run_n", "run");
- schemaGraph.addEdge("run","proc");
- schemaGraph.addEdge("proc", "a_proc_t");
- schemaGraph.addEdge("proc", "a_proc_n");
- schemaGraph.addEdge("proc", "ds_out");
- schemaGraph.addEdge("proc", "ds_in");
- schemaGraph.addEdge("proc", "app_inv");
- schemaGraph.addEdge("app_inv", "app_exec");
- schemaGraph.addEdge("app_exec", "rt_info");
- schemaGraph.addEdge("ds", "ds_in");
- schemaGraph.addEdge("ds", "ds_out");
- schemaGraph.addEdge("ds", "a_ds_t");
- schemaGraph.addEdge("ds", "a_ds_n");
- schemaGraph.addEdge("ds", "file");
- schemaGraph.addEdge("ds", "in_mem");
- // The ds/ds_cont edge is added twice on purpose-looking lines; presumably one edge per
- // ds_cont.in_id / ds_cont.out_id join registered in computeJoinExpressions() -- TODO confirm.
- schemaGraph.addEdge("ds", "ds_cont");
- schemaGraph.addEdge("ds", "ds_cont");
-
- return schemaGraph;
- }
-
- /**
-  * Collects the schema-graph edges needed to connect the referenced relations.
-  * The first relation yielded by the set's iterator is used as the anchor, and the
-  * edges of the shortest path from it to every other relation are gathered.
-  *
-  * @param schemaGraph graph of joinable relations
-  * @param relations   relation names referenced by the query
-  * @return the union of the edges on all anchor-to-relation shortest paths
-  */
- private static HashSet<DefaultEdge> computeJoinEdges(
-         UndirectedGraph<String, DefaultEdge> schemaGraph,
-         HashSet<String> relations) {
-     HashSet<DefaultEdge> pathEdges = new HashSet<DefaultEdge>();
-     Iterator<String> remaining = relations.iterator();
-     // Stays empty when no relation was referenced; the loop below is then skipped.
-     String anchor = "";
-     if (remaining.hasNext()) {
-         anchor += remaining.next();
-     }
-     while (remaining.hasNext()) {
-         String target = remaining.next();
-         DijkstraShortestPath<String, DefaultEdge> shortest =
-             new DijkstraShortestPath<String, DefaultEdge>(schemaGraph, anchor, target);
-         for (DefaultEdge edge : shortest.getPathEdgeList()) {
-             pathEdges.add(edge);
-         }
-     }
-     return pathEdges;
- }
-
- public static String computeFrom(UndirectedGraph<String,DefaultEdge> schemaGraph, HashSet<DefaultEdge> joinEdges, HashSet<String> qrels) {
- HashSet<String> fromRels = new HashSet<String>();
- String fromq = " FROM ";
- Iterator<DefaultEdge> i = joinEdges.iterator();
- Iterator<String> k = qrels.iterator();
- if(qrels.size() == 1)
- fromRels.add(k.next());
- else
- while(i.hasNext()) {
- DefaultEdge aux = i.next();
- // If ds_in or ds_out were not in the original select clause's relations and they are on the the joinEdges
- // then one has to make sure that both consumed and produced datasets are considered in the join so there
- // is no loss of information. One alternative, implemented here, is to replace these occurrences by the ds
- // view, which is an union of ds_in and ds_out.
- if(qrels.contains("ds_in") || qrels.contains("ds_out")) {
- fromRels.add(schemaGraph.getEdgeSource(aux));
- fromRels.add(schemaGraph.getEdgeTarget(aux));
- }
- else {
- if(aux.equals(schemaGraph.getEdge("ds_in","proc")) ||
- aux.equals(schemaGraph.getEdge("ds_in","ds")) ||
- aux.equals(schemaGraph.getEdge("ds_out","proc")) ||
- aux.equals(schemaGraph.getEdge("ds_out","ds"))) {
- fromRels.add("ds");
- fromRels.add("ds_use");
- fromRels.add("proc");
- }
- else {
- fromRels.add(schemaGraph.getEdgeSource(aux));
- fromRels.add(schemaGraph.getEdgeTarget(aux));
- }
- }
- }
- Iterator<String> j = fromRels.iterator();
- if(j.hasNext())
- fromq += j.next();
- while(j.hasNext())
- fromq += "," + j.next();
- if(hasCompareRunCall) {
- if(fromRels.size() > 0)
- fromq += ",";
- fromq += "(" + computeCompareRunQuery(compareRunParams) + ") AS compare_run";
- }
- return fromq;
- }
-
-
- public static String computeJoinExpressions(UndirectedGraph<String,DefaultEdge> schemaGraph, HashSet<DefaultEdge> jEdges, HashSet<String> qrels) {
-
- HashMap<DefaultEdge,String> joinExpressions = new HashMap<DefaultEdge, String>();
- String joinExpressionsString = new String();
-
- joinExpressions.put(schemaGraph.getEdge("a_run_t", "run"), "a_run_t.run_id=run.id");
- joinExpressions.put(schemaGraph.getEdge("a_run_n", "run"), "a_run_n.run_id=run.id");
- joinExpressions.put(schemaGraph.getEdge("run", "proc"), "run.id=proc.run_id");
- joinExpressions.put(schemaGraph.getEdge("proc", "a_proc_t"), "proc.id=a_proc_t.proc_id");
- joinExpressions.put(schemaGraph.getEdge("proc", "a_proc_n"), "proc.id=a_proc_n.proc_id");
- joinExpressions.put(schemaGraph.getEdge("proc", "ds_out"), "proc.id=ds_out.proc_id");
- joinExpressions.put(schemaGraph.getEdge("proc", "ds_in"), "proc.id=ds_in.proc_id");
- joinExpressions.put(schemaGraph.getEdge("proc", "app_inv"), "proc.id=app_inv.id");
- joinExpressions.put(schemaGraph.getEdge("app_inv", "app_exec"), "app_inv.id=app_exec.app_inv_id");
- joinExpressions.put(schemaGraph.getEdge("app_exec", "rt_info"), "app_exec.id=rt_info.app_exec_id");
- joinExpressions.put(schemaGraph.getEdge("ds", "ds_in"), "ds.id=ds_in.ds_id");
- joinExpressions.put(schemaGraph.getEdge("ds", "ds_out"), "ds.id=ds_out.ds_id");
- joinExpressions.put(schemaGraph.getEdge("ds", "a_ds_t"), "ds.id=a_ds_t.ds_id");
- joinExpressions.put(schemaGraph.getEdge("ds", "a_ds_n"), "ds.id=a_ds_n.ds_id");
- joinExpressions.put(schemaGraph.getEdge("ds", "file"), "ds.id=file.id");
- joinExpressions.put(schemaGraph.getEdge("ds", "in_mem"), "ds.id=in_mem.id");
- joinExpressions.put(schemaGraph.getEdge("ds", "ds_cont"), "ds.id=ds_cont.in_id");
- joinExpressions.put(schemaGraph.getEdge("ds", "ds_cont"), "ds.id=ds_cont.out_id");
-
- Iterator<DefaultEdge> i = jEdges.iterator();
- if(i.hasNext()) {
- DefaultEdge aux = i.next();
- if(qrels.contains("ds_in") || qrels.contains("ds_out")) {
- joinExpressionsString = joinExpressions.get(aux);
- }
- else {
- if(aux.equals(schemaGraph.getEdge("ds_in","proc")) || aux.equals(schemaGraph.getEdge("ds_out","proc")))
- joinExpressionsString = "ds_use.proc_id=proc.id";
- else if(aux.equals(schemaGraph.getEdge("ds_in","ds")) || aux.equals(schemaGraph.getEdge("ds_out","ds")))
- joinExpressionsString = "ds_use.ds_id=ds.id";
- else {
- joinExpressionsString = joinExpressions.get(aux);
- }
-
- }
- }
-
-
- while(i.hasNext()) {
- DefaultEdge aux = i.next();
- if(qrels.contains("ds_in") || qrels.contains("ds_out")) {
- joinExpressionsString += " AND " + joinExpressions.get(aux);
- }
- else {
- if(aux.equals(schemaGraph.getEdge("ds_in","proc")) || aux.equals(schemaGraph.getEdge("ds_out","proc")))
- joinExpressionsString += " AND " + "ds_use.proc_id=proc.id";
- else if(aux.equals(schemaGraph.getEdge("ds_in","ds")) || aux.equals(schemaGraph.getEdge("ds_out","ds")))
- joinExpressionsString += " AND " + "ds_use.ds_id=ds.id";
- else {
- joinExpressionsString += " AND " + joinExpressions.get(aux);
- }
-
- }
- }
- return joinExpressionsString;
- }
-
- /**
-  * Builds the SQL sub-select that backs a compare_run(...) call.
-  *
-  * Every atom of the form key_numeric='k', key_text='k' or parameter='k' contributes a
-  * column "jN.value as k" and a call to compare_run_by_&lt;kind&gt;('k') aliased jN; the
-  * second and later calls are combined with INNER JOIN ... USING (run_id). Atoms whose
-  * text before '=' is not one of the three kinds are ignored.
-  *
-  * @param atoms raw text of the compare_run arguments, e.g. "key_text='species'"
-  * @return the sub-select text, without surrounding parentheses
-  * @throws IllegalArgumentException if a recognised atom has no value or an empty
-  *         quoted key (previously an ArrayIndexOutOfBoundsException with no context)
-  */
- public static String computeCompareRunQuery(HashSet<String> atoms) {
-     StringBuilder select = new StringBuilder("SELECT run_id");
-     StringBuilder from = new StringBuilder("FROM");
-     int nId = 0;
-     for (String arg : atoms) {
-         String[] argTokens = arg.split("=");
-         String kind = argTokens[0];
-         if (!(kind.equals("key_numeric") || kind.equals("key_text") || kind.equals("parameter"))) {
-             continue;
-         }
-         // split("'") on 'k' yields ["", "k"]; an empty literal '' yields no elements at all.
-         String[] quoted = argTokens.length > 1 ? argTokens[1].split("'") : new String[0];
-         if (quoted.length < 2) {
-             throw new IllegalArgumentException("compare_run argument has no quoted key: " + arg);
-         }
-         String key = quoted[1];
-         nId++;
-         String sId = "j" + nId;
-         select.append(", ").append(sId).append(".value as ").append(key);
-         if (nId > 1) {
-             from.append(" INNER JOIN");
-         }
-         from.append(" compare_run_by_").append(kind).append("(\'").append(key).append("\') as ").append(sId);
-         if (nId > 1) {
-             from.append(" USING (run_id)");
-         }
-     }
-     return select + " " + from;
- }
-
-}
-
-// Entry rule: one squery, optionally combined with further squeries through
-// UNION / INTERSECT / EXCEPT [ALL], terminated by ';'. The set operators are echoed
-// to stdout with println, everything else in this grammar uses print.
-query : squery (
- (
- UNION { System.out.println(" UNION "); }
- |
- INTERSECT { System.out.println(" INTERSECT "); }
- |
- EXCEPT { System.out.println(" EXCEPT "); }
- )
- (
- ALL { System.out.println(" ALL "); }
- )?
- squery
- )*
- SEMICOLON
- {
- System.out.print(";");
- }
-;
-
-// One SELECT statement (or a parenthesised squery). The translated SQL is printed to
-// stdout in this order: SELECT [DISTINCT] list, the FROM clause and join predicates
-// derived from the referenced relations, the user's WHERE predicates, then the optional
-// GROUP BY / HAVING and ORDER BY parts. Note that list items are separated by the COLON
-// token, which the lexer defines as ','.
-// NOTE(review): selectClause, fromClause, whereClause and relations are parser fields
-// that are only ever appended to here; nothing in this rule resets them, so a second
-// squery (set-operation operand or subquery) appears to inherit the first one's text --
-// confirm.
-squery : SELECT
- {
- System.out.print("SELECT ");
- }
- (
- DISTINCT
- {
- System.out.print("DISTINCT ");
- }
- )?
- selectExpression
- {
- System.out.print(selectClause);
- }
- (WHERE whereExpression
- {
- hasWhereExpression=true;
- }
- )?
- {
- // Runs after both SELECT and WHERE were parsed, i.e. once every rule that adds to
- // "relations" has had its chance to do so.
- schemaGraph = buildGraph();
- joinEdges = computeJoinEdges(schemaGraph, relations);
- hasWhereJoinExpression=false;
-
- fromClause += computeFrom(schemaGraph, joinEdges, relations);
-
- System.out.print(fromClause);
-
- whereClauseJoinExpressions += computeJoinExpressions(schemaGraph, joinEdges, relations);
-
- if(!whereClauseJoinExpressions.isEmpty()) {
- hasWhereJoinExpression=true;
- System.out.print(" WHERE " + whereClauseJoinExpressions);
- }
-
- // User predicates follow the derived join predicates with AND, or open the WHERE
- // clause themselves when there is no join predicate.
- if(hasWhereExpression) {
- if(hasWhereJoinExpression)
- System.out.print(" AND ");
- else
- System.out.print(" WHERE ");
- System.out.print(whereClause);
- }
- }
- (
- GROUP BY
- {
- System.out.print(" GROUP BY ");
- }
- a=entityAndAttribute
- {
- System.out.print($a.text);
- }
- (
- COLON
- b=entityAndAttribute
- {
- System.out.print(",");
- System.out.print($b.text);
- }
- )*
- (
- HAVING { System.out.print(" HAVING "); }
- havingExpression
- )?
- )?
- (
- ORDER BY
- {
- System.out.print(" ORDER BY ");
- }
- (
- c=entityAndAttribute
- {
- System.out.print($c.text);
- }
- |
- COUNT { System.out.print(" COUNT "); }
- |
- e=AGGRFUN { System.out.print(" " + $e.text + " "); }
- )
- (
- COLON { System.out.print(","); }
- (
- d=entityAndAttribute
- {
- System.out.print($d.text);
- }
- |
- COUNT { System.out.print(" COUNT "); }
- |
- f=AGGRFUN { System.out.print(" " + $f.text + " "); }
-
- )
- )*
- (
- DESC { System.out.print(" DESC "); }
- |
- ASC { System.out.print(" ASC "); }
- )?
- )?
- |
- '(' { System.out.print("("); }
- squery
- ')' { System.out.print(")"); }
- ;
-
-
-// One item of the SELECT list: an entity or entity.attribute, an aggregate over an
-// entity.attribute, COUNT over an entity/attribute or '*', or a compare_run(...) call.
-// The item's SQL text is appended to selectClause and the entity name (the part before
-// the first '.') is recorded in relations. A bare entity name gets ".*" appended.
-selectAtom
- : a=entityAttribute
- {
- selectClause += $a.text;
- relations.add($a.text.split("\\.")[0]);
- if($a.text.split("\\.").length == 1)
- selectClause += ".*";
- }
- |
- b=AGGRFUN
- {
- selectClause+=$b.text;
- }
- '(' { selectClause+="("; }
- c=entityAndAttribute
- {
- selectClause += $c.text;
- relations.add($c.text.split("\\.")[0]);
- // entityAndAttribute always has the form ID DOT ID, so this branch looks unreachable.
- if($c.text.split("\\.").length == 1)
- selectClause += ".*";
- }
- ')' { selectClause+=")"; }
- |
- d=COUNT
- {
- selectClause+=$d.text;
- }
- '(' { selectClause+="("; }
- (
- e=entityAttribute
- {
- selectClause += $e.text;
- relations.add($e.text.split("\\.")[0]);
- if($e.text.split("\\.").length == 1)
- selectClause += ".*";
- }
- |
- '*' { selectClause+="*"; }
- )
- ')' { selectClause+=")"; }
- |
- builtInProcedureAttribute
- ;
-
-// The SELECT list: one or more selectAtom items separated by the COLON token (','),
-// with a "," appended to selectClause between items.
-selectExpression
- : (
- selectAtom
- )
- (COLON { selectClause+=","; }
- (
- selectAtom
- )
- )*
- ;
-
-// The WHERE condition: whereAtom predicates chained with AND / OR. The connectives are
-// appended to whereClause; no parenthesised grouping of predicates is accepted here.
-whereExpression
- : whereAtom
- (
- (AND
- {
- whereClause += " AND ";
- }
- | OR
- {
- whereClause += " OR ";
- }
- ) whereAtom
- )*
- ;
-
-// One WHERE predicate. The left-hand side is either entity.attribute (whose entity name
-// is recorded in relations so the FROM clause can be derived) or compare_run.<column>.
-// It is followed by an optional NOT and one of: a comparison with a literal, BETWEEN,
-// LIKE, or IN / <op> ALL|ANY over a parenthesised subquery. The predicate's SQL text is
-// appended to whereClause.
-// NOTE(review): the parentheses around a subquery are printed straight to stdout rather
-// than appended to whereClause, so they are emitted while the enclosing query's WHERE
-// text is still being buffered -- confirm this ordering is intended.
-whereAtom
- : (a=entityAndAttribute
- {
- relations.add($a.text.split("\\.")[0]);
- whereClause += $a.text;
- }
- |
- // Must match the "compare_run" alias that computeFrom gives the generated sub-select.
- j=COMPARERUN { whereClause+="compare_run"; }
- DOT
- k=ID { whereClause+="."+$k.text; }
- )
- (
- NOT
- {
- whereClause += " NOT ";
- }
- )?
-
- (
- b=OP
- {
- whereClause += $b.text;
- }
- (
- c=STRING
- {
- whereClause += $c.text;
- }
- |
- d=INT
- {
- whereClause += $d.text;
- }
- |
- e=FLOAT
- {
- whereClause += $e.text;
- }
- )
- |
- BETWEEN
- {
- whereClause += " BETWEEN ";
- }
- f=STRING
- {
- whereClause += $f.text;
- }
- AND
- {
- whereClause += " AND ";
- }
- g=STRING
- {
- whereClause += $g.text;
- }
- |
- LIKE
- {
- whereClause += " LIKE ";
- }
- h=STRING
- {
- whereClause += $h.text;
- }
- |
- (
- IN
- {
- whereClause += " IN ";
- }
- |
- i=OP
- {
- whereClause += $i.text;
- }
-
- (
- ALL
- {
- whereClause += " ALL ";
- }
- |
- ANY
- {
- whereClause += " ANY ";
- }
- )
-
- )
- '(' { System.out.print("("); }
- squery
- ')' { System.out.print(")"); }
- )
- ;
-
-// The HAVING condition: havingAtom predicates chained with AND / OR. Unlike the WHERE
-// rules, the text is printed to stdout immediately instead of being buffered.
-havingExpression
- : havingAtom
- (
- (AND
- {
- System.out.print(" AND ");
- }
- | OR
- {
- System.out.print(" OR ");
- }
- ) havingAtom
- )*
- ;
-
-
-// One HAVING predicate: entity.attribute, an optional NOT, then a comparison with a
-// literal, BETWEEN, LIKE, or IN / <op> ALL|ANY over a parenthesised subquery. Each piece
-// is echoed to stdout as soon as it is recognised.
-havingAtom
- : a=entityAndAttribute
- {
- System.out.print($a.text);
- }
- (
- NOT
- {
- System.out.print(" NOT ");
- }
- )?
-
- (
- b=OP
- {
- System.out.print($b.text);
- }
- (
- c=STRING
- {
- System.out.print($c.text);
- }
- |
- d=INT
- {
- System.out.print($d.text);
- }
- |
- e=FLOAT
- {
- System.out.print($e.text);
- }
- )
- |
- BETWEEN
- {
- System.out.print(" BETWEEN ");
- }
- f=STRING
- {
- System.out.print($f.text);
- }
- AND
- {
- System.out.print(" AND ");
- }
- g=STRING
- {
- System.out.print($g.text);
- }
- |
- LIKE
- {
- // Emit the LIKE keyword itself, as whereAtom does for the same alternative.
- System.out.print(" LIKE ");
- }
- h=STRING
- {
- System.out.print($h.text);
- }
- |
- (
- IN
- {
- System.out.print(" IN ");
- }
- |
- i=OP
- {
- System.out.print($i.text);
- }
-
- (
- ALL
- {
- System.out.print(" ALL ");
- }
- |
- ANY
- {
- System.out.print(" ANY ");
- }
- )
-
- )
- '(' { System.out.print("("); }
- squery
- ')' { System.out.print(")"); }
- )
- ;
-
-
-// An entity name, optionally qualified with one attribute: "entity" or "entity.attribute".
-entityAttribute : ID (DOT ID)?;
-
-// A fully qualified attribute; both parts are mandatory: "entity.attribute".
-entityAndAttribute
- : ID DOT ID;
-
-
-// A compare_run(atom, atom, ...) call in the SELECT list, optionally followed by
-// ".column" or ".{column, column, ...}". It sets hasCompareRunCall, stores the raw text
-// of every atom in compareRunParams, and appends the selected compare_run columns (or
-// "compare_run.*" when no column is given) to selectClause.
-builtInProcedureAttribute
- : COMPARERUN {
- boolean hasAttribute = false;
- }
- {
- hasCompareRunCall=true;
- }
- '('
- a=builtInAtom
- {
- compareRunParams.add($a.text);
- // "run" is only pulled into the join when some other relation was already referenced.
- if(relations.size() > 0)
- relations.add("run");
- }
- (COLON
- b=builtInAtom
- {
- compareRunParams.add($b.text);
- if(relations.size() > 0)
- relations.add("run");
- }
- )* ')' (
- DOT
- {
- hasAttribute = true;
- }
- (
- c=ID
- {
- selectClause += "compare_run." + $c.text;
- }
- | '{'
- d=ID
- {
- selectClause += "compare_run." + $d.text;
- }
- (COLON
- e=ID
- {
- selectClause += ", compare_run." + $e.text;
- }
- )* '}'))?
- {
- if(!hasAttribute)
- selectClause += "compare_run.*";
- }
-;
-
-// One compare_run argument: a kind keyword, an operator and a quoted literal, e.g.
-// key_text='species'. The grammar accepts any OP here, but computeCompareRunQuery splits
-// the text on '=' and only recognises atoms whose left part is exactly the kind keyword.
-builtInAtom
- : ('parameter' | 'key_numeric' | 'key_text') OP STRING;
-
-// Comparison operators accepted in WHERE / HAVING predicates and compare_run arguments.
-OP : '=' | '>' | '>=' | '<' | '<=';
-
-// Keyword tokens. Every keyword is declared as a lower-case literal only.
-GROUP : 'group';
-
-ORDER : 'order';
-
-COMPARERUN
- : 'compare_run';
-
-// Declared here but not referenced by any parser rule in this grammar.
-ANCESTOR: 'ancestor';
-
-BY : 'by';
-
-AGGRFUN : 'avg' | 'max' | 'min' | 'sum';
-
-COUNT : 'count';
-
-SELECT : 'select';
-
-DESC : 'desc';
-
-ASC : 'asc';
-
-
-DISTINCT
- : 'distinct';
-
-WHERE : 'where';
-
-AND : 'and';
-
-OR : 'or';
-
-NOT : 'not';
-
-IN : 'in';
-
-ANY : 'any';
-
-UNION : 'union';
-
-INTERSECT
- : 'intersect';
-
-EXCEPT : 'except';
-
-ALL : 'all';
-
-// Punctuation tokens.
-DOT : '.';
-
-// Despite its name this token matches a comma; it is the list separator used throughout.
-COLON : ',';
-
-BETWEEN : 'between';
-
-HAVING : 'having';
-
-LIKE : 'like';
-
-SEMICOLON : ';';
-
-// Identifier: a letter or '_' followed by letters, digits, '_' or '-'.
-ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_'|'-')*
- ;
-
-// Unsigned integer literal: one or more digits.
-INT : '0'..'9'+
- ;
-
-// Unsigned decimal literal: digits with a '.' ("1.", "1.5") or a leading '.' (".5").
-// A digits-only alternative is deliberately absent: it would match exactly the same
-// input as INT and make the two lexer rules ambiguous.
-FLOAT
- : ('0'..'9')+ '.' ('0'..'9')*
- | '.' ('0'..'9')+
- ;
-
-// Single-quoted literal. Only letters, digits, '_', '-', '.' and '%' are accepted
-// between the quotes; spaces and escaped quotes are not.
-STRING
- : '\'' ( 'a'..'z' | 'A'..'Z' | '_' | '-' | '0'..'9' | '.' | '%')* '\''
- ;
-
-// Line terminator; not referenced by any parser rule in this grammar, and the same
-// characters are also covered by WS below.
-NEWLINE : '\r' ? '\n';
-
-// Runs of spaces, tabs and line breaks are skipped and never reach the parser.
-WS : (' ' |'\t' |'\n' |'\r' )+
- {
- skip();
- }
- ;
Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql 2013-11-29 08:52:22 UTC (rev 7346)
+++ provenancedb/prov-init.sql 2013-11-29 13:26:05 UTC (rev 7347)
@@ -329,4 +329,29 @@
create view script_run_summary as
select id,swift_version,cog_version,final_state,
start_time,duration,script_filename
- from script_run;
\ No newline at end of file
+ from script_run;
+
+-- Text-valued annotations of every entity kind, one row per annotation.
+-- Every branch of a UNION ALL must yield the same number of columns, so each branch
+-- projects (entity_id, name, text_value, entity_type).
+-- NOTE(review): assumes the function_call, app_exec and script_run annotation tables
+-- have a "name" column like annot_dataset_text -- confirm against their definitions.
+create view annotation_text as
+ select dataset_id as entity_id, name, value as text_value, 'dataset' as entity_type from annot_dataset_text
+ union all
+ select function_call_id as entity_id, name, value as text_value, 'function_call' as entity_type from annot_function_call_text
+ union all
+ select app_exec_id as entity_id, name, value as text_value, 'app_exec' as entity_type from annot_app_exec_text
+ union all
+ select script_run_id as entity_id, name, value as text_value, 'script_run' as entity_type from annot_script_run_text;
+
+-- Numeric annotations of every entity kind, one row per annotation.
+-- Every branch of a UNION ALL must yield the same number of columns, so each branch
+-- projects (entity_id, name, numeric_value, entity_type).
+-- NOTE(review): assumes the function_call, app_exec and script_run annotation tables
+-- have a "name" column like annot_dataset_num -- confirm against their definitions.
+create view annotation_numeric as
+ select dataset_id as entity_id, name, value as numeric_value, 'dataset' as entity_type from annot_dataset_num
+ union all
+ select function_call_id as entity_id, name, value as numeric_value, 'function_call' as entity_type from annot_function_call_num
+ union all
+ select app_exec_id as entity_id, name, value as numeric_value, 'app_exec' as entity_type from annot_app_exec_num
+ union all
+ select script_run_id as entity_id, name, value as numeric_value, 'script_run' as entity_type from annot_script_run_num;
+
+
+-- Annotations of any entity type and data type in one view. Rows coming from
+-- annotation_text carry NULL in numeric_value; rows coming from annotation_numeric
+-- carry NULL in text_value. Only entity_id, entity_type and the two value columns are
+-- projected here.
+create view annotation as
+ select entity_id, entity_type, NULL as numeric_value, text_value from annotation_text
+ union all
+ select entity_id, entity_type, numeric_value, NULL as text_value from annotation_numeric;
+
\ No newline at end of file
More information about the Swift-commit
mailing list