[Swift-commit] r5517 - provenancedb
lgadelha at ci.uchicago.edu
lgadelha at ci.uchicago.edu
Tue Jan 24 12:38:35 CST 2012
Author: lgadelha
Date: 2012-01-24 12:38:35 -0600 (Tue, 24 Jan 2012)
New Revision: 5517
Added:
provenancedb/SPQL.g
Modified:
provenancedb/prov-init.sql
Log:
Minor changes
Added: provenancedb/SPQL.g
===================================================================
--- provenancedb/SPQL.g (rev 0)
+++ provenancedb/SPQL.g 2012-01-24 18:38:35 UTC (rev 5517)
@@ -0,0 +1,743 @@
+grammar ProvSQL;
+
+ at header {
+ import java.util.HashSet;
+ import java.util.HashMap;
+ import java.util.Iterator;
+ import org.jgrapht.*;
+ import org.jgrapht.alg.DijkstraShortestPath;
+ import org.jgrapht.graph.*;
+}
+
+ at members {
+ String selectClause = new String();
+ String fromClause = new String();
+ String whereClauseJoinExpressions = new String();
+ String whereClause = new String();
+ boolean hasWhereJoinExpression;
+ boolean hasWhereExpression = false;
+ static boolean hasCompareRunCall = false;
+ HashSet<String> relations = new HashSet<String>();
+ UndirectedGraph<String,DefaultEdge> schemaGraph;
+ HashSet<DefaultEdge> joinEdges;
+ static HashSet<String> compareRunParams = new HashSet<String>();;
+
+ // Ideally it could receive a DB schema in SQL and build the graph automatically
+ /**
+  * Builds the hard-coded schema summary graph from which joins are inferred:
+  * one vertex per queryable relation and one edge per supported join between
+  * two relations.
+  *
+  * @return a new multigraph; variable and variable_containment are linked by
+  *         two parallel edges
+  */
+ public static UndirectedGraph<String,DefaultEdge> buildGraph() {
+     UndirectedGraph<String,DefaultEdge> schemaGraph = new Multigraph<String,DefaultEdge>(DefaultEdge.class);
+     schemaGraph.addVertex("annotation");
+     schemaGraph.addVertex("script_run");
+     schemaGraph.addVertex("function_call");
+     schemaGraph.addVertex("variable");
+     schemaGraph.addVertex("application_execution");
+     schemaGraph.addVertex("runtime_info");
+     schemaGraph.addVertex("contains");
+     // Every edge end point has to be registered before addEdge is called,
+     // because addEdge rejects unknown vertices with an IllegalArgumentException.
+     // The two containment edges below end at variable_containment.
+     schemaGraph.addVertex("variable_containment");
+     schemaGraph.addVertex("produces");
+     schemaGraph.addVertex("consumes");
+     schemaGraph.addVertex("compare_run");
+
+     schemaGraph.addEdge("annotation", "script_run");
+     schemaGraph.addEdge("annotation", "function_call");
+     schemaGraph.addEdge("annotation", "variable");
+     schemaGraph.addEdge("script_run", "function_call");
+     schemaGraph.addEdge("function_call", "consumes");
+     schemaGraph.addEdge("function_call", "produces");
+     schemaGraph.addEdge("function_call", "application_execution");
+     schemaGraph.addEdge("application_execution", "runtime_info");
+     // Added twice on purpose: the multigraph keeps both as parallel edges.
+     schemaGraph.addEdge("variable", "variable_containment");
+     schemaGraph.addEdge("variable", "variable_containment");
+     schemaGraph.addEdge("variable", "consumes");
+     schemaGraph.addEdge("variable", "produces");
+
+     return schemaGraph;
+ }
+
+ /**
+  * Collects the schema edges needed to join the queried relations: the
+  * shortest path from the first relation returned by the set's iterator to
+  * each of the other relations.
+  *
+  * @param schemaGraph graph whose vertices are relation names
+  * @param relations   relation names referenced by the query
+  * @return the union of the edges on those shortest paths; empty when fewer
+  *         than two relations are given
+  */
+ private static HashSet<DefaultEdge> computeJoinEdges(
+         UndirectedGraph<String, DefaultEdge> schemaGraph,
+         HashSet<String> relations) {
+     HashSet<DefaultEdge> jEdges = new HashSet<DefaultEdge>();
+     Iterator<String> remaining = relations.iterator();
+     if (!remaining.hasNext()) {
+         return jEdges;
+     }
+     String first = remaining.next();
+     while (remaining.hasNext()) {
+         DijkstraShortestPath<String, DefaultEdge> sP =
+             new DijkstraShortestPath<String, DefaultEdge>(schemaGraph, first, remaining.next());
+         java.util.List<DefaultEdge> pathEdges = sP.getPathEdgeList();
+         // No path exists for vertices that have no edges; the edge list is
+         // then null, so that relation contributes no join edges.
+         if (pathEdges != null) {
+             jEdges.addAll(pathEdges);
+         }
+     }
+     return jEdges;
+ }
+
+ public static String computeFrom(UndirectedGraph<String,DefaultEdge> schemaGraph, HashSet<DefaultEdge> joinEdges, HashSet<String> qrels) {
+ HashSet<String> fromRels = new HashSet<String>();
+ String fromq = " FROM ";
+ Iterator<DefaultEdge> i = joinEdges.iterator();
+ Iterator<String> k = qrels.iterator();
+ if(qrels.size() == 1)
+ fromRels.add(k.next());
+ else
+ while(i.hasNext()) {
+ DefaultEdge aux = i.next();
+ // If ds_in or ds_out were not in the original select clause's relations and they are on the the joinEdges
+ // then one has to make sure that both consumed and produced datasets are considered in the join so there
+ // is no loss of information. One alternative, implemented here, is to replace these occurrences by the ds
+ // view, which is an union of ds_in and ds_out.
+ if(qrels.contains("produces") || qrels.contains("consumes")) {
+ fromRels.add(schemaGraph.getEdgeSource(aux));
+ fromRels.add(schemaGraph.getEdgeTarget(aux));
+ }
+ else {
+ if(aux.equals(schemaGraph.getEdge("consumes","function_call")) ||
+ aux.equals(schemaGraph.getEdge("consumes","variable")) ||
+ aux.equals(schemaGraph.getEdge("produces","function_call")) ||
+ aux.equals(schemaGraph.getEdge("produces","variable"))) {
+ fromRels.add("variable");
+ fromRels.add("ds_use");
+ fromRels.add("function_call");
+ }
+ else {
+ fromRels.add(schemaGraph.getEdgeSource(aux));
+ fromRels.add(schemaGraph.getEdgeTarget(aux));
+ }
+ }
+ }
+ Iterator<String> j = fromRels.iterator();
+ if(j.hasNext())
+ fromq += j.next();
+ while(j.hasNext())
+ fromq += "," + j.next();
+ if(hasCompareRunCall) {
+ if(fromRels.size() > 0)
+ fromq += ",";
+ fromq += "(" + computeCompareRunQuery(compareRunParams) + ") AS compare_run";
+ }
+ return fromq;
+ }
+
+
+ public static String computeJoinExpressions(UndirectedGraph<String,DefaultEdge> schemaGraph, HashSet<DefaultEdge> jEdges, HashSet<String> qrels) {
+
+ HashMap<DefaultEdge,String> joinExpressions = new HashMap<DefaultEdge, String>();
+ String joinExpressionsString = new String();
+
+ joinExpressions.put(schemaGraph.getEdge("annotation", "script_run"), "annotation.script_run_id=script_run.id");
+ joinExpressions.put(schemaGraph.getEdge("script_run", "function_call"), "script_run.id=function_call.script_run_id");
+ joinExpressions.put(schemaGraph.getEdge("function_call", "annotation"), "function_call.id=annotation.function_call_id");
+ joinExpressions.put(schemaGraph.getEdge("function_call", "annotation"), "function_call.id=annotation.function_call_id");
+ joinExpressions.put(schemaGraph.getEdge("function_call", "produces"), "function_call.id=produces.function_call_id");
+ joinExpressions.put(schemaGraph.getEdge("function_call", "consumes"), "function_call.id=consumes.function_call_id");
+ joinExpressions.put(schemaGraph.getEdge("application_execution", "rt_info"), "application_execution.id=rt_info.application_execution_id");
+ joinExpressions.put(schemaGraph.getEdge("variable", "consumes"), "variable.id=consumes.variable_id");
+ joinExpressions.put(schemaGraph.getEdge("variable", "produces"), "variable.id=produces.variable_id");
+ joinExpressions.put(schemaGraph.getEdge("variable", "annotation"), "variable.id=annotation.variable_id");
+ joinExpressions.put(schemaGraph.getEdge("variable", "annotation"), "variable.id=annotation.variable_id");
+ joinExpressions.put(schemaGraph.getEdge("variable", "file"), "variable.id=file.id");
+ joinExpressions.put(schemaGraph.getEdge("variable", "in_mem"), "variable.id=in_mem.id");
+ joinExpressions.put(schemaGraph.getEdge("variable", "containment"), "variable.id=containment.containee");
+ joinExpressions.put(schemaGraph.getEdge("variable", "containment"), "variable.id=containment.container");
+
+ Iterator<DefaultEdge> i = jEdges.iterator();
+ if(i.hasNext()) {
+ DefaultEdge aux = i.next();
+ if(qrels.contains("consumes") || qrels.contains("produces")) {
+ joinExpressionsString = joinExpressions.get(aux);
+ }
+ else {
+ if(aux.equals(schemaGraph.getEdge("consumes","function_call")) || aux.equals(schemaGraph.getEdge("produces","function_call")))
+ joinExpressionsString = "ds_use.function_call_id=function_call.id";
+ else if(aux.equals(schemaGraph.getEdge("consumes","variable")) || aux.equals(schemaGraph.getEdge("produces","variable")))
+ joinExpressionsString = "ds_use.variable_id=variable.id";
+ else {
+ joinExpressionsString = joinExpressions.get(aux);
+ }
+
+ }
+ }
+
+
+ while(i.hasNext()) {
+ DefaultEdge aux = i.next();
+ if(qrels.contains("consumes") || qrels.contains("produces")) {
+ joinExpressionsString += " AND " + joinExpressions.get(aux);
+ }
+ else {
+ if(aux.equals(schemaGraph.getEdge("consumes","function_call")) || aux.equals(schemaGraph.getEdge("produces","function_call")))
+ joinExpressionsString += " AND " + "ds_use.function_call_id=function_call.id";
+ else if(aux.equals(schemaGraph.getEdge("consumes","variable")) || aux.equals(schemaGraph.getEdge("produces","variable")))
+ joinExpressionsString += " AND " + "ds_use.variable_id=variable.id";
+ else {
+ joinExpressionsString += " AND " + joinExpressions.get(aux);
+ }
+
+ }
+ }
+ return joinExpressionsString;
+ }
+
+ /**
+  * Builds the sub-query behind the compare_run built-in.
+  *
+  * Each atom has the form kind='key', where kind is key_numeric, key_text or
+  * parameter. Every usable atom adds a call to compare_run_by_kind('key')
+  * aliased j1, j2, ... and a select column "jN.value as key"; from the second
+  * call on, the calls are inner-joined on script_run_id. Atoms of any other
+  * kind, atoms without '=', and atoms whose quoted key is missing or empty
+  * are skipped.
+  *
+  * @param atoms the compare_run arguments as written in the query
+  * @return the SELECT text; "SELECT script_run_id FROM" when no atom is usable
+  */
+ public static String computeCompareRunQuery(HashSet<String> atoms) {
+     StringBuilder selectPart = new StringBuilder("SELECT script_run_id");
+     StringBuilder fromPart = new StringBuilder("FROM");
+     int nId = 0;
+     for (String arg : atoms) {
+         String[] argTokens = arg.split("=");
+         if (argTokens.length < 2) {
+             continue;
+         }
+         String kind = argTokens[0];
+         if (!(kind.equals("key_numeric") || kind.equals("key_text") || kind.equals("parameter"))) {
+             continue;
+         }
+         // The key is the text between the quotes; splitting '' yields no
+         // elements at all, so the length has to be checked before indexing.
+         String[] quoted = argTokens[1].split("'");
+         if (quoted.length < 2 || quoted[1].isEmpty()) {
+             continue;
+         }
+         String key = quoted[1];
+         nId++;
+         String sId = "j" + nId;
+         selectPart.append(", ").append(sId).append(".value as ").append(key);
+         if (nId > 1) {
+             fromPart.append(" INNER JOIN");
+         }
+         fromPart.append(" compare_run_by_").append(kind).append("('").append(key).append("') as ").append(sId);
+         if (nId > 1) {
+             fromPart.append(" USING (script_run_id)");
+         }
+     }
+     return selectPart + " " + fromPart;
+ }
+
+}
+
+query : squery ( // Entry rule: one squery, optionally chained to further ones by set operators, closed by a semicolon.
+ (
+ UNION { System.out.println(" UNION "); } // set operators are echoed with println, so each one ends the current output line
+ |
+ INTERSECT { System.out.println(" INTERSECT "); }
+ |
+ EXCEPT { System.out.println(" EXCEPT "); }
+ )
+ (
+ ALL { System.out.println(" ALL "); } // optional ALL modifier after the set operator
+ )?
+ squery
+ )*
+ SEMICOLON
+ {
+ System.out.print(";"); // closes the emitted SQL statement
+ }
+;
+
+squery : SELECT // One SELECT statement, or a parenthesised squery; the SQL text is written to standard output while parsing.
+ {
+ System.out.print("SELECT ");
+ }
+ (
+ DISTINCT
+ {
+ System.out.print("DISTINCT ");
+ }
+ )?
+ selectExpression
+ {
+ System.out.print(selectClause); // selectClause was filled in by the selectAtom actions
+ }
+ (WHERE whereExpression
+ {
+ hasWhereExpression=true; // the user predicates themselves are buffered in whereClause
+ }
+ )?
+ {
+ schemaGraph = buildGraph(); // FROM and the join predicates are derived from the relations collected so far
+ joinEdges = computeJoinEdges(schemaGraph, relations);
+ hasWhereJoinExpression=false;
+
+ fromClause += computeFrom(schemaGraph, joinEdges, relations); // NOTE(review): the clause buffers are only appended to in the visible code, never cleared; confirm the output when a second squery runs on the same parser
+
+ System.out.print(fromClause);
+
+ whereClauseJoinExpressions += computeJoinExpressions(schemaGraph, joinEdges, relations);
+
+ if(!whereClauseJoinExpressions.isEmpty()) {
+ hasWhereJoinExpression=true;
+ System.out.print(" WHERE " + whereClauseJoinExpressions); // inferred join predicates come first
+ }
+
+ if(hasWhereExpression) {
+ if(hasWhereJoinExpression)
+ System.out.print(" AND "); // user predicates extend the join predicates
+ else
+ System.out.print(" WHERE "); // no join predicates, so the user predicates open the WHERE clause
+ System.out.print(whereClause);
+ }
+ }
+ (
+ GROUP BY
+ {
+ System.out.print(" GROUP BY ");
+ }
+ a=entityAndAttribute
+ {
+ System.out.print($a.text);
+ }
+ (
+ COLON // the COLON token matches a comma
+ b=entityAndAttribute
+ {
+ System.out.print(",");
+ System.out.print($b.text);
+ }
+ )*
+ (
+ HAVING { System.out.print(" HAVING "); }
+ havingExpression
+ )?
+ )?
+ (
+ ORDER BY
+ {
+ System.out.print(" ORDER BY ");
+ }
+ ( // first sort key: an attribute, or the bare COUNT or AGGRFUN keyword
+ c=entityAndAttribute
+ {
+ System.out.print($c.text);
+ }
+ |
+ COUNT { System.out.print(" COUNT "); }
+ |
+ e=AGGRFUN { System.out.print(" " + $e.text + " "); }
+ )
+ ( // further comma-separated sort keys
+ COLON { System.out.print(","); }
+ (
+ d=entityAndAttribute
+ {
+ System.out.print($d.text);
+ }
+ |
+ COUNT { System.out.print(" COUNT "); }
+ |
+ f=AGGRFUN { System.out.print(" " + $f.text + " "); }
+
+ )
+ )*
+ ( // a single optional direction, accepted only after the last sort key
+ DESC { System.out.print(" DESC "); }
+ |
+ ASC { System.out.print(" ASC "); }
+ )?
+ )?
+ | // second alternative: a squery wrapped in parentheses
+ '(' { System.out.print("("); }
+ squery
+ ')' { System.out.print(")"); }
+ ;
+
+
+selectAtom // One item of the select list; its SQL text is appended to selectClause and its relation recorded in relations.
+ : a=entityAttribute
+ {
+ selectClause += $a.text;
+ relations.add($a.text.split("\\.")[0]); // the text before the first dot names the relation
+ if($a.text.split("\\.").length == 1) // a bare relation name selects all of its columns
+ selectClause += ".*";
+ }
+ |
+ b=AGGRFUN // aggregate function applied to one relation.attribute
+ {
+ selectClause+=$b.text;
+ }
+ '(' { selectClause+="("; }
+ c=entityAndAttribute
+ {
+ selectClause += $c.text;
+ relations.add($c.text.split("\\.")[0]);
+ if($c.text.split("\\.").length == 1) // NOTE(review): entityAndAttribute always contains a dot, so this test looks unreachable; confirm
+ selectClause += ".*";
+ }
+ ')' { selectClause+=")"; }
+ |
+ d=COUNT // count over an attribute, a whole relation, or the star
+ {
+ selectClause+=$d.text;
+ }
+ '(' { selectClause+="("; }
+ (
+ e=entityAttribute
+ {
+ selectClause += $e.text;
+ relations.add($e.text.split("\\.")[0]);
+ if($e.text.split("\\.").length == 1)
+ selectClause += ".*";
+ }
+ |
+ '*' { selectClause+="*"; }
+ )
+ ')' { selectClause+=")"; }
+ |
+ builtInProcedureAttribute // the compare_run built-in; that rule appends to selectClause itself
+ ;
+
+selectExpression // The select list: one or more selectAtom separated by commas.
+ : (
+ selectAtom
+ )
+ (COLON { selectClause+=","; } // the COLON token matches a comma; it is copied into the buffered select clause
+ (
+ selectAtom
+ )
+ )*
+ ;
+
+whereExpression // User predicates: whereAtom items combined with AND or OR; there is no alternative for grouping them with parentheses.
+ : whereAtom
+ (
+ (AND
+ {
+ whereClause += " AND "; // connectives are buffered in whereClause, in input order
+ }
+ | OR
+ {
+ whereClause += " OR ";
+ }
+ ) whereAtom
+ )*
+ ;
+
+// whereAtom: one predicate of the WHERE clause, buffered in whereClause.
+// The left operand is either a relation.attribute pair, whose relation is
+// recorded for join inference, or an attribute of the compare_run built-in.
+// It is followed by an optional NOT and then one of: a comparison with a
+// literal, BETWEEN two strings, LIKE a string, or IN / a quantified
+// comparison against a sub-query.
+whereAtom
+ : (a=entityAndAttribute
+ {
+ relations.add($a.text.split("\\.")[0]);
+ whereClause += $a.text;
+ }
+ |
+ // The emitted name has to match the compare_run alias given to the
+ // sub-query in the FROM clause (it was misspelled before).
+ j=COMPARERUN { whereClause+="compare_run"; }
+ DOT
+ k=ID { whereClause+="."+$k.text; }
+ )
+ (
+ NOT
+ {
+ whereClause += " NOT ";
+ }
+ )?
+
+ (
+ b=OP
+ {
+ whereClause += $b.text;
+ }
+ (
+ c=STRING
+ {
+ whereClause += $c.text;
+ }
+ |
+ d=INT
+ {
+ whereClause += $d.text;
+ }
+ |
+ e=FLOAT
+ {
+ whereClause += $e.text;
+ }
+ )
+ |
+ BETWEEN
+ {
+ whereClause += " BETWEEN ";
+ }
+ f=STRING
+ {
+ whereClause += $f.text;
+ }
+ AND
+ {
+ whereClause += " AND ";
+ }
+ g=STRING
+ {
+ whereClause += $g.text;
+ }
+ |
+ LIKE
+ {
+ whereClause += " LIKE ";
+ }
+ h=STRING
+ {
+ whereClause += $h.text;
+ }
+ |
+ (
+ IN
+ {
+ whereClause += " IN ";
+ }
+ |
+ i=OP
+ {
+ whereClause += $i.text;
+ }
+
+ (
+ ALL
+ {
+ whereClause += " ALL ";
+ }
+ |
+ ANY
+ {
+ whereClause += " ANY ";
+ }
+ )
+
+ )
+ // NOTE(review): the parentheses around the sub-query are printed
+ // directly, while the rest of this predicate is only buffered in
+ // whereClause; confirm the resulting output order.
+ '(' { System.out.print("("); }
+ squery
+ ')' { System.out.print(")"); }
+ )
+ ;
+
+havingExpression // HAVING predicates: havingAtom items combined with AND or OR.
+ : havingAtom
+ (
+ (AND
+ {
+ System.out.print(" AND "); // HAVING text is printed directly rather than buffered
+ }
+ | OR
+ {
+ System.out.print(" OR ");
+ }
+ ) havingAtom
+ )*
+ ;
+
+
+// havingAtom: one predicate of the HAVING clause, printed directly to
+// standard output. It has the same shape as a whereAtom on a
+// relation.attribute pair: an optional NOT, then a comparison with a literal,
+// BETWEEN two strings, LIKE a string, or IN / a quantified comparison against
+// a sub-query.
+havingAtom
+ : a=entityAndAttribute
+ {
+ System.out.print($a.text);
+ }
+ (
+ NOT
+ {
+ System.out.print(" NOT ");
+ }
+ )?
+
+ (
+ b=OP
+ {
+ System.out.print($b.text);
+ }
+ (
+ c=STRING
+ {
+ System.out.print($c.text);
+ }
+ |
+ d=INT
+ {
+ System.out.print($d.text);
+ }
+ |
+ e=FLOAT
+ {
+ System.out.print($e.text);
+ }
+ )
+ |
+ BETWEEN
+ {
+ System.out.print(" BETWEEN ");
+ }
+ f=STRING
+ {
+ System.out.print($f.text);
+ }
+ AND
+ {
+ System.out.print(" AND ");
+ }
+ g=STRING
+ {
+ System.out.print($g.text);
+ }
+ |
+ LIKE
+ {
+ // Emits the LIKE keyword; this alternative used to print BETWEEN.
+ System.out.print(" LIKE ");
+ }
+ h=STRING
+ {
+ System.out.print($h.text);
+ }
+ |
+ (
+ IN
+ {
+ System.out.print(" IN ");
+ }
+ |
+ i=OP
+ {
+ System.out.print($i.text);
+ }
+
+ (
+ ALL
+ {
+ System.out.print(" ALL ");
+ }
+ |
+ ANY
+ {
+ System.out.print(" ANY ");
+ }
+ )
+
+ )
+ '(' { System.out.print("("); }
+ squery
+ ')' { System.out.print(")"); }
+ )
+ ;
+
+
+entityAttribute : ID (DOT ID)?; // a relation name, optionally followed by a dot and an attribute name
+
+entityAndAttribute // a relation name and an attribute name, both required
+ : ID DOT ID;
+
+
+builtInProcedureAttribute // The compare_run built-in in a select list, with an optional attribute or braced attribute list after it.
+ : COMPARERUN {
+ boolean hasAttribute = false; // set once an attribute selection follows the call
+ }
+ {
+ hasCompareRunCall=true; // static flag read by computeFrom to append the compare_run sub-query
+ }
+ '('
+ a=builtInAtom
+ {
+ compareRunParams.add($a.text); // the argument text is parsed later by computeCompareRunQuery
+ if(relations.size() > 0) // script_run is added only when some relation was already collected
+ relations.add("script_run");
+ }
+ (COLON
+ b=builtInAtom
+ {
+ compareRunParams.add($b.text);
+ if(relations.size() > 0)
+ relations.add("script_run");
+ }
+ )* ')' (
+ DOT
+ {
+ hasAttribute = true;
+ }
+ (
+ c=ID // a single attribute of the compare_run result
+ {
+ selectClause += "compare_run." + $c.text;
+ }
+ | '{' // or a comma-separated attribute list in braces
+ d=ID
+ {
+ selectClause += "compare_run." + $d.text;
+ }
+ (COLON
+ e=ID
+ {
+ selectClause += ", compare_run." + $e.text;
+ }
+ )* '}'))?
+ {
+ if(!hasAttribute) // no attribute given: select every column of the compare_run result
+ selectClause += "compare_run.*";
+ }
+;
+
+builtInAtom // One compare_run argument: a kind keyword, an operator and a quoted string.
+ : ('parameter' | 'key_numeric' | 'key_text') OP STRING; // any OP is accepted here, but computeCompareRunQuery splits the text on the equals sign
+
+OP : '=' | '>' | '>=' | '<' | '<='; // comparison operators accepted in predicates
+
+GROUP : 'group'; // keywords are matched in lower case only
+
+ORDER : 'order';
+
+COMPARERUN
+ : 'compare_run';
+
+ANCESTOR: 'ancestor'; // not referenced by any parser rule visible in this grammar
+
+BY : 'by';
+
+AGGRFUN : 'avg' | 'max' | 'min' | 'sum'; // aggregate function names; count has its own token
+
+COUNT : 'count';
+
+SELECT : 'select';
+
+DESC : 'desc';
+
+ASC : 'asc';
+
+
+DISTINCT
+ : 'distinct';
+
+WHERE : 'where';
+
+AND : 'and';
+
+OR : 'or';
+
+NOT : 'not';
+
+IN : 'in';
+
+ANY : 'any';
+
+UNION : 'union';
+
+INTERSECT
+ : 'intersect';
+
+EXCEPT : 'except';
+
+ALL : 'all';
+
+DOT : '.';
+
+COLON : ','; // despite its name, this token matches a comma
+
+BETWEEN : 'between';
+
+HAVING : 'having';
+
+LIKE : 'like';
+
+SEMICOLON : ';';
+
+ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_'|'-')* // identifiers may contain hyphens after the first character
+ ;
+
+INT : '0'..'9'+
+ ;
+
+FLOAT
+ : ('0'..'9')+ '.' ('0'..'9')*
+ | '.' ('0'..'9')+
+ | ('0'..'9')+ // NOTE(review): this digits-only alternative matches the same input as INT, declared above; confirm which token such input yields
+ ;
+
+STRING
+ : '\'' ( 'a'..'z' | 'A'..'Z' | '_' | '-' | '0'..'9' | '.' | '%')* '\'' // single-quoted; only letters, digits, underscore, hyphen, dot and percent may appear inside, and the content may be empty
+ ;
+
+NEWLINE : '\r' ? '\n'; // NOTE(review): WS below matches the same characters; confirm whether this token is ever produced
+
+WS : (' ' |'\t' |'\n' |'\r' )+
+ {
+ skip(); // whitespace is dropped and never reaches the parser
+ }
+ ;
Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql 2012-01-23 21:49:09 UTC (rev 5516)
+++ provenancedb/prov-init.sql 2012-01-24 18:38:35 UTC (rev 5517)
@@ -207,32 +207,36 @@
-- continue renaming from here
-CREATE VIEW a_t AS
- SELECT *
- FROM a_run_t
- UNION ALL
- SELECT *
- FROM a_ds_t
- UNION ALL
- SELECT *
- FROM a_proc_t;
+create view a_t as
+ select *
+ from a_run_t
+ union all
+ select *
+ from a_ds_t
+ union all
+ select *
+ from a_proc_t;
-CREATE VIEW a_n AS
- SELECT *
- FROM a_run_n
- UNION ALL
- SELECT *
- FROM a_ds_n
- UNION ALL
- SELECT *
- FROM a_proc_n;
+create view a_n as
+ select *
+ from a_run_n
+ union all
+ select *
+ from a_ds_n
+ union all
+ select *
+ from a_proc_n;
-- views used for queries based on the schema summary
drop view function_call;
create view function_call as
- select proc.id, proc.type, proc.name, proc.run_id, app_inv.proc_name, to_timestamp(app_inv.start_time), app_inv.duration, app_inv.final_state, app_inv.scratch from proc natural join app_inv;
+ select proc.id, proc.type, proc.name, proc.run_id, app_inv.proc_name,
+ to_timestamp(app_inv.start_time), app_inv.duration, app_inv.final_state, app_inv.scratch
+ from proc
+ left outer join
+ app_inv on proc.id=app_inv.id;
drop view variable;
@@ -261,7 +265,6 @@
to_timestamp(start_time) as start_time, duration
from run;
-
drop view application_execution;
create view application_execution as
@@ -284,3 +287,9 @@
create view consumes as
select proc_id as function_call_id, ds_id as variable_id, param as parameter from ds_in;
+
+drop view variable_containment;
+
+create view variable_containment as
+ select out_id as container, in_id as containee
+ from ds_cont;
More information about the Swift-commit
mailing list