[Swift-commit] r5756 - provenancedb

lgadelha at ci.uchicago.edu lgadelha at ci.uchicago.edu
Wed Apr 25 14:36:49 CDT 2012


Author: lgadelha
Date: 2012-04-25 14:36:49 -0500 (Wed, 25 Apr 2012)
New Revision: 5756

Modified:
   provenancedb/prov-init.sql
   provenancedb/prov-to-sql.sh
Log:
Simplified identifiers using foreign keys.


Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql	2012-04-24 19:24:13 UTC (rev 5755)
+++ provenancedb/prov-init.sql	2012-04-25 19:36:49 UTC (rev 5756)
@@ -71,11 +71,12 @@
 -- process types: internal, rootthread, execute, function, compound, scope, operator
 create table fun_call
     (
-     id     varchar(256) primary key, 
+     id     varchar(256), 
+     run_id varchar(256) references run (id) on delete cascade,  
      type   varchar(16),
      name   varchar(256), -- in the case of an execute this refers to the transformation name in tc.data
-     run_id varchar(256) references run (id) on delete cascade   -- normalize: workflow_id of sub-procedure determined
-          	 	      		 	       	  	 	   -- by compound procedure 
+     primary key (id, run_id)
+									   									      	 
 );
 
 -- this gives information about each execute.
@@ -83,12 +84,15 @@
 -- swift logs is also stored here. an execute is an OPM process.
 create table app_fun_call
     (
-     id			varchar(256) primary key references fun_call (id) on delete cascade,  
+     id			varchar(256),  
+     run_id		varchar(256),
      name      		varchar(256), -- name of the app procedure that invokes the transformation
      start_time	    	numeric,
      duration		numeric,
      final_state	varchar(32),
-     scratch		varchar(2048)
+     scratch		varchar(2048),
+     foreign key (id, run_id) references fun_call,
+     primary key (id, run_id)
 );
 
 -- this gives information about each application execution attempt, including
@@ -96,8 +100,9 @@
 -- information such as wrapper logs
 create table app_exec
     (
-     id                varchar(256) primary key,
-     app_fun_call_id   varchar(256) references app_fun_call (id) on delete cascade, 
+     id                varchar(256),
+     app_fun_call_id   varchar(256), 
+     run_id	       varchar(256),
      start_time        numeric,
      duration          numeric,
      final_state       varchar(32),
@@ -115,21 +120,26 @@
      majpfaults        numeric,
      minpfaults        numeric,
      ctxswinv	       numeric,
-     ctxswvol	       numeric
+     ctxswvol	       numeric,
+     foreign key(app_fun_call_id, run_id) references app_fun_call,
+     primary key(id, app_fun_call_id, run_id)
 );
 
 -- app execution runtime info extracted from the /proc filesystem (assumes the app executed
 -- in a Linux host) 
 create table rt_info
    ( 
-     app_exec_id        varchar(256) references app_exec (id) on delete cascade, 
+     app_exec_id        varchar(256), 
+     app_fun_call_id    varchar(256), 
+     run_id	        varchar(256),    
      timestamp		numeric,
      cpu_usage          numeric,
      max_phys_mem	numeric,
      max_virt_mem	numeric,
      io_read		numeric,
      io_write		numeric,
-     primary key (app_exec_id, timestamp)
+     foreign key (app_exec_id, app_fun_call_id, run_id) references app_exec,
+     primary key (app_exec_id, app_fun_call_id, run_id, timestamp)
 );
 
 -- ds stores all dataset identifiers.
@@ -175,22 +185,26 @@
 -- input to and output from each process that is a Swift execution
 create table ds_in
     (
-     fun_call_id	varchar(256) references fun_call (id) on delete cascade, 
+     fun_call_id	varchar(256), 
+     run_id		varchar(256),
      ds_id   		varchar(256) references ds (id) on delete cascade,
      parameter   	varchar(256), -- the name of the parameter in this execute that
                              	      -- this dataset was bound to. sometimes this must
                               	      -- be contrived (for example, in positional varargs)
-     primary key (fun_call_id,ds_id,parameter)
+     foreign key (fun_call_id, run_id) references fun_call,
+     primary key (fun_call_id, run_id, ds_id, parameter)
     );
 
 create table ds_out
     (
-     fun_call_id	varchar(256) references fun_call (id) on delete cascade, 
+     fun_call_id	varchar(256), 
+     run_id		varchar(256),
      ds_id   		varchar(256) references ds (id) on delete cascade,
      parameter   	varchar(256), -- the name of the parameter in this execute that
                               	      -- this dataset was bound to. sometimes this must
                               	      -- be contrived (for example, in positional varargs)
-     primary key (fun_call_id,ds_id,parameter)
+     foreign key (fun_call_id, run_id) references fun_call,
+     primary key (fun_call_id, run_id, ds_id, parameter)
     );
 
 
@@ -210,17 +224,21 @@
    );
 
 create table annot_fun_call_num
-   ( fun_call_id    varchar(256) references fun_call (id) on delete cascade, 
-     name       varchar(256),
-     value      numeric,
-     primary key (fun_call_id, name)
+   ( fun_call_id	varchar(256), 
+     run_id	        varchar(256),    
+     name       	varchar(256),
+     value      	numeric,
+     foreign key (fun_call_id, run_id) references fun_call,
+     primary key (fun_call_id, run_id, name)
    );
 
 create table annot_fun_call_text
-   ( fun_call_id    varchar(256) references fun_call (id) on delete cascade, 
-     name       varchar(256),
-     value      varchar(2048),
-     primary key (fun_call_id, name)
+   ( fun_call_id	varchar(256), 
+     run_id	        varchar(256),    
+     name       	varchar(256),
+     value      	varchar(2048),
+     foreign key (fun_call_id, run_id) references fun_call,
+     primary key (fun_call_id, run_id, name)
    );
 
 create table annot_run_num
@@ -238,17 +256,23 @@
    );
 
 create table annot_app_exec_num
-   ( run_id    varchar(256) references run (id) on delete cascade, 
-     name      varchar(256),
-     value     numeric,
-     primary key (run_id, name)
+   ( id			varchar(256), 
+     app_fun_call_id    varchar(256), 
+     run_id	        varchar(256),    
+     name      		varchar(256),
+     value     		numeric,
+     foreign key (id, app_fun_call_id, run_id) references app_exec,
+     primary key (id, app_fun_call_id, run_id, name)
    );
 
 create table annot_app_exec_text
-   ( run_id    varchar(256) references run (id) on delete cascade, 
-     name      varchar(256),
-     value     varchar(2048),
-     primary key (run_id, name)
+   ( id			varchar(256), 
+     app_fun_call_id    varchar(256), 
+     run_id	        varchar(256),    
+     name      		varchar(256),
+     value     		varchar(2048),
+     foreign key (id, app_fun_call_id, run_id) references app_exec,
+     primary key (id, app_fun_call_id, run_id, name)
    );
 
 

Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh	2012-04-24 19:24:13 UTC (rev 5755)
+++ provenancedb/prov-to-sql.sh	2012-04-25 19:36:49 UTC (rev 5756)
@@ -16,16 +16,16 @@
 
 echo "    - Function calls."
 while read time duration thread localthread endstate tr_name scratch; do
-    echo "INSERT INTO fun_call (id, type, run_id) VALUES ('$thread', 'execute', '$WF');"  >> /tmp/$RUNID.sql
-    echo "INSERT INTO app_fun_call (id, name, start_time, duration, final_state, scratch) VALUES ('$thread', '$tr_name', $time, $duration, '$endstate', '$scratch');"   >> /tmp/$RUNID.sql
+    echo "INSERT INTO fun_call (id, type, run_id) VALUES ('$localthread', 'execute', '$WF');"  >> /tmp/$RUNID.sql
+    echo "INSERT INTO app_fun_call (id, run_id, name, start_time, duration, final_state, scratch) VALUES ('$localthread', '$WF', '$tr_name', $time, $duration, '$endstate', '$scratch');"   >> /tmp/$RUNID.sql
 done < execute.global.event
 
 echo "    - Application executions."
 while read start_time duration globalid id endstate thread site scratch; do
     # cut off the last component of the thread, so that we end up at the
     # parent thread id which should correspond with the execute-level ID
-    inv_id="$WFID$(echo $thread | sed 's/-[^-]*$//')"
-    echo  "INSERT INTO app_exec (id, app_fun_call_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');"  >> /tmp/$RUNID.sql
+    inv_id="$(echo $thread | sed 's/-[^-]*$//')"
+    echo  "INSERT INTO app_exec (id, app_fun_call_id, run_id, start_time, duration, final_state, site) VALUES ('$id', '$inv_id', '$WF', $start_time, $duration, '$endstate', '$site');"  >> /tmp/$RUNID.sql
 done < execute2.global.event
 
 echo "    - Mapped variables."
@@ -45,9 +45,9 @@
     echo  "INSERT INTO ds (id) VALUES ('$outer');"  >> /tmp/$RUNID.sql
     echo  "INSERT INTO ds (id) VALUES ('$inner');"  >> /tmp/$RUNID.sql
     echo  "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');"  >> /tmp/$RUNID.sql
-    echo  "INSERT INTO fun_call (id, type, name, run_id) VALUES ('${WFID}constructor:$outer', 'constructor', 'constructor', '$WF');"  >> /tmp/$RUNID.sql
-    echo  "INSERT INTO ds_in (fun_call_id, ds_id, parameter) VALUES ('${WFID}constructor:$outer', '$inner', 'element');"  >> /tmp/$RUNID.sql
-    echo  "INSERT INTO ds_out (fun_call_id, ds_id, parameter) VALUES ('${WFID}constructor:$outer', '$outer', 'collection');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO fun_call (id, type, name, run_id) VALUES ('constructor:$outer', 'constructor', 'constructor', '$WF');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO ds_in (fun_call_id, ds_id, parameter) VALUES ('constructor:$outer', '$inner', 'element');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO ds_out (fun_call_id, ds_id, parameter) VALUES ('constructor:$outer', '$outer', 'collection');"  >> /tmp/$RUNID.sql
 done < tie-containers.txt
 
 echo "    - Operator calls."
@@ -58,7 +58,7 @@
     rhs=$(echo $rhs | awk 'BEGIN { FS = "=" }; {print $2}')
     result=$(echo $result | awk 'BEGIN { FS = "=" }; {print $2}')
     
-    operatorid="${WFID}operator:$thread"
+    operatorid="operator:$thread"
     
     echo  "INSERT INTO ds (id) VALUES ('$lhs');" >> /tmp/$RUNID.sql
     echo  "INSERT INTO ds (id) VALUES ('$rhs');" >> /tmp/$RUNID.sql
@@ -71,14 +71,16 @@
 
 echo "    - Built-in function calls."
 while read id name output; do
+    fid=$(echo $id | awk -F ":" '{print $3}')
     echo  "INSERT INTO ds (id) VALUES ('$output');"  >> /tmp/$RUNID.sql
-    echo  "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$id', 'function', '$name', '$WF');"  >> /tmp/$RUNID.sql
-    echo  "INSERT INTO ds_out (fun_call_id, ds_id, parameter) VALUES ('$id', '$output', 'result');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$fid', 'function', '$name', '$WF');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO ds_out (fun_call_id, ds_id, parameter) VALUES ('$fid', '$output', 'result');"  >> /tmp/$RUNID.sql
 done < functions.txt
 
 while read id value; do
+    fid=$(echo $id | awk -F ":" '{print $3}')
     echo  "INSERT INTO ds (id) VALUES ('$value');" >> /tmp/$RUNID.sql
-    echo  "INSERT INTO ds_in (fun_call_id, ds_id, parameter) VALUES ('$id', '$value', 'undefined');"  >> /tmp/$RUNID.sql
+    echo  "INSERT INTO ds_in (fun_call_id, ds_id, parameter) VALUES ('$fid', '$value', 'undefined');"  >> /tmp/$RUNID.sql
 done < function-inputs.txt
 
 
@@ -93,25 +95,23 @@
 echo "    - Compound functions."
 while read start duration thread final_state procname ; do
     if [ "$duration" != "last-event-line" ]; then
-	compoundid=$WFID$thread
-	echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$compoundid', 'compound', '$procname', '$WF');"  >> /tmp/$RUNID.sql
+	echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$thread', 'compound', '$procname', '$WF');"  >> /tmp/$RUNID.sql
     fi
 done < compound.event
 
 while read start duration thread final_state procname ; do
     if [ "$duration" != "last-event-line" ]; then
-	fqid=$WFID$thread
-	echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$fqid', 'internal', '$procname', '$WF');"  >> /tmp/$RUNID.sql
+	echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$thread', 'internal', '$procname', '$WF');"  >> /tmp/$RUNID.sql
     fi	
 done < internalproc.event
 
 while read t ; do 
-    thread="${WFID}$t"
-    echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$thread', 'scope', 'scope', '$WF');"  >> /tmp/$RUNID.sql
+    echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$t', 'scope', 'scope', '$WF');"  >> /tmp/$RUNID.sql
 done < scopes.txt
 
 echo "    - Variable consumption and production."
 while read thread direction dataset variable rest; do 
+    fid=$(echo $thread | awk -F ":" '{print $3}')
     if [ "$direction" == "input" ] ; then
 	table=ds_in
     else
@@ -119,7 +119,7 @@
     fi
     
     echo "INSERT INTO ds (id) VALUES ('$dataset');"  >> /tmp/$RUNID.sql
-    echo "INSERT INTO $table (fun_call_id, ds_id, parameter) VALUES ('$thread', '$dataset', '$variable');"  >> /tmp/$RUNID.sql
+    echo "INSERT INTO $table (fun_call_id, ds_id, parameter) VALUES ('$fid', '$dataset', '$variable');"  >> /tmp/$RUNID.sql
 done < tie-data-invocs.txt
 
 
@@ -147,28 +147,30 @@
 	minorpagefaults=$(echo $runtime | awk -F "," '{print $12}' | awk -F ":" '{print $2}')
 	contextswitchesinv=$(echo $runtime | awk -F "," '{print $13}' | awk -F ":" '{print $2}')
 	contextswitchesvol=$(echo $runtime | awk -F "," '{print $14}' | awk -F ":" '{print $2}')
-	echo "UPDATE app_exec SET maxrss=$maxrss, walltime=$walltime, systime=$systime, usertime=$usertime, cpu=$cpu, fsin=$fsin, fsout=$fsout, timesswpd=$timesswapped, socketrecv=$socketrecv, socketsent=$socketsent, majpfaults=$majorpagefaults, minpfaults=$minorpagefaults, ctxswinv=$contextswitchesinv, ctxswvol=$contextswitchesvol where id='$execute2_id';"  >> /tmp/$RUNID.sql
+	echo "UPDATE app_exec SET maxrss=$maxrss, walltime=$walltime, systime=$systime, usertime=$usertime, cpu=$cpu, fsin=$fsin, fsout=$fsout, timesswpd=$timesswapped, socketrecv=$socketrecv, socketsent=$socketsent, majpfaults=$majorpagefaults, minpfaults=$minorpagefaults, ctxswinv=$contextswitchesinv, ctxswvol=$contextswitchesvol where id='$execute2_id' and run_id='$WF';"  >> /tmp/$RUNID.sql
     done < runtime.txt
 fi
 
 echo "    - Function call names."
 while read thread appname; do
-    echo  "UPDATE fun_call SET name='$appname' WHERE id='$thread';"  >> /tmp/$RUNID.sql
+    fid=$(echo $thread | awk -F ":" '{print $3}')
+    echo  "UPDATE fun_call SET name='$appname' WHERE id='$fid';"  >> /tmp/$RUNID.sql
 done < invocation-procedure-names.txt
 
 echo "    - Wrapper log extra info."
 if [ -f extrainfo.txt ]; then
     while read execute2_id extrainfo; do
+	eid=$(echo $execute2_id | awk -F ":" '{print $3}')
 	echo $extrainfo | awk -F ";"  '{ for (i = 1; i <= NF; i++)
                                                print $i
                                          }' | awk -F "=" '{ print $1 " " $2 }' | awk -F ":" '{ print $1 " " $2 }' > fields.txt
-	id=$($SQLCMD --tuples-only -c "select app_fun_call_id from app_exec where id='$execute2_id';" | awk '{print $1}')
+	fid=$($SQLCMD --tuples-only -c "select app_fun_call_id from app_exec where id='$eid' and run_id='$WF';" | awk '{print $1}')
 	while read name type value; do
 	    if [ "$type" = "num" ]; then
-		echo "INSERT INTO annot_fun_call_num (id, name, value) VALUES ('$id', '$name', $value);"  >> /tmp/$RUNID.sql
+		echo "INSERT INTO annot_app_exec_num (id, fun_call_id, run_id, name, value) VALUES ('$eid', '$fid', '$name', $value);"  >> /tmp/$RUNID.sql
 	    fi 
 	    if [ "$type" = "txt" ]; then
-		echo "INSERT INTO annot_fun_call_text (id, name, value) VALUES ('$id', '$name', '$value');"  >> /tmp/$RUNID.sql
+		echo "INSERT INTO annot_app_exec_text (id, fun_call_id, run_id, name, value) VALUES ('$eid', '$fid', '$name', '$value');"  >> /tmp/$RUNID.sql
 	    fi
 	done < fields.txt
     done < extrainfo.txt




More information about the Swift-commit mailing list