[Swift-commit] r5756 - provenancedb
lgadelha at ci.uchicago.edu
lgadelha at ci.uchicago.edu
Wed Apr 25 14:36:49 CDT 2012
Author: lgadelha
Date: 2012-04-25 14:36:49 -0500 (Wed, 25 Apr 2012)
New Revision: 5756
Modified:
provenancedb/prov-init.sql
provenancedb/prov-to-sql.sh
Log:
Simplified identifiers using foreign keys.
Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql 2012-04-24 19:24:13 UTC (rev 5755)
+++ provenancedb/prov-init.sql 2012-04-25 19:36:49 UTC (rev 5756)
@@ -71,11 +71,12 @@
-- process types: internal, rootthread, execute, function, compound, scope, operator
create table fun_call
(
- id varchar(256) primary key,
+ id varchar(256),
+ run_id varchar(256) references run (id) on delete cascade,
type varchar(16),
name varchar(256), -- in the case of an execute this refers to the transformation name in tc.data
- run_id varchar(256) references run (id) on delete cascade -- normalize: workflow_id of sub-procedure determined
- -- by compound procedure
+ primary key (id, run_id)
+
);
-- this gives information about each execute.
@@ -83,12 +84,15 @@
-- swift logs is also stored here. an execute is an OPM process.
create table app_fun_call
(
- id varchar(256) primary key references fun_call (id) on delete cascade,
+ id varchar(256),
+ run_id varchar(256),
name varchar(256), -- name of the app procedure that invokes the transformation
start_time numeric,
duration numeric,
final_state varchar(32),
- scratch varchar(2048)
+ scratch varchar(2048),
+ foreign key (id, run_id) references fun_call,
+ primary key (id, run_id)
);
-- this gives information about each application execution attempt, including
@@ -96,8 +100,9 @@
-- information such as wrapper logs
create table app_exec
(
- id varchar(256) primary key,
- app_fun_call_id varchar(256) references app_fun_call (id) on delete cascade,
+ id varchar(256),
+ app_fun_call_id varchar(256),
+ run_id varchar(256),
start_time numeric,
duration numeric,
final_state varchar(32),
@@ -115,21 +120,26 @@
majpfaults numeric,
minpfaults numeric,
ctxswinv numeric,
- ctxswvol numeric
+ ctxswvol numeric,
+ foreign key(app_fun_call_id, run_id) references app_fun_call,
+ primary key(id, app_fun_call_id, run_id)
);
-- app execution runtime info extracted from the /proc filesystem (assumes the app executed
-- in a Linux host)
create table rt_info
(
- app_exec_id varchar(256) references app_exec (id) on delete cascade,
+ app_exec_id varchar(256),
+ app_fun_call_id varchar(256),
+ run_id varchar(256),
timestamp numeric,
cpu_usage numeric,
max_phys_mem numeric,
max_virt_mem numeric,
io_read numeric,
io_write numeric,
- primary key (app_exec_id, timestamp)
+ foreign key (app_exec_id, app_fun_call_id, run_id) references app_exec,
+ primary key (app_exec_id, app_fun_call_id, run_id, timestamp)
);
-- ds stores all dataset identifiers.
@@ -175,22 +185,26 @@
-- input to and output from each process that is a Swift execution
create table ds_in
(
- fun_call_id varchar(256) references fun_call (id) on delete cascade,
+ fun_call_id varchar(256),
+ run_id varchar(256),
ds_id varchar(256) references ds (id) on delete cascade,
parameter varchar(256), -- the name of the parameter in this execute that
-- this dataset was bound to. sometimes this must
-- be contrived (for example, in positional varargs)
- primary key (fun_call_id,ds_id,parameter)
+ foreign key (fun_call_id, run_id) references fun_call,
+ primary key (fun_call_id, run_id, ds_id, parameter)
);
create table ds_out
(
- fun_call_id varchar(256) references fun_call (id) on delete cascade,
+ fun_call_id varchar(256),
+ run_id varchar(256),
ds_id varchar(256) references ds (id) on delete cascade,
parameter varchar(256), -- the name of the parameter in this execute that
-- this dataset was bound to. sometimes this must
-- be contrived (for example, in positional varargs)
- primary key (fun_call_id,ds_id,parameter)
+ foreign key (fun_call_id, run_id) references fun_call,
+ primary key (fun_call_id, run_id, ds_id, parameter)
);
@@ -210,17 +224,21 @@
);
create table annot_fun_call_num
- ( fun_call_id varchar(256) references fun_call (id) on delete cascade,
- name varchar(256),
- value numeric,
- primary key (fun_call_id, name)
+ ( fun_call_id varchar(256),
+ run_id varchar(256),
+ name varchar(256),
+ value numeric,
+ foreign key (fun_call_id, run_id) references fun_call,
+ primary key (fun_call_id, run_id, name)
);
create table annot_fun_call_text
- ( fun_call_id varchar(256) references fun_call (id) on delete cascade,
- name varchar(256),
- value varchar(2048),
- primary key (fun_call_id, name)
+ ( fun_call_id varchar(256),
+ run_id varchar(256),
+ name varchar(256),
+ value varchar(2048),
+ foreign key (fun_call_id, run_id) references fun_call,
+ primary key (fun_call_id, run_id, name)
);
create table annot_run_num
@@ -238,17 +256,23 @@
);
create table annot_app_exec_num
- ( run_id varchar(256) references run (id) on delete cascade,
- name varchar(256),
- value numeric,
- primary key (run_id, name)
+ ( id varchar(256),
+ app_fun_call_id varchar(256),
+ run_id varchar(256),
+ name varchar(256),
+ value numeric,
+ foreign key (id, app_fun_call_id, run_id) references app_exec,
+ primary key (id, app_fun_call_id, run_id, name)
);
create table annot_app_exec_text
- ( run_id varchar(256) references run (id) on delete cascade,
- name varchar(256),
- value varchar(2048),
- primary key (run_id, name)
+ ( id varchar(256),
+ app_fun_call_id varchar(256),
+ run_id varchar(256),
+ name varchar(256),
+ value varchar(2048),
+ foreign key (id, app_fun_call_id, run_id) references app_exec,
+ primary key (id, app_fun_call_id, run_id, name)
);
Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh 2012-04-24 19:24:13 UTC (rev 5755)
+++ provenancedb/prov-to-sql.sh 2012-04-25 19:36:49 UTC (rev 5756)
@@ -16,16 +16,16 @@
echo " - Function calls."
while read time duration thread localthread endstate tr_name scratch; do
- echo "INSERT INTO fun_call (id, type, run_id) VALUES ('$thread', 'execute', '$WF');" >> /tmp/$RUNID.sql
- echo "INSERT INTO app_fun_call (id, name, start_time, duration, final_state, scratch) VALUES ('$thread', '$tr_name', $time, $duration, '$endstate', '$scratch');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO fun_call (id, type, run_id) VALUES ('$localthread', 'execute', '$WF');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO app_fun_call (id, run_id, name, start_time, duration, final_state, scratch) VALUES ('$localthread', '$WF', '$tr_name', $time, $duration, '$endstate', '$scratch');" >> /tmp/$RUNID.sql
done < execute.global.event
echo " - Application executions."
while read start_time duration globalid id endstate thread site scratch; do
# cut off the last component of the thread, so that we end up at the
# parent thread id which should correspond with the execute-level ID
- inv_id="$WFID$(echo $thread | sed 's/-[^-]*$//')"
- echo "INSERT INTO app_exec (id, app_fun_call_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');" >> /tmp/$RUNID.sql
+ inv_id="$(echo $thread | sed 's/-[^-]*$//')"
+ echo "INSERT INTO app_exec (id, app_fun_call_id, run_id, start_time, duration, final_state, site) VALUES ('$id', '$inv_id', '$WF', $start_time, $duration, '$endstate', '$site');" >> /tmp/$RUNID.sql
done < execute2.global.event
echo " - Mapped variables."
@@ -45,9 +45,9 @@
echo "INSERT INTO ds (id) VALUES ('$outer');" >> /tmp/$RUNID.sql
echo "INSERT INTO ds (id) VALUES ('$inner');" >> /tmp/$RUNID.sql
echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');" >> /tmp/$RUNID.sql
- echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('${WFID}constructor:$outer', 'constructor', 'constructor', '$WF');" >> /tmp/$RUNID.sql
- echo "INSERT INTO ds_in (fun_call_id, ds_id, parameter) VALUES ('${WFID}constructor:$outer', '$inner', 'element');" >> /tmp/$RUNID.sql
- echo "INSERT INTO ds_out (fun_call_id, ds_id, parameter) VALUES ('${WFID}constructor:$outer', '$outer', 'collection');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('constructor:$outer', 'constructor', 'constructor', '$WF');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO ds_in (fun_call_id, ds_id, parameter) VALUES ('constructor:$outer', '$inner', 'element');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO ds_out (fun_call_id, ds_id, parameter) VALUES ('constructor:$outer', '$outer', 'collection');" >> /tmp/$RUNID.sql
done < tie-containers.txt
echo " - Operator calls."
@@ -58,7 +58,7 @@
rhs=$(echo $rhs | awk 'BEGIN { FS = "=" }; {print $2}')
result=$(echo $result | awk 'BEGIN { FS = "=" }; {print $2}')
- operatorid="${WFID}operator:$thread"
+ operatorid="operator:$thread"
echo "INSERT INTO ds (id) VALUES ('$lhs');" >> /tmp/$RUNID.sql
echo "INSERT INTO ds (id) VALUES ('$rhs');" >> /tmp/$RUNID.sql
@@ -71,14 +71,16 @@
echo " - Built-in function calls."
while read id name output; do
+ fid=$(echo $id | awk -F ":" '{print $3}')
echo "INSERT INTO ds (id) VALUES ('$output');" >> /tmp/$RUNID.sql
- echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$id', 'function', '$name', '$WF');" >> /tmp/$RUNID.sql
- echo "INSERT INTO ds_out (fun_call_id, ds_id, parameter) VALUES ('$id', '$output', 'result');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$fid', 'function', '$name', '$WF');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO ds_out (fun_call_id, ds_id, parameter) VALUES ('$fid', '$output', 'result');" >> /tmp/$RUNID.sql
done < functions.txt
while read id value; do
+ fid=$(echo $id | awk -F ":" '{print $3}')
echo "INSERT INTO ds (id) VALUES ('$value');" >> /tmp/$RUNID.sql
- echo "INSERT INTO ds_in (fun_call_id, ds_id, parameter) VALUES ('$id', '$value', 'undefined');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO ds_in (fun_call_id, ds_id, parameter) VALUES ('$fid', '$value', 'undefined');" >> /tmp/$RUNID.sql
done < function-inputs.txt
@@ -93,25 +95,23 @@
echo " - Compound functions."
while read start duration thread final_state procname ; do
if [ "$duration" != "last-event-line" ]; then
- compoundid=$WFID$thread
- echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$compoundid', 'compound', '$procname', '$WF');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$thread', 'compound', '$procname', '$WF');" >> /tmp/$RUNID.sql
fi
done < compound.event
while read start duration thread final_state procname ; do
if [ "$duration" != "last-event-line" ]; then
- fqid=$WFID$thread
- echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$fqid', 'internal', '$procname', '$WF');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$thread', 'internal', '$procname', '$WF');" >> /tmp/$RUNID.sql
fi
done < internalproc.event
while read t ; do
- thread="${WFID}$t"
- echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$thread', 'scope', 'scope', '$WF');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$t', 'scope', 'scope', '$WF');" >> /tmp/$RUNID.sql
done < scopes.txt
echo " - Variable consumption and production."
while read thread direction dataset variable rest; do
+ fid=$(echo $thread | awk -F ":" '{print $3}')
if [ "$direction" == "input" ] ; then
table=ds_in
else
@@ -119,7 +119,7 @@
fi
echo "INSERT INTO ds (id) VALUES ('$dataset');" >> /tmp/$RUNID.sql
- echo "INSERT INTO $table (fun_call_id, ds_id, parameter) VALUES ('$thread', '$dataset', '$variable');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO $table (fun_call_id, ds_id, parameter) VALUES ('$fid', '$dataset', '$variable');" >> /tmp/$RUNID.sql
done < tie-data-invocs.txt
@@ -147,28 +147,30 @@
minorpagefaults=$(echo $runtime | awk -F "," '{print $12}' | awk -F ":" '{print $2}')
contextswitchesinv=$(echo $runtime | awk -F "," '{print $13}' | awk -F ":" '{print $2}')
contextswitchesvol=$(echo $runtime | awk -F "," '{print $14}' | awk -F ":" '{print $2}')
- echo "UPDATE app_exec SET maxrss=$maxrss, walltime=$walltime, systime=$systime, usertime=$usertime, cpu=$cpu, fsin=$fsin, fsout=$fsout, timesswpd=$timesswapped, socketrecv=$socketrecv, socketsent=$socketsent, majpfaults=$majorpagefaults, minpfaults=$minorpagefaults, ctxswinv=$contextswitchesinv, ctxswvol=$contextswitchesvol where id='$execute2_id';" >> /tmp/$RUNID.sql
+ echo "UPDATE app_exec SET maxrss=$maxrss, walltime=$walltime, systime=$systime, usertime=$usertime, cpu=$cpu, fsin=$fsin, fsout=$fsout, timesswpd=$timesswapped, socketrecv=$socketrecv, socketsent=$socketsent, majpfaults=$majorpagefaults, minpfaults=$minorpagefaults, ctxswinv=$contextswitchesinv, ctxswvol=$contextswitchesvol where id='$execute2_id' and run_id='$WF';" >> /tmp/$RUNID.sql
done < runtime.txt
fi
echo " - Function call names."
while read thread appname; do
- echo "UPDATE fun_call SET name='$appname' WHERE id='$thread';" >> /tmp/$RUNID.sql
+ fid=$(echo $thread | awk -F ":" '{print $3}')
+ echo "UPDATE fun_call SET name='$appname' WHERE id='$fid';" >> /tmp/$RUNID.sql
done < invocation-procedure-names.txt
echo " - Wrapper log extra info."
if [ -f extrainfo.txt ]; then
while read execute2_id extrainfo; do
+ eid=$(echo $execute2_id | awk -F ":" '{print $3}')
echo $extrainfo | awk -F ";" '{ for (i = 1; i <= NF; i++)
print $i
}' | awk -F "=" '{ print $1 " " $2 }' | awk -F ":" '{ print $1 " " $2 }' > fields.txt
- id=$($SQLCMD --tuples-only -c "select app_fun_call_id from app_exec where id='$execute2_id';" | awk '{print $1}')
+ fid=$($SQLCMD --tuples-only -c "select app_fun_call_id from app_exec where id='$eid' and run_id='$WF';" | awk '{print $1}')
while read name type value; do
if [ "$type" = "num" ]; then
- echo "INSERT INTO annot_fun_call_num (id, name, value) VALUES ('$id', '$name', $value);" >> /tmp/$RUNID.sql
+ echo "INSERT INTO annot_app_exec_num (id, fun_call_id, run_id, name, value) VALUES ('$eid', '$fid', '$name', $value);" >> /tmp/$RUNID.sql
fi
if [ "$type" = "txt" ]; then
- echo "INSERT INTO annot_fun_call_text (id, name, value) VALUES ('$id', '$name', '$value');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO annot_app_exec_text (id, fun_call_id, run_id, name, value) VALUES ('$eid', '$fid', '$name', '$value');" >> /tmp/$RUNID.sql
fi
done < fields.txt
done < extrainfo.txt
More information about the Swift-commit
mailing list