[Swift-commit] r3746 - provenancedb
noreply at svn.ci.uchicago.edu
noreply at svn.ci.uchicago.edu
Wed Dec 8 12:15:33 CST 2010
Author: lgadelha
Date: 2010-12-08 12:15:33 -0600 (Wed, 08 Dec 2010)
New Revision: 3746
Modified:
provenancedb/import-run-to-sql
provenancedb/pql_functions.sql
provenancedb/prepare-provenance-chart
provenancedb/prov-to-sql.sh
provenancedb/swift-prov-import-all-logs
Log:
Minor fixes.
Modified: provenancedb/import-run-to-sql
===================================================================
--- provenancedb/import-run-to-sql 2010-12-08 01:21:35 UTC (rev 3745)
+++ provenancedb/import-run-to-sql 2010-12-08 18:15:33 UTC (rev 3746)
@@ -9,5 +9,5 @@
# with kickstart records expected to be in the same directory as the
# log file.
-PROVIDPREFIX=$PROVIDPREFIX prov-to-sql.sh $1
+version=$version prov-to-sql.sh $1
Modified: provenancedb/pql_functions.sql
===================================================================
--- provenancedb/pql_functions.sql 2010-12-08 01:21:35 UTC (rev 3745)
+++ provenancedb/pql_functions.sql 2010-12-08 18:15:33 UTC (rev 3746)
@@ -1,23 +1,32 @@
+-- Set of PostgreSQL-specific SQL functions and PL/pgSQL procedures
+-- to query provenance.
+
-- SQL Functions
-CREATE OR REPLACE FUNCTION list_runs() RETURNS SETOF VARCHAR AS $$
- SELECT DISTINCT(log_filename) FROM workflow;
+-- list_runs_* lists workflows recorded in the database by id or log_filename
+-- Drop the row type first so this script can be re-run. CASCADE also drops
+-- functions that depend on the type; IF EXISTS keeps a database that does
+-- not have the type yet from raising an error.
+DROP TYPE IF EXISTS list_runs_type CASCADE;
+
+CREATE TYPE list_runs_type
+AS (id VARCHAR,
+    log_filename VARCHAR,
+    swift_version VARCHAR,
+    start_time TIMESTAMP WITH TIME ZONE,
+    duration NUMERIC,
+    final_state VARCHAR);
+
+-- list_runs() was previously declared RETURNS SETOF VARCHAR. CREATE OR
+-- REPLACE cannot change the return type of an existing function, so any
+-- older definition has to be dropped explicitly before it is recreated.
+DROP FUNCTION IF EXISTS list_runs();
+
+-- Returns one row per entry of the workflow table: id, log_filename,
+-- swift_version, start_time passed through TO_TIMESTAMP, duration, and
+-- import_status reported as final_state.
+CREATE OR REPLACE FUNCTION list_runs()
+RETURNS SETOF list_runs_type AS $$
+ SELECT id, log_filename, swift_version, TO_TIMESTAMP(start_time),
+        duration, import_status AS final_state
+ FROM workflow;
+$$ LANGUAGE SQL;
--- lists distinct processes by name in a workflow
-CREATE OR REPLACE FUNCTION process_names(wf_id VARCHAR) RETURNS SETOF VARCHAR AS $$
- SELECT DISTINCT(process.name)
- FROM process
- WHERE process.workflow_id=$1;
-$$ LANGUAGE SQL;
-
-- lists variations in a parameter's value across workflows
-
DROP TYPE param_across_wf_type CASCADE;
CREATE TYPE param_across_wf_type AS (workflow VARCHAR, parameter VARCHAR, value VARCHAR);
-CREATE OR REPLACE FUNCTION param_across_wf(param_name VARCHAR) RETURNS SETOF param_across_wf_type AS $$
+CREATE OR REPLACE FUNCTION param_across_wf(param_name VARCHAR)
+RETURNS SETOF param_across_wf_type AS $$
SELECT workflow.log_filename,ds_usage.param_name,variable.value
FROM variable,ds_usage,process,workflow
WHERE variable.id=ds_usage.dataset_id AND ds_usage.process_id=process.id AND
@@ -25,6 +34,16 @@
GROUP BY workflow.log_filename,ds_usage.param_name,variable.value;
$$ LANGUAGE SQL;
+-- lists variations of the values of a set of parameters
+
+DROP TYPE param_across_wf_class_type CASCADE;
+CREATE TYPE param_across_wf_class_type AS (workflow VARCHAR, parameter VARCHAR, value VARCHAR);
+
+-- Placeholder: the query for this function has not been written yet.
+-- A PL/pgSQL body must contain a BEGIN ... END block, so the stub raises an
+-- explicit error when called instead of making the whole script fail to load.
+-- TODO: implement the lookup using param_name ($1) and wf_class ($2).
+CREATE OR REPLACE FUNCTION param_across_wf_class(param_name VARCHAR, wf_class VARCHAR)
+RETURNS SETOF param_across_wf_class_type AS $$
+BEGIN
+  RAISE EXCEPTION 'param_across_wf_class is not implemented yet';
+END;
+$$ LANGUAGE plpgsql;
+
-- correlate a parameter with workflow runtime statistics
DROP TYPE correlate_param_runtime_type CASCADE;
CREATE TYPE correlate_param_runtime_type
@@ -34,21 +53,41 @@
parameter VARCHAR,
parameter_value VARCHAR);
-CREATE OR REPLACE FUNCTION correlate_param_runtime(param_name VARCHAR) RETURNS SETOF correlate_param_runtime_type AS $$
+CREATE OR REPLACE FUNCTION correlate_param_runtime(param_name VARCHAR)
+RETURNS SETOF correlate_param_runtime_type AS $$
SELECT A.workflow,to_timestamp(B.start_time),B.duration,A.parameter,A.value
FROM param_across_wf($1) AS A, workflow AS B
WHERE A.workflow=B.log_filename;
$$ LANGUAGE SQL;
+-- Pairs each value of the parameter named by $1 with the id, start time
+-- (through to_timestamp) and duration of the workflow that used it.
+-- This definition has the same signature as the one directly above it, so it
+-- replaces that one when the script is loaded.
+CREATE OR REPLACE FUNCTION correlate_param_runtime(param_name VARCHAR)
+RETURNS SETOF correlate_param_runtime_type AS $$
+ SELECT wf.id,
+        to_timestamp(wf.start_time),
+        wf.duration,
+        du.param_name,
+        v.value
+ FROM workflow AS wf
+ INNER JOIN process AS p ON p.workflow_id = wf.id
+ INNER JOIN ds_usage AS du ON du.process_id = p.id
+ INNER JOIN variable AS v ON v.id = du.dataset_id
+ WHERE du.param_name = $1
+$$ LANGUAGE SQL;
+
+-- Returns each distinct process name recorded for the workflow whose id is
+-- given as the first argument.
+CREATE OR REPLACE FUNCTION process_names(wf_id VARCHAR)
+RETURNS SETOF VARCHAR AS $$
+ SELECT DISTINCT p.name
+ FROM process AS p
+ WHERE p.workflow_id = $1;
+$$ LANGUAGE SQL;
+
+-- lists distinct processes in a
+
-- OOPS-specific functions
+
+
CREATE OR REPLACE FUNCTION list_oops_runs() RETURNS SETOF VARCHAR AS $$
SELECT DISTINCT(value) FROM annot_wf_txt WHERE annot_wf_txt.name=('oops_run_id');
$$ LANGUAGE SQL;
DROP TYPE oops_param_across_wf_type CASCADE;
CREATE TYPE oops_param_across_wf_type AS (oops_run_id VARCHAR, param_name VARCHAR, variable VARCHAR);
-
CREATE OR REPLACE FUNCTION oops_param_across_wf(VARCHAR) RETURNS SETOF oops_param_across_wf_type AS $$
SELECT annot_wf_txt.value,ds_usage.param_name,variable.value
FROM variable,ds_usage,process,annot_wf_txt
@@ -63,7 +102,6 @@
DROP TYPE oops_summary CASCADE;
CREATE TYPE oops_summary AS (oops_run_id VARCHAR, start_time TIMESTAMP WITH TIME ZONE, duration_sec NUMERIC, swift_version VARCHAR);
-
CREATE OR REPLACE FUNCTION oops_run_summary(varchar) RETURNS SETOF oops_summary AS $$
SELECT annot_wf_txt.value as oops_run_id, to_timestamp(workflow.start_time) as start_time,
workflow.duration as duration_sec,workflow.swift_version as swift_version
@@ -80,16 +118,15 @@
DROP TYPE oops_wf_param_summary CASCADE;
CREATE TYPE oops_wf_param_summary AS (oops_run_id varchar, param_name varchar, value varchar);
-
CREATE OR REPLACE FUNCTION oops_variable_summary() RETURNS SETOF oops_wf_param_summary AS $$
SELECT annot_wf_txt.value,ds_usage.param_name,variable.value
FROM variable,ds_usage,process,annot_wf_txt
WHERE variable.id=ds_usage.dataset_id and ds_usage.process_id=process.id and process.workflow_id=annot_wf_txt.id;
$$ LANGUAGE SQL;
+
DROP TYPE oops_param_summary CASCADE;
CREATE TYPE oops_param_summary AS (param_name varchar, value varchar);
-
CREATE OR REPLACE FUNCTION oops_science_summary(varchar) RETURNS SETOF oops_param_summary AS $$
SELECT ds_usage.param_name,variable.value
FROM variable,ds_usage,process,annot_wf_txt
Modified: provenancedb/prepare-provenance-chart
===================================================================
--- provenancedb/prepare-provenance-chart 2010-12-08 01:21:35 UTC (rev 3745)
+++ provenancedb/prepare-provenance-chart 2010-12-08 18:15:33 UTC (rev 3746)
@@ -8,8 +8,8 @@
export RUNID=$(basename $1 .log)
-export WFID=$PROVIDPREFIX"execute:${RUNID}:"
-export EXECUTE2PREFIX=$PROVIDPREFIX"execute2:${RUNID}:"
+export WFID="execute:${RUNID}:"
+export EXECUTE2PREFIX="execute2:${RUNID}:"
# will output log information about datasets from a log file passed as $1
Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh 2010-12-08 01:21:35 UTC (rev 3745)
+++ provenancedb/prov-to-sql.sh 2010-12-08 18:15:33 UTC (rev 3746)
@@ -2,10 +2,10 @@
export RUNID=$(basename $1 .log)
-export WFID=$PROVIDPREFIX"execute:${RUNID}:"
+export WFID="execute:${RUNID}:"
# TODO is there already a URI form for identifying workflows?
-export WF=$PROVIDPREFIX"execute:${RUNID}:run"
+export WF="${RUNID}"
echo Generating SQL for $RUNID
@@ -27,34 +27,48 @@
echo "INSERT INTO execute2 (id, execute_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');" >> tmp-e2.sql
done < execute2.global.event
-while read col1 col2 col3 col4 col5 threadst namest lhsst rhsst resultst; do
- thread=`echo $threadst | awk 'BEGIN { FS = "=" }; {print $2}'`
- name=`echo $namest | awk 'BEGIN { FS = "=" }; {print $2}'`
- lhs=`echo $lhsst | awk 'BEGIN { FS = "=" }; {print $2}'`
- rhs=`echo $rhsst | awk 'BEGIN { FS = "=" }; {print $2}'`
- result=`echo $resultst | awk 'BEGIN { FS = "=" }; {print $2}'`
+while read col1 col2 col3 col4 col5 thread name lhs rhs result; do
+ thread=$(echo $thread | awk 'BEGIN { FS = "=" }; {print $2}')
+ name=$(echo $name | awk 'BEGIN { FS = "=" }; {print $2}')
+ lhs=$(echo $lhs | awk 'BEGIN { FS = "=" }; {print $2}')
+ rhs=$(echo $rhs | awk 'BEGIN { FS = "=" }; {print $2}')
+ result=$(echo $result | awk 'BEGIN { FS = "=" }; {print $2}')
operatorid="${WFID}operator:$thread"
- echo "INSERT INTO dataset (id) VALUES ('$PROVIDPREFIX$lhs');" >> tmp-ds.sql
- echo "INSERT INTO dataset (id) VALUES ('$PROVIDPREFIX$rhs');" >> tmp-ds.sql
- echo "INSERT INTO dataset (id) VALUES ('$PROVIDPREFIX$result');" >> tmp-ds.sql
+ # When $version is 3726 or lower, remove the tag prefix from the operand and
+ # result ids. Assign with "name=", not "$name=" (the latter runs a command).
+ if [ "$version" -le 3726 ]; then
+   lhs=$(echo "$lhs" | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g')
+   rhs=$(echo "$rhs" | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g')
+   result=$(echo "$result" | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g')
+ fi
+
+ echo "INSERT INTO dataset (id) VALUES ('$lhs');" >> tmp-ds.sql
+ echo "INSERT INTO dataset (id) VALUES ('$rhs');" >> tmp-ds.sql
+ echo "INSERT INTO dataset (id) VALUES ('$result');" >> tmp-ds.sql
echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$operatorid', 'operator', '$name', '$WF');" >> tmp-p.sql
- echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$PROVIDPREFIX$lhs', 'lhs');" >> tmp-dsu.sql
- echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$PROVIDPREFIX$rhs', 'rhs');" >> tmp-dsu.sql
- echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'O', '$PROVIDPREFIX$result', 'result');" >> tmp-dsu.sql
+ echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$lhs', 'lhs');" >> tmp-dsu.sql
+ echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$rhs', 'rhs');" >> tmp-dsu.sql
+ echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'O', '$result', 'result');" >> tmp-dsu.sql
done < operators.txt
while read id name output; do
- echo "INSERT INTO dataset (id) VALUES ('$PROVIDPREFIX$output');" >> tmp-ds.sql
+ # When $version is 3726 or lower, remove the tag prefix from the output id.
+ # Assign with "output=", not "$output=" (the latter runs a command).
+ if [ "$version" -le 3726 ]; then
+   output=$(echo "$output" | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g')
+ fi
+ echo "INSERT INTO dataset (id) VALUES ('$output');" >> tmp-ds.sql
echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$id', 'function', '$name', '$WF');" >> tmp-p.sql
- echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'O', '$PROVIDPREFIX$output', 'result');" >> tmp-dsu.sql
+ echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'O', '$output', 'result');" >> tmp-dsu.sql
done < functions.txt
while read id value; do
# TODO need ordering/naming
- echo "INSERT INTO dataset (id) VALUES ('$PROVIDPREFIX$value');" >> tmp-ds.sql
- echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'I', '$PROVIDPREFIX$value', 'undefined');" >> tmp-dsu.sql
+
+ # When $version is 3726 or lower, remove the tag prefix from the input id.
+ # Assign with "value=", not "$value=" (the latter runs a command).
+ if [ "$version" -le 3726 ]; then
+   value=$(echo "$value" | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g')
+ fi
+
+ echo "INSERT INTO dataset (id) VALUES ('$value');" >> tmp-ds.sql
+ echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'I', '$value', 'undefined');" >> tmp-dsu.sql
done < function-inputs.txt
@@ -63,19 +77,35 @@
done < invocation-procedure-names.txt
while read outer inner; do
- echo "INSERT INTO dataset (id) VALUES ('$PROVIDPREFIX$outer');" >> tmp-ds.sql
- echo "INSERT INTO dataset (id) VALUES ('$PROVIDPREFIX$inner');" >> tmp-ds.sql
- echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$PROVIDPREFIX$outer', '$PROVIDPREFIX$inner');" >> tmp-dsc.sql
+
+ # When $version is 3726 or lower, remove the tag prefix from both container
+ # ids. Assign with "name=", not "$name=" (the latter runs a command).
+ if [ "$version" -le 3726 ]; then
+   outer=$(echo "$outer" | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g')
+   inner=$(echo "$inner" | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g')
+ fi
+
+ echo "INSERT INTO dataset (id) VALUES ('$outer');" >> tmp-ds.sql
+ echo "INSERT INTO dataset (id) VALUES ('$inner');" >> tmp-ds.sql
+ echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');" >> tmp-dsc.sql
done < tie-containers.txt
while read dataset filename; do
- echo "INSERT INTO dataset (id) VALUES ('$PROVIDPREFIX$dataset');" >> tmp-ds.sql
- echo "INSERT INTO file (id, filename) VALUES ('$PROVIDPREFIX$dataset', '$filename');" >> tmp-f.sql
+
+ # When $version is 3726 or lower, remove the tag prefix from the dataset id.
+ # Assign with "dataset=", not "$dataset=" (the latter runs a command).
+ if [ "$version" -le 3726 ]; then
+   dataset=$(echo "$dataset" | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g')
+ fi
+
+ echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
+ echo "INSERT INTO file (id, filename) VALUES ('$dataset', '$filename');" >> tmp-f.sql
done < dataset-filenames.txt
while read dataset value; do
- echo "INSERT INTO dataset (id) VALUES ('$PROVIDPREFIX$dataset');" >> tmp-ds.sql
- echo "INSERT INTO variable (id, value) VALUES ('$PROVIDPREFIX$dataset', '$value');" >> tmp-v.sql
+
+ # When $version is 3726 or lower, remove the tag prefix from the dataset id.
+ # Assign with "dataset=", not "$dataset=" (the latter runs a command).
+ if [ "$version" -le 3726 ]; then
+   dataset=$(echo "$dataset" | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g')
+ fi
+
+ echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
+ echo "INSERT INTO variable (id, value) VALUES ('$dataset', '$value');" >> tmp-v.sql
done < dataset-values.txt
while read start duration wfid rest; do
@@ -114,8 +144,13 @@
else
dir=O
fi
- echo "INSERT INTO dataset (id) VALUES ('$PROVIDPREFIX$dataset');" >> tmp-ds.sql
- echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$thread', '$dir', '$PROVIDPREFIX$dataset', '$variable');" >> tmp-dsu.sql
+
+ # When $version is 3726 or lower, remove the tag prefix from the dataset id.
+ # Assign with "dataset=", not "$dataset=" (the latter runs a command).
+ if [ "$version" -le 3726 ]; then
+   dataset=$(echo "$dataset" | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g')
+ fi
+
+ echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
+ echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$thread', '$dir', '$dataset', '$variable');" >> tmp-dsu.sql
done < tie-data-invocs.txt
while read id ; do
Modified: provenancedb/swift-prov-import-all-logs
===================================================================
--- provenancedb/swift-prov-import-all-logs 2010-12-08 01:21:35 UTC (rev 3745)
+++ provenancedb/swift-prov-import-all-logs 2010-12-08 18:15:33 UTC (rev 3746)
@@ -50,22 +50,19 @@
fi
export RUNID=$(basename $filename .log)
- if [ $version -le 3726 ]; then
- PROVIDPREFIX="tag:ci.uchicago.edu,2008:swiftlogs:"
- fi
- export WF=$PROVIDPREFIX"execute:${RUNID}:run"
+ export WF="${RUNID}"
echo "INSERT INTO workflow (id, log_filename, swift_version, import_status) VALUES ('$WF','$filename','$version','$wfstatus');" | $SQLCMD
echo version $version in log file $filename
echo ============= will import =============
- PROVIDPREFIX=$PROVIDPREFIX prepare-for-import $filename
+ prepare-for-import $filename
if [ "$?" != "0" ]; then
echo prepare-for-import failed
exit 2
fi
- PROVIDPREFIX=$PROVIDPREFIX import-run-to-sql $filename
+ version=$version import-run-to-sql $filename
if [ "$?" != "0" ]; then
echo import-run-to-sql failed
exit 3
More information about the Swift-commit
mailing list