[Swift-commit] r5786 - in provenancedb: . swift_mod
lgadelha at ci.uchicago.edu
Thu May 10 07:19:03 CDT 2012
Author: lgadelha
Date: 2012-05-10 07:27:54 -0500 (Thu, 10 May 2012)
New Revision: 5786
Added:
provenancedb/swift_mod/
provenancedb/swift_mod/_swiftwrap_runtime_aggregate
provenancedb/swift_mod/_swiftwrap_runtime_snapshots
provenancedb/swift_mod/create-everylog-vs-versions-data
Modified:
provenancedb/pql_functions.sql
provenancedb/prepare-provenance-chart
provenancedb/prov-init.sql
provenancedb/prov-to-sql.sh
provenancedb/swift-prov-import-all-logs
Log:
- Prospective provenance import scripts.
- Created the swift_mod directory containing the Swift modifications needed to gather runtime information about application executions.
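
A minimal sketch (not part of the patch below), assuming the RUNTIME_INFO samples logged by the modified wrapper have been imported into rt_info, of reading them back through the runtime_info view defined in prov-init.sql:

    SELECT app_exec_id, timestamp, cpu_usage, max_phys_mem, max_virt_mem, io_read, io_write
      FROM runtime_info
     ORDER BY app_exec_id, timestamp;
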
Modified: provenancedb/pql_functions.sql
===================================================================
--- provenancedb/pql_functions.sql 2012-05-09 22:18:28 UTC (rev 5785)
+++ provenancedb/pql_functions.sql 2012-05-10 12:27:54 UTC (rev 5786)
@@ -12,8 +12,8 @@
returns setof compare_run_by_parameter_type
as $$
select run_id, parameter, value
- from ds_io,fun_call,primitive
- where fun_call.id=ds_io.fun_call_id and ds_io.ds_id=primitive.id and parameter=$1;
+ from dataset_io,fun_call,primitive
+ where fun_call.id=dataset_io.function_call_id and dataset_io.dataset_id=primitive.id and parameter=$1;
$$ language sql;
-- PostgreSQL >= 9.0
@@ -26,13 +26,13 @@
-- AS $$
-- SELECT fun_call.run_id, ds_out.parameter, primitive.value
-- FROM primitive, ds_out, fun_call
--- WHERE primitive.id=ds_out.ds_id AND ds_out.fun_call_id=fun_call.id AND
+-- WHERE primitive.id=ds_out.dataset_id AND ds_out.fun_call_id=fun_call.id AND
-- ds_out.parameter=$1
-- GROUP BY fun_call.run_id, ds_out.parameter, primitive.value
-- UNION
-- SELECT fun_call.run_id, ds_in.parameter, primitive.value
-- FROM primitive, ds_in, fun_call
--- WHERE primitive.id=ds_in.ds_id AND ds_in.fun_call_id=fun_call.id AND
+-- WHERE primitive.id=ds_in.dataset_id AND ds_in.fun_call_id=fun_call.id AND
-- ds_in.parameter=$1
-- GROUP BY fun_call.run_id, ds_in.parameter, primitive.value
--$$ LANGUAGE SQL;
@@ -54,105 +54,105 @@
-- USING (workflow_id);
--$$ LANGUAGE SQL;
-DROP TYPE compare_run_by_annot_num_type;
+DROP TYPE compare_run_by_annot_num_type CASCADE;
CREATE TYPE compare_run_by_annot_num_type as (run_id VARCHAR, name VARCHAR, value NUMERIC);
CREATE OR REPLACE FUNCTION compare_run_by_annot_num(name VARCHAR)
RETURNS SETOF compare_run_by_annot_num_type
AS $$
- SELECT fun_call.run_id, annot_ds_num.name, annot_ds_num.value
- FROM annot_ds_num,ds_io,ds_cont,fun_call
- WHERE annot_ds_num.ds_id=ds_cont.in_id AND ds_cont.out_id=ds_io.ds_id AND
- ds_io.fun_call_id=fun_call.id AND annot_ds_num.name=$1
+ SELECT fun_call.run_id, annot_dataset_num.name, annot_dataset_num.value
+ FROM annot_dataset_num,dataset_io,dataset_containment,fun_call
+ WHERE annot_dataset_num.dataset_id=dataset_containment.in_id AND dataset_containment.out_id=dataset_io.dataset_id AND
+ dataset_io.function_call_id=fun_call.id AND annot_dataset_num.name=$1
UNION
- SELECT fun_call.run_id, annot_ds_num.name, annot_ds_num.value
- FROM fun_call, ds_io, annot_ds_num
- WHERE fun_call.id=ds_io.fun_call_id and ds_io.ds_id=annot_ds_num.ds_id and
- annot_ds_num.name=$1
+ SELECT fun_call.run_id, annot_dataset_num.name, annot_dataset_num.value
+ FROM fun_call, dataset_io, annot_dataset_num
+ WHERE fun_call.id=dataset_io.function_call_id and dataset_io.dataset_id=annot_dataset_num.dataset_id and
+ annot_dataset_num.name=$1
UNION
- SELECT fun_call.run_id, annot_fun_call_num.name, annot_fun_call_num.value
- FROM fun_call, annot_fun_call_num
- WHERE fun_call.id=annot_fun_call_num.fun_call_id and annot_fun_call_num.name=$1
+ SELECT fun_call.run_id, annot_function_call_num.name, annot_function_call_num.value
+ FROM fun_call, annot_function_call_num
+ WHERE fun_call.id=annot_function_call_num.function_call_id and annot_function_call_num.name=$1
UNION
- SELECT run.id as run_id, annot_run_num.name, annot_run_num.value
- FROM run, annot_run_num
- WHERE run.id=annot_run_num.run_id and annot_run_num.name=$1
+ SELECT run.id as run_id, annot_script_run_num.name, annot_script_run_num.value
+ FROM run, annot_script_run_num
+ WHERE run.id=annot_script_run_num.script_run_id and annot_script_run_num.name=$1
$$ LANGUAGE SQL;
-DROP TYPE compare_run_by_key_numeric_type;
+DROP TYPE compare_run_by_key_numeric_type CASCADE;
CREATE TYPE compare_run_by_key_numeric_type as (run_id VARCHAR, name VARCHAR, value NUMERIC);
CREATE OR REPLACE FUNCTION compare_run_by_key_numeric(name VARCHAR)
RETURNS SETOF compare_run_by_key_numeric_type
AS $$
- SELECT fun_call.run_id, annot_ds_n.name, annot_ds_n.value
- FROM annot_ds_n,ds_io,ds_cont,fun_call
- WHERE annot_ds_n.ds_id=ds_cont.in_id AND ds_cont.out_id=ds_io.ds_id AND
- ds_io.fun_call_id=fun_call.id AND annot_ds_n.name=$1
+ SELECT fun_call.run_id, annot_dataset_num.name, annot_dataset_num.value
+ FROM annot_dataset_num,dataset_io,dataset_containment,fun_call
+ WHERE annot_dataset_num.dataset_id=dataset_containment.in_id AND dataset_containment.out_id=dataset_io.dataset_id AND
+ dataset_io.function_call_id=fun_call.id AND annot_dataset_num.name=$1
UNION
- SELECT fun_call.run_id, annot_ds_n.name, annot_ds_n.value
- FROM fun_call, ds_io, annot_ds_n
- WHERE fun_call.id=ds_io.fun_call_id and ds_io.ds_id=annot_ds_n.ds_id and
- annot_ds_n.name=$1
+ SELECT fun_call.run_id, annot_dataset_num.name, annot_dataset_num.value
+ FROM fun_call, dataset_io, annot_dataset_num
+ WHERE fun_call.id=dataset_io.function_call_id and dataset_io.dataset_id=annot_dataset_num.dataset_id and
+ annot_dataset_num.name=$1
UNION
- SELECT fun_call.run_id, annot_fun_call_n.name, annot_fun_call_n.value
- FROM fun_call, annot_fun_call_n
- WHERE fun_call.id=annot_fun_call_n.fun_call_id and annot_fun_call_n.name=$1
+ SELECT fun_call.run_id, annot_function_call_num.name, annot_function_call_num.value
+ FROM fun_call, annot_function_call_num
+ WHERE fun_call.id=annot_function_call_num.function_call_id and annot_function_call_num.name=$1
UNION
- SELECT run.id as run_id, annot_run_n.name, annot_run_n.value
- FROM run, annot_run_n
- WHERE run.id=annot_run_n.run_id and annot_run_n.name=$1
+ SELECT run.id as run_id, annot_script_run_num.name, annot_script_run_num.value
+ FROM run, annot_script_run_num
+ WHERE run.id=annot_script_run_num.script_run_id and annot_script_run_num.name=$1
$$ LANGUAGE SQL;
-DROP TYPE compare_run_by_annot_txt_type;
+DROP TYPE compare_run_by_annot_txt_type CASCADE;
CREATE TYPE compare_run_by_annot_txt_type as (run_id VARCHAR, name VARCHAR, value VARCHAR);
CREATE OR REPLACE FUNCTION compare_run_by_annot_txt(name VARCHAR)
RETURNS SETOF compare_run_by_annot_txt_type
AS $$
- SELECT fun_call.run_id, annot_ds_text.name, annot_ds_text.value
- FROM annot_ds_text,ds_io,ds_cont,fun_call
- WHERE annot_ds_text.ds_id=ds_cont.in_id AND ds_cont.out_id=ds_io.ds_id AND
- ds_io.fun_call_id=fun_call.id AND annot_ds_text.name=$1
+ SELECT fun_call.run_id, annot_dataset_text.name, annot_dataset_text.value
+ FROM annot_dataset_text,dataset_io,dataset_containment,fun_call
+ WHERE annot_dataset_text.dataset_id=dataset_containment.in_id AND dataset_containment.out_id=dataset_io.dataset_id AND
+ dataset_io.function_call_id=fun_call.id AND annot_dataset_text.name=$1
UNION
- SELECT fun_call.run_id, annot_ds_text.name, annot_ds_text.value
- FROM fun_call, ds_io, annot_ds_text
- WHERE fun_call.id=ds_io.fun_call_id and ds_io.ds_id=annot_ds_text.ds_id and
- annot_ds_text.name=$1
+ SELECT fun_call.run_id, annot_dataset_text.name, annot_dataset_text.value
+ FROM fun_call, dataset_io, annot_dataset_text
+ WHERE fun_call.id=dataset_io.function_call_id and dataset_io.dataset_id=annot_dataset_text.dataset_id and
+ annot_dataset_text.name=$1
UNION
- SELECT fun_call.run_id, annot_fun_call_text.name, annot_fun_call_text.value
- FROM fun_call, annot_fun_call_text
- WHERE fun_call.id=annot_fun_call_text.fun_call_id and annot_fun_call_text.name=$1
+ SELECT fun_call.run_id, annot_function_call_text.name, annot_function_call_text.value
+ FROM fun_call, annot_function_call_text
+ WHERE fun_call.id=annot_function_call_text.function_call_id and annot_function_call_text.name=$1
UNION
- SELECT run.id as run_id, annot_run_text.name, annot_run_text.value
- FROM run, annot_run_text
- WHERE run.id=annot_run_text.run_id and annot_run_text.name=$1
+ SELECT run.id as run_id, annot_script_run_text.name, annot_script_run_text.value
+ FROM run, annot_script_run_text
+ WHERE run.id=annot_script_run_text.script_run_id and annot_script_run_text.name=$1
$$ LANGUAGE SQL;
-DROP TYPE compare_run_by_key_text_type;
+DROP TYPE compare_run_by_key_text_type CASCADE;
CREATE TYPE compare_run_by_key_text_type as (run_id VARCHAR, name VARCHAR, value VARCHAR);
CREATE OR REPLACE FUNCTION compare_run_by_key_text(name VARCHAR)
RETURNS SETOF compare_run_by_key_text_type
AS $$
- SELECT fun_call.run_id, annot_ds_text.name, annot_ds_text.value
- FROM annot_ds_text,ds_io,ds_cont,fun_call
- WHERE annot_ds_text.ds_id=ds_cont.in_id AND ds_cont.out_id=ds_io.ds_id AND
- ds_io.fun_call_id=fun_call.id AND annot_ds_text.name=$1
+ SELECT fun_call.run_id, annot_dataset_text.name, annot_dataset_text.value
+ FROM annot_dataset_text,dataset_io,dataset_containment,fun_call
+ WHERE annot_dataset_text.dataset_id=dataset_containment.in_id AND dataset_containment.out_id=dataset_io.dataset_id AND
+ dataset_io.function_call_id=fun_call.id AND annot_dataset_text.name=$1
UNION
- SELECT fun_call.run_id, annot_ds_text.name, annot_ds_text.value
- FROM fun_call, ds_io, annot_ds_text
- WHERE fun_call.id=ds_io.fun_call_id and ds_io.ds_id=annot_ds_text.ds_id and
- annot_ds_text.name=$1
+ SELECT fun_call.run_id, annot_dataset_text.name, annot_dataset_text.value
+ FROM fun_call, dataset_io, annot_dataset_text
+ WHERE fun_call.id=dataset_io.function_call_id and dataset_io.dataset_id=annot_dataset_text.dataset_id and
+ annot_dataset_text.name=$1
UNION
- SELECT fun_call.run_id, annot_fun_call_text.name, annot_fun_call_text.value
- FROM fun_call, annot_fun_call_text
- WHERE fun_call.id=annot_fun_call_text.fun_call_id and annot_fun_call_text.name=$1
+ SELECT fun_call.run_id, annot_function_call_text.name, annot_function_call_text.value
+ FROM fun_call, annot_function_call_text
+ WHERE fun_call.id=annot_function_call_text.function_call_id and annot_function_call_text.name=$1
UNION
- SELECT run.id as run_id, annot_run_text.name, annot_run_text.value
- FROM run, annot_run_text
- WHERE run.id=annot_run_text.run_id and annot_run_text.name=$1
+ SELECT run.id as run_id, annot_script_run_text.name, annot_script_run_text.value
+ FROM run, annot_script_run_text
+ WHERE run.id=annot_script_run_text.script_run_id and annot_script_run_text.name=$1
$$ LANGUAGE SQL;
-- CREATE OR REPLACE FUNCTION compare_run_by_annot_num(name VARCHAR)
@@ -162,23 +162,23 @@
-- value NUMERIC
-- )
-- AS $$
--- SELECT fun_call.workflow_id, annot_ds_num.name, annot_ds_num.value
--- FROM annot_ds_num,ds_usage,ds_containment,fun_call
--- WHERE annot_ds_num.id=ds_containment.in_id AND ds_containment.out_id=ds_usage.dataset_id AND
--- ds_usage.fun_call_id=fun_call.id AND annot_ds_num.name=$1
+-- SELECT fun_call.workflow_id, annot_dataset_num.name, annot_dataset_num.value
+-- FROM annot_dataset_num,dataset_io,dataset_containment,fun_call
+-- WHERE annot_dataset_num.id=dataset_containment.in_id AND dataset_containment.out_id=dataset_io.dataset_id AND
+-- dataset_io.function_call_id=fun_call.id AND annot_dataset_num.name=$1
-- UNION
--- SELECT fun_call.workflow_id, annot_ds_num.name, annot_ds_num.value
--- FROM fun_call, ds_usage, annot_ds_num
--- WHERE fun_call.id=ds_usage.fun_call_id and ds_usage.dataset_id=annot_ds_num.id and
--- annot_ds_num.name=$1
+-- SELECT fun_call.workflow_id, annot_dataset_num.name, annot_dataset_num.value
+-- FROM fun_call, dataset_io, annot_dataset_num
+-- WHERE fun_call.id=dataset_io.function_call_id and dataset_io.dataset_id=annot_dataset_num.id and
+-- annot_dataset_num.name=$1
-- UNION
-- SELECT fun_call.workflow_id, annot_p_num.name, annot_p_num.value
-- FROM fun_call, annot_p_num
-- WHERE fun_call.id=annot_p_num.id and annot_p_num.name=$1
-- UNION
--- SELECT workflow.id as workflow_id, annot_wf_num.name, annot_wf_num.value
+-- SELECT script_run.id as workflow_id, annot_wf_num.name, annot_wf_num.value
-- FROM workflow, annot_wf_num
--- WHERE workflow.id=annot_wf_num.id and annot_wf_num.name=$1
+-- WHERE script_run.id=annot_wf_num.id and annot_wf_num.name=$1
-- $$ LANGUAGE SQL;
@@ -188,57 +188,57 @@
-- name VARCHAR,
-- value VARCHAR)
-- AS $$
--- SELECT fun_call.workflow_id, annot_ds_txt.name, annot_ds_txt.value
--- FROM fun_call, ds_usage, annot_ds_txt
--- WHERE fun_call.id=ds_usage.fun_call_id and ds_usage.dataset_id=annot_ds_txt.id and
--- annot_ds_txt.name=$1
+-- SELECT fun_call.workflow_id, annot_dataset_txt.name, annot_dataset_txt.value
+-- FROM fun_call, dataset_io, annot_dataset_txt
+-- WHERE fun_call.id=dataset_io.function_call_id and dataset_io.dataset_id=annot_dataset_txt.id and
+-- annot_dataset_txt.name=$1
-- UNION
-- SELECT fun_call.workflow_id, annot_p_txt.name, annot_p_txt.value
-- FROM fun_call, annot_p_txt
-- WHERE fun_call.id=annot_p_txt.id and annot_p_txt.name=$1
-- UNION
--- SELECT workflow.id as workflow_id, annot_wf_txt.name, annot_wf_txt.value
+-- SELECT script_run.id as workflow_id, annot_wf_txt.name, annot_wf_txt.value
-- FROM workflow, annot_wf_txt
--- WHERE workflow.id=annot_wf_txt.id and annot_wf_txt.name=$1
+-- WHERE script_run.id=annot_wf_txt.id and annot_wf_txt.name=$1
-- $$ LANGUAGE SQL;
-CREATE OR REPLACE FUNCTION compare_run_by_annot_bool(name VARCHAR)
-RETURNS TABLE (
- workflow_id VARCHAR,
- name VARCHAR,
- value BOOLEAN
-)
-AS $$
- SELECT fun_call.workflow_id, annot_ds_bool.name, annot_ds_bool.value
- FROM fun_call, ds_usage, annot_ds_bool
- WHERE fun_call.id=ds_usage.fun_call_id and ds_usage.dataset_id=annot_ds_bool.id and
- annot_ds_bool.name=$1
- UNION
- SELECT fun_call.workflow_id, annot_p_bool.name, annot_p_bool.value
- FROM fun_call, annot_p_bool
- WHERE fun_call.id=annot_p_bool.id and annot_p_bool.name=$1
- UNION
- SELECT workflow.id as workflow_id, annot_wf_bool.name, annot_wf_bool.value
- FROM workflow, annot_wf_bool
- WHERE workflow.id=annot_wf_bool.id and annot_wf_bool.name=$1
-$$ LANGUAGE SQL;
+--CREATE OR REPLACE FUNCTION compare_run_by_annot_bool(name VARCHAR)
+--RETURNS TABLE (
+-- workflow_id VARCHAR,
+-- name VARCHAR,
+-- value BOOLEAN
+--)
+--AS $$
+-- SELECT fun_call.workflow_id, annot_dataset_bool.name, annot_dataset_bool.value
+-- FROM fun_call, dataset_io, annot_dataset_bool
+-- WHERE fun_call.id=dataset_io.function_call_id and dataset_io.dataset_id=annot_dataset_bool.id and
+-- annot_dataset_bool.name=$1
+-- UNION
+-- SELECT fun_call.workflow_id, annot_p_bool.name, annot_p_bool.value
+-- FROM fun_call, annot_p_bool
+-- WHERE fun_call.id=annot_p_bool.id and annot_p_bool.name=$1
+-- UNION
+-- SELECT script_run.id as workflow_id, annot_wf_bool.name, annot_wf_bool.value
+-- FROM workflow, annot_wf_bool
+-- WHERE script_run.id=annot_wf_bool.id and annot_wf_bool.name=$1
+--$$ LANGUAGE SQL;
-- correlate a parameter with workflow runtime statistics
CREATE OR REPLACE FUNCTION correlate_parameter_runtime(parameter_name VARCHAR)
RETURNS TABLE (
- workflow VARCHAR,
- workflow_starttime TIMESTAMP WITH TIME ZONE,
- workflow_duration NUMERIC,
+ run VARCHAR,
+ starttime TIMESTAMP WITH TIME ZONE,
+ duration NUMERIC,
parameter VARCHAR,
- parameter_value VARCHAR
+ value VARCHAR
)
AS $$
- SELECT workflow.id,to_timestamp(workflow.start_time),workflow.duration,ds_usage.parameter_name,variable.value
- FROM variable,ds_usage,fun_call,workflow
- WHERE variable.id=ds_usage.dataset_id AND ds_usage.fun_call_id=fun_call.id AND
- fun_call.workflow_id=workflow.id AND ds_usage.param_name=$1
+ SELECT script_run.id,script_run.start_time,script_run.duration,dataset_io.parameter,dataset.value
+ FROM dataset,dataset_io,fun_call,script_run
+ WHERE dataset.id=dataset_io.dataset_id AND dataset_io.function_call_id=fun_call.id AND
+ fun_call.run_id=script_run.id AND dataset_io.parameter=$1
$$ LANGUAGE SQL;
-- recursive query to find ancestor entities in a provenance graph
@@ -247,13 +247,13 @@
WITH RECURSIVE anc(ancestor,descendant) AS
(
SELECT parent AS ancestor, child AS descendant
- FROM parent_of
+ FROM provenance_graph_edge
WHERE child=$1
UNION
- SELECT parent_of.parent AS ancestor,
+ SELECT provenance_graph_edge.parent AS ancestor,
anc.descendant AS descendant
- FROM anc,parent_of
- WHERE anc.ancestor=parent_of.child
+ FROM anc, provenance_graph_edge
+ WHERE anc.ancestor=provenance_graph_edge.child
)
SELECT ancestor FROM anc
$$ LANGUAGE SQL;
@@ -280,15 +280,13 @@
function_name := 'compare_run_by_parameter';
WHEN 'annot_num' THEN
function_name := 'compare_run_by_annot_num';
- WHEN 'annot_txt' THEN
- function_name := 'compare_run_by_annot_txt';
- WHEN 'annot_bool' THEN
- function_name := 'compare_run_by_annot_bool';
+ WHEN 'annot_text' THEN
+ function_name := 'compare_run_by_annot_text';
END CASE;
IF i = 1 THEN
fromq := function_name || '(''' || property || ''') as t' || i;
ELSE
- fromq := fromq || ' INNER JOIN ' || function_name || '(''' || property || ''') as t' || i || ' USING (workflow_id)';
+ fromq := fromq || ' INNER JOIN ' || function_name || '(''' || property || ''') as t' || i || ' USING (run_id)';
END IF;
END LOOP;
q := selectq || ' FROM ' || fromq;
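
For reference, the renamed provenance_graph_edge view can be walked directly with the same recursive pattern as the ancestor query updated above; this is an illustrative sketch only, and the dataset identifier is a placeholder:

    WITH RECURSIVE anc(ancestor, descendant) AS (
        SELECT parent AS ancestor, child AS descendant
          FROM provenance_graph_edge
         WHERE child = 'dataset:example'            -- placeholder identifier
        UNION
        SELECT e.parent AS ancestor, anc.descendant AS descendant
          FROM anc, provenance_graph_edge e
         WHERE anc.ancestor = e.child
    )
    SELECT ancestor FROM anc;
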
Modified: provenancedb/prepare-provenance-chart
===================================================================
--- provenancedb/prepare-provenance-chart 2012-05-09 22:18:28 UTC (rev 5785)
+++ provenancedb/prepare-provenance-chart 2012-05-10 12:27:54 UTC (rev 5786)
@@ -1,52 +1,22 @@
#!/bin/bash
-# invent an ID. doesn't need to have any meaning. needs to be unique.
-# uuidgen for now.
-# any time we ouptut a thread, prefix the WFID on the front.
-# should we do this in the logging code in swift proper? - it would be a
-# nicer way of getting the runid into the URI
-
export RUNID=$(basename $1 .log)
-
-export WFID="execute:${RUNID}:"
-export EXECUTE2PREFIX="execute2:${RUNID}:"
-
-# will output log information about datasets from a log file passed as $1
-
+export WFID="${RUNID}:"
+export EXECUTE2PREFIX="${RUNID}:"
cat $1 | grep ' PARAM ' | sed "s/^.* thread=\([^ ]*\).*direction=\([^ ]*\).*variable=\([^ ]*\).*provenanceid=\([^ ]*\).*\$/${WFID}\1 \2 \4 \3/" > tie-data-invocs.txt
-
-# 2007-12-13 14:29:13,967+0000 INFO AbstractDataNode dataset 2461363 has child 938665
cat $1 | grep ' CONTAINMENT ' | sed 's/^.*parent=\([^ ]*\) child=\([^ ]*\)$/\1 \2/' > tie-containers.txt
-
-#AbstractDataNode dataset 3814442 has filename file://localhost/q.out
cat $1 | grep ' FILENAME ' | sed 's/^.*dataset=\([^ ]*\) filename=\([^ ]*\).*$/\1 \2/' | sort | uniq > dataset-filenames.txt
-
cat $1 | grep ' VALUE ' | sed 's/^.*dataset=\([^ ]*\) VALUE=\(.*\)$/\1 \2/' | sort | uniq > dataset-values.txt
-
cat $1 | grep ' PROCEDURE ' | sed "s/^.*thread=\([^ ]*\) name=\([^ ]*\)\$/${WFID}\1 \2/" > invocation-procedure-names.txt
-
-info-to-extrainfo > extrainfo.txt
-
-info-to-runtime > runtime.txt
-
cat $1 | grep ' OPERATOR ' | sed 's/^.*thread=\([^ ]*\) operator="\([^ ]*\)" lhs=\([^ ]*\) rhs=\([^ ]*\) result=\([^ ]*\).*$/\1 \2 \3 \4 \5/' > operators.txt
-
-
-# 2009-03-19 19:15:35,244+0100 INFO vdl:arguments FUNCTION id=88000-0-4-4 name="f ilename" result=dataset:20090319-1915-xj8flg 13:720000000060
-# 2009-03-19 19:15:35,246+0100 INFO vdl:arguments FUNCTIONPARAMETER id=88001-0-4- 4 input=dataset:20090319-1915-xj8flg13:72000 0000058
-
cat $1 | grep ' FUNCTION ' | sed "s/^.*id=\([^ ]*\) name=\([^ ]*\) result=\([^ ]*\).*\$/$WFID\1 \2 \3/" > functions.txt
-# the IDs in functions.txt should be unique...
-# we could even test that...
-
cat $1 | grep ' FUNCTIONPARAMETER ' | sed "s/^.*id=\([^ ]*\) input=\([^ ]*\).*\$/$WFID\1 \2/" > function-inputs.txt
-
cat $1 | grep ' CREATEARRAY START ' | sed 's/^.* array=\([^ ]*\).*$/\1/' > createarray.txt
-
cat $1 | grep ' CREATEARRAY MEMBER ' | sed 's/^.* array=\([^ ]*\) index=\([^ ]*\) member=\([^ ]*\).*$/\1 \2 \3/' > createarray-members.txt
-
cat $1 | grep ' ARRAYRANGE ' | sed 's/^.* thread=\([^ ]*\).*$/\1/' > arrayrange.txt
-
-# 2009-05-18 21:19:20,295+0200 INFO vdl:mains SCOPE thread=0-6-5-1-5
-
cat $1 | grep ' SCOPE ' | sed 's/^.* thread=\([^ ]*\).*/\1/' > scopes.txt
+awk '/BEGIN SWIFTSCRIPT/,/END SWIFTSCRIPT/{if (!/BEGIN SWIFTSCRIPT/&&!/END SWIFTSCRIPT/)print}' $1 > script.txt
+awk '/BEGIN SITES/,/END SITES/{if (!/BEGIN SITES/&&!/END SITES/)print}' $1 > sites.txt
+awk '/BEGIN TC/,/END TC/{if (!/BEGIN TC/&&!/END TC/)print}' $1 > tc.txt
+info-to-extrainfo > extrainfo.txt
+info-to-runtime > runtime.txt
Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql 2012-05-09 22:18:28 UTC (rev 5785)
+++ provenancedb/prov-init.sql 2012-05-10 12:27:54 UTC (rev 5786)
@@ -1,46 +1,52 @@
-- this is the schema definition used for the main relational provenance
-- implementation (in both sqlite3 and postgres)
--- base relations
-drop table app_catalog cascade;
-drop table site_catalog cascade;
-drop table script cascade;
-drop table run cascade;
-drop table fun_call cascade;
-drop table app_fun_call cascade;
-drop table app_exec cascade;
+drop view script_run;
+drop view function_call;
+drop view application_execution;
+drop view runtime_info;
+drop view dataset;
+drop view dataset_io;
+drop view provenance_graph_edge;
+drop table annot_script_run_num cascade;
+drop table annot_script_run_text cascade;
+drop table annot_function_call_num cascade;
+drop table annot_function_call_text cascade;
+drop table annot_app_exec_num cascade;
+drop table annot_app_exec_text cascade;
+drop table annot_dataset_num cascade;
+drop table annot_dataset_text cascade;
drop table rt_info cascade;
-drop table ds cascade;
+drop table app_exec cascade;
+drop table app_fun_call cascade;
+drop table dataset_in cascade;
+drop table dataset_out cascade;
+drop table fun_call cascade;
+drop table run cascade;
+drop table tc_file cascade;
+drop table sites_file cascade;
+drop table script cascade;
drop table mapped cascade;
drop table primitive cascade;
-drop table ds_containment cascade;
-drop table ds_in cascade;
-drop table ds_out cascade;
-drop table annot_run_num cascade;
-drop table annot_run_text cascade;
-drop table annot_fun_call_num cascade;
-drop table annot_fun_call_text cascade;
-drop table annot_app_exec_num cascade;
-drop table annot_app_exec_text cascade;
-drop table annot_ds_num cascade;
-drop table annot_ds_text cascade;
+drop table dataset_containment cascade;
+drop table ds cascade;
-- application_catalog stores tc.file
-create table app_catalog (
+create table tc_file (
hash_value varchar(256) primary key,
content text
);
-- application_catalog stores tc.file
-create table site_catalog (
- hash_value varchar(256) primary key,
- content text
+create table sites_file (
+ hash_value varchar(256) primary key,
+ content text
);
-
+
-- script stores Swift script source codes
create table script (
- hash_value varchar(256) primary key,
- content text
+ hash_value varchar(256) primary key,
+ content text
);
-- run stores information about each script run log that has
@@ -48,122 +54,134 @@
-- status.
-- Might be interesting to store xml translation of the Swift script
-- here for prospective provenance and versioning.
-create table run
- (
- id varchar(256) primary key,
- log_filename varchar(2048),
- swift_version varchar(16),
- cog_version varchar(16),
- final_state varchar(32),
- start_time numeric,
- duration numeric,
- script_filename varchar(2048),
- script_hash varchar(256) references script (hash_value),
- application_catalog_hash varchar(256) references app_catalog (hash_value),
- site_catalog_hash varchar(256) references site_catalog (hash_value)
+create table run (
+ id varchar(256) primary key,
+ log_filename varchar(2048),
+ swift_version varchar(16),
+ cog_version varchar(16),
+ final_state varchar(32),
+ start_time numeric,
+ duration numeric,
+ script_filename varchar(2048),
+ script_hash varchar(256) references script (hash_value),
+ tc_file_hash varchar(256) references tc_file (hash_value),
+ sites_file_hash varchar(256) references sites_file (hash_value)
);
+create view script_run as
+ select id, log_filename, swift_version, cog_version, final_state,
+ to_timestamp(start_time) as start_time, duration, script_filename,
+ script_hash, tc_file_hash, sites_file_hash
+ from run;
+
-- process gives information about each process (in the OPM sense)
-- it is augmented by information in other tables
-- specifies the type of process. for any type, it
-- must be the case that the specific type table
-- has an entry for this process.
-- process types: internal, rootthread, execute, function, compound, scope, operator
-create table fun_call
- (
- id varchar(256),
- run_id varchar(256) references run (id) on delete cascade,
- type varchar(16),
- name varchar(256), -- in the case of an execute this refers to the transformation name in tc.data
- primary key (id, run_id)
-
+create table fun_call (
+ id varchar(256) primary key,
+ run_id varchar(256) references run (id) on delete cascade,
+ type varchar(16),
+ name varchar(256) -- in the case of an execute this refers to the transformation name in tc.data
);
-- this gives information about each execute.
-- each execute is identified by a unique URI. other information from
-- swift logs is also stored here. an execute is an OPM process.
-create table app_fun_call
- (
- id varchar(256),
- run_id varchar(256),
- name varchar(256), -- name of the app procedure that invokes the transformation
- start_time numeric,
- duration numeric,
- final_state varchar(32),
- scratch varchar(2048),
- foreign key (id, run_id) references fun_call,
- primary key (id, run_id)
+create table app_fun_call (
+ id varchar(256) primary key references fun_call (id),
+ name varchar(256), -- name of the app procedure that invokes the transformation
+ start_time numeric,
+ duration numeric,
+ final_state varchar(32),
+ scratch varchar(2048)
);
+create view function_call as
+ select fun_call.id, fun_call.name, fun_call.type, app_fun_call.name as app_catalog_name, fun_call.run_id as script_run_id,
+ to_timestamp(app_fun_call.start_time) as start_time, app_fun_call.duration, app_fun_call.final_state, app_fun_call.scratch
+ from
+ fun_call
+ left outer join
+ app_fun_call
+ on fun_call.id=app_fun_call.id;
+
-- this gives information about each application execution attempt, including
-- aggregate resource consumption. the app_exec_id is tied to per-execution-attempt
-- information such as wrapper logs
-create table app_exec
- (
- id varchar(256),
- app_fun_call_id varchar(256),
- run_id varchar(256),
+create table app_exec (
+ id varchar(256) primary key,
+ app_fun_call_id varchar(256) references app_fun_call (id),
start_time numeric,
duration numeric,
final_state varchar(32),
- site varchar(256),
- maxrss numeric,
- walltime numeric,
- systime numeric,
- usertime numeric,
- cpu numeric,
- fsin numeric,
- fsout numeric,
- timesswpd numeric,
- socketrecv numeric,
- socketsent numeric,
- majpfaults numeric,
- minpfaults numeric,
- ctxswinv numeric,
- ctxswvol numeric,
- foreign key(app_fun_call_id, run_id) references app_fun_call,
- primary key(id, app_fun_call_id, run_id)
+ site varchar(256)
);
+-- maxrss numeric,
+-- walltime numeric,
+-- systime numeric,
+-- usertime numeric,
+-- cpu numeric,
+-- fsin numeric,
+-- fsout numeric,
+-- timesswpd numeric,
+-- socketrecv numeric,
+-- socketsent numeric,
+-- majpfaults numeric,
+-- minpfaults numeric,
+-- ctxswinv numeric,
+-- ctxswvol numeric,
+create view application_execution as
+ select id, app_fun_call_id as function_call_id, to_timestamp(start_time) as start_time, duration, final_state, site
+ from app_exec;
+
+
-- app execution runtime info extracted from the /proc filesystem (assumes the app executed
-- in a Linux host)
-create table rt_info
- (
- app_exec_id varchar(256),
- app_fun_call_id varchar(256),
- run_id varchar(256),
+create table rt_info (
+ app_exec_id varchar(256) references app_exec (id),
timestamp numeric,
cpu_usage numeric,
max_phys_mem numeric,
max_virt_mem numeric,
io_read numeric,
io_write numeric,
- foreign key (app_exec_id, app_fun_call_id, run_id) references app_exec,
- primary key (app_exec_id, app_fun_call_id, run_id, timestamp)
+ primary key (app_exec_id, timestamp)
);
+create view runtime_info as
+ select app_exec_id,
+ to_timestamp(timestamp) as timestamp,
+ cpu_usage,
+ max_phys_mem,
+ max_virt_mem,
+ io_read,
+ io_write
+ from rt_info;
+
-- ds stores all dataset identifiers.
-create table ds
- (
+create table ds (
id varchar(256) primary key
- );
+);
-- file stores the filename mapped to each dataset.
-create table mapped
- (
+create table mapped (
id varchar(256) primary key references ds (id) on delete cascade,
filename varchar(2048)
- );
+);
-- dataset_values stores the value for each dataset which is known to have
-- a value (which is all assigned primitive types). No attempt is made here
-- to expose that value as an SQL type other than a string, and so (for
-- example) SQL numerical operations should not be expected to work, even
-- though the user knows that a particular dataset stores a numeric value.
-create table primitive
- ( id varchar(256) primary key references ds (id) on delete cascade,
- value varchar(2048)
- );
+create table primitive (
+ id varchar(256) primary key references ds (id) on delete cascade,
+ value varchar(2048)
+);
-- dataset_containment stores the containment hierarchy between
-- container datasets (arrays and structs) and their contents.
@@ -172,215 +190,108 @@
-- constructors and accessors, rather than, or in addition to,
-- a containment hierarchy. The relationship (such as array index or
-- structure member name) should also be stored in this table.
-create table ds_containment
- ( out_id varchar(256) references ds (id) on delete cascade,
- in_id varchar(256) references ds (id) on delete cascade,
- primary key (out_id,in_id)
- );
+create table dataset_containment (
+ out_id varchar(256) references ds (id) on delete cascade,
+ in_id varchar(256) references ds (id) on delete cascade,
+ primary key (out_id,in_id)
+);
+create view dataset as
+ select mapped.id, 'mapped' as type, mapped.filename, null as value
+ from mapped
+ union all
+ select primitive.id, 'primitive' as type, null as filename, primitive.value
+ from primitive
+ union all
+ select dataset_containment.out_id as id, 'composite' as type, null as filename, null as value
+ from dataset_containment;
-- dataset_usage records usage relationships between processes and datasets;
-- in SwiftScript terms, the input and output parameters for each
-- application procedure invocation; in OPM terms, the artificts which are
-- input to and output from each process that is a Swift execution
-create table ds_in
- (
- fun_call_id varchar(256),
- run_id varchar(256),
- ds_id varchar(256) references ds (id) on delete cascade,
- parameter varchar(256), -- the name of the parameter in this execute that
- -- this dataset was bound to. sometimes this must
- -- be contrived (for example, in positional varargs)
- foreign key (fun_call_id, run_id) references fun_call,
- primary key (fun_call_id, run_id, ds_id, parameter)
- );
+create table dataset_in (
+ function_call_id varchar(256) references fun_call (id),
+ dataset_id varchar(256) references ds (id) on delete cascade,
+ parameter varchar(256), -- the name of the parameter in this execute that
+ -- this dataset was bound to. sometimes this must
+ -- be contrived (for example, in positional varargs)
+ primary key (function_call_id, dataset_id, parameter)
+ );
-create table ds_out
- (
- fun_call_id varchar(256),
- run_id varchar(256),
- ds_id varchar(256) references ds (id) on delete cascade,
- parameter varchar(256), -- the name of the parameter in this execute that
- -- this dataset was bound to. sometimes this must
- -- be contrived (for example, in positional varargs)
- foreign key (fun_call_id, run_id) references fun_call,
- primary key (fun_call_id, run_id, ds_id, parameter)
- );
+create table dataset_out (
+ function_call_id varchar(256) references fun_call (id),
+ dataset_id varchar(256) references ds (id) on delete cascade,
+ parameter varchar(256), -- the name of the parameter in this execute that
+ -- this dataset was bound to. sometimes this must
+ -- be contrived (for example, in positional varargs)
+ primary key (function_call_id, dataset_id, parameter)
+);
+create view dataset_io as
+ select dataset_in.function_call_id, dataset_in.dataset_id, dataset_in.parameter, 'I' as type
+ from dataset_in
+union all
+ select dataset_out.function_call_id, dataset_out.dataset_id, dataset_out.parameter, 'O' as type
+ from dataset_out;
--- annotations
-create table annot_ds_num
- ( ds_id varchar(256) references ds (id) on delete cascade,
- name varchar(256),
- value numeric,
- primary key (ds_id, name)
- );
+create table annot_script_run_num (
+ script_run_id varchar(256) references run (id) on delete cascade,
+ name varchar(256),
+ value numeric,
+ primary key (script_run_id, name)
+);
-create table annot_ds_text
- ( ds_id varchar(256) references ds (id) on delete cascade,
- name varchar(256),
- value varchar(2048),
- primary key (ds_id, name)
- );
-
-create table annot_fun_call_num
- ( fun_call_id varchar(256),
- run_id varchar(256),
- name varchar(256),
- value numeric,
- foreign key (fun_call_id, run_id) references fun_call,
- primary key (fun_call_id, run_id, name)
- );
-
-create table annot_fun_call_text
- ( fun_call_id varchar(256),
- run_id varchar(256),
- name varchar(256),
- value varchar(2048),
- foreign key (fun_call_id, run_id) references fun_call,
- primary key (fun_call_id, run_id, name)
- );
-
-create table annot_run_num
- ( run_id varchar(256) references run (id) on delete cascade,
+create table annot_script_run_text ( script_run_id varchar(256) references run (id) on delete cascade,
name varchar(256),
- value numeric,
- primary key (run_id, name)
- );
-
-create table annot_run_text
- ( run_id varchar(256) references run (id) on delete cascade,
- name varchar(256),
value varchar(2048),
- primary key (run_id, name)
- );
+ primary key (script_run_id, name)
+);
-create table annot_app_exec_num
- ( id varchar(256),
- app_fun_call_id varchar(256),
- run_id varchar(256),
- name varchar(256),
- value numeric,
- foreign key (id, app_fun_call_id, run_id) references app_exec,
- primary key (id, app_fun_call_id, run_id, name)
- );
+create table annot_function_call_num (
+ function_call_id varchar(256) references fun_call (id) on delete cascade,
+ name varchar(256),
+ value numeric,
+ primary key (function_call_id, name)
+);
-create table annot_app_exec_text
- ( id varchar(256),
- app_fun_call_id varchar(256),
- run_id varchar(256),
- name varchar(256),
- value varchar(2048),
- foreign key (id, app_fun_call_id, run_id) references app_exec,
- primary key (id, app_fun_call_id, run_id, name)
- );
+create table annot_function_call_text (
+ function_call_id varchar(256) references fun_call (id) on delete cascade,
+ name varchar(256),
+ value varchar(2048),
+ primary key (function_call_id, name)
+);
+create table annot_app_exec_num (
+ app_exec_id varchar(256) references app_exec (id) on delete cascade,
+ name varchar(256),
+ value numeric,
+ primary key (app_exec_id, name)
+);
--- create table iq
--- ( idx serial primary key,
--- q varchar(2048)
--- );
-drop view ds_io;
-create view ds_io as
- select ds_in.fun_call_id as function_call_id, ds_in.ds_id as variable_id, ds_in.parameter
- from ds_in
-union all
- select ds_out.fun_call_id as function_call_id, ds_out.ds_id as variable_id, ds_out.parameter
- from ds_out;
+create table annot_app_exec_text (
+ app_exec_id varchar(256) references app_exec (id) on delete cascade,
+ name varchar(256),
+ value varchar(2048),
+ primary key (app_exec_id, name)
+);
-drop view pgraph_edge;
-create view pgraph_edge as
- select fun_call_id as parent,ds_id as child from ds_out
- union all
- select ds_id as parent,fun_call_id as child from ds_in
- union all
- select out_id as parent,in_id as child from ds_containment;
+create table annot_dataset_num (
+ dataset_id varchar(256) references ds (id) on delete cascade,
+ name varchar(256),
+ value numeric,
+ primary key (dataset_id, name)
+);
-drop view annot_text cascade;
-create view annot_text as
- select *
- from annot_run_text
- union all
- select *
- from annot_ds_text
- union all
- select *
- from annot_fun_call_text;
+create table annot_dataset_text(
+ dataset_id varchar(256) references ds (id) on delete cascade,
+ name varchar(256),
+ value varchar(2048),
+ primary key (dataset_id, name)
+);
-drop view annot_num cascade;
-create view annot_num as
- select *
- from annot_run_num
- union all
- select *
- from annot_ds_num
- union all
- select *
- from annot_fun_call_num;
-
--- views used for queries based on the schema summary
-
-drop view function_call;
-
-create view function_call as
- select fun_call.id, fun_call.name as name, fun_call.type, app_fun_call.name as app_catalog_name, fun_call.run_id as script_run_id,
- to_timestamp(app_fun_call.start_time) as start_time, app_fun_call.duration, app_fun_call.final_state, app_fun_call.scratch
- from fun_call
- left outer join
- app_fun_call on fun_call.id=app_fun_call.id;
-
-drop view variable;
-
-create view variable as
- select mapped.id, 'mapped' as type, mapped.filename, null as value
- from mapped
- union all
- select primitive.id, 'primitive' as type, null as filename, primitive.value
- from primitive
- union all
- select ds_containment.out_id as id, 'composite' as type, null as filename, null as value
- from ds_containment;
-
-drop view annotation;
-
-create view annotation as
- select annot_text.run_id as id, annot_text.name as key, annot_text.value as string_value, null as numeric_value
- from annot_text
- union all
- select annot_num.run_id as id, annot_num.name as key, null as string_value, annot_num.value as numeric_value
- from annot_num;
-
-drop view script_run;
-
-create view script_run as
- select id, log_filename, swift_version, cog_version, final_state,
- to_timestamp(start_time) as start_time, duration
- from run;
-
-drop view application_execution;
-
-create view application_execution as
- select id, app_fun_call_id as function_call_id, to_timestamp(start_time) as start_time, duration, final_state, site
- from app_exec;
-
-drop view runtime_info;
-
-create view runtime_info as
- select app_exec_id as application_execution_id, to_timestamp(timestamp) as timestamp, cpu_usage, max_phys_mem, max_virt_mem, io_read, io_write
- from rt_info;
-
-drop view produces;
-
-create view produces as
- select fun_call_id as function_call_id, ds_id as variable_id, parameter from ds_out;
-
-drop view consumes;
-
-create view consumes as
- select fun_call_id as function_call_id, ds_id as variable_id, parameter from ds_in;
-
-drop view variable_containment;
-
-create view variable_containment as
- select out_id as container, in_id as containee
- from ds_containment;
+create view provenance_graph_edge as
+ select function_call_id as parent, dataset_id as child from dataset_out
+ union all
+ select dataset_id as parent, function_call_id as child from dataset_in;
+
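
For reference, a sketch of joining the new views to list each function call's inputs and outputs per run; column names follow the view definitions above:

    SELECT f.script_run_id, f.name, io.type, io.parameter, d.filename, d.value
      FROM function_call f
      JOIN dataset_io io ON io.function_call_id = f.id
      JOIN dataset d ON d.id = io.dataset_id
     ORDER BY f.script_run_id, f.id;
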
Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh 2012-05-09 22:18:28 UTC (rev 5785)
+++ provenancedb/prov-to-sql.sh 2012-05-10 12:27:54 UTC (rev 5786)
@@ -2,7 +2,7 @@
export RUNID=$(basename $1 .log)
-export WFID="execute:${RUNID}:"
+export WFID="${RUNID}:"
# TODO is there already a URI form for identifying workflows?
export WF="${RUNID}"
@@ -16,16 +16,18 @@
echo " - Function calls."
while read time duration thread localthread endstate tr_name scratch; do
- echo "INSERT INTO fun_call (id, type, run_id) VALUES ('$localthread', 'execute', '$WF');" >> /tmp/$RUNID.sql
- echo "INSERT INTO app_fun_call (id, run_id, name, start_time, duration, final_state, scratch) VALUES ('$localthread', '$WF', '$tr_name', $time, $duration, '$endstate', '$scratch');" >> /tmp/$RUNID.sql
+ id=$(echo "$thread" | sed "s/execute\://")
+ echo "INSERT INTO fun_call (id, type, run_id) VALUES ('$id', 'execute', '$WF');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO app_fun_call (id, name, start_time, duration, final_state, scratch) VALUES ('$id', '$tr_name', $time, $duration, '$endstate', '$scratch');" >> /tmp/$RUNID.sql
done < execute.global.event
echo " - Application executions."
while read start_time duration globalid id endstate thread site scratch; do
# cut off the last component of the thread, so that we end up at the
# parent thread id which should correspond with the execute-level ID
- inv_id="$(echo $thread | sed 's/-[^-]*$//')"
- echo "INSERT INTO app_exec (id, app_fun_call_id, run_id, start_time, duration, final_state, site) VALUES ('$id', '$inv_id', '$WF', $start_time, $duration, '$endstate', '$site');" >> /tmp/$RUNID.sql
+ inv_id="$WFID$(echo $thread | sed 's/-[^-]*$//')"
+ eid=$(echo "$globalid" | sed "s/execute2\://")
+ echo "INSERT INTO app_exec (id, app_fun_call_id, start_time, duration, final_state, site) VALUES ('$eid', '$inv_id', $start_time, $duration, '$endstate', '$site');" >> /tmp/$RUNID.sql
done < execute2.global.event
echo " - Mapped variables."
@@ -44,10 +46,11 @@
while read outer inner; do
echo "INSERT INTO ds (id) VALUES ('$outer');" >> /tmp/$RUNID.sql
echo "INSERT INTO ds (id) VALUES ('$inner');" >> /tmp/$RUNID.sql
- echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');" >> /tmp/$RUNID.sql
- echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('constructor:$outer', 'constructor', 'constructor', '$WF');" >> /tmp/$RUNID.sql
- echo "INSERT INTO ds_in (fun_call_id, ds_id, parameter) VALUES ('constructor:$outer', '$inner', 'element');" >> /tmp/$RUNID.sql
- echo "INSERT INTO ds_out (fun_call_id, ds_id, parameter) VALUES ('constructor:$outer', '$outer', 'collection');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO dataset_containment (out_id, in_id) VALUES ('$outer', '$inner');" >> /tmp/$RUNID.sql
+ cid=$(echo $outer | awk -F "-" '{print $3}')
+ echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('${WFID}$cid', 'constructor', 'constructor', '$WF');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO dataset_in (function_call_id, dataset_id, parameter) VALUES ('${WFID}$cid', '$inner', 'element');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO dataset_out (function_call_id, dataset_id, parameter) VALUES ('${WFID}$cid', '$outer', 'collection');" >> /tmp/$RUNID.sql
done < tie-containers.txt
echo " - Operator calls."
@@ -58,29 +61,27 @@
rhs=$(echo $rhs | awk 'BEGIN { FS = "=" }; {print $2}')
result=$(echo $result | awk 'BEGIN { FS = "=" }; {print $2}')
- operatorid="operator:$thread"
+ operatorid="${WFID}operator:$thread"
echo "INSERT INTO ds (id) VALUES ('$lhs');" >> /tmp/$RUNID.sql
echo "INSERT INTO ds (id) VALUES ('$rhs');" >> /tmp/$RUNID.sql
echo "INSERT INTO ds (id) VALUES ('$result');" >> /tmp/$RUNID.sql
echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$operatorid', 'operator', '$name', '$WF');" >> /tmp/$RUNID.sql
- echo "INSERT INTO ds_in (fun_call_id, ds_id, parameter) VALUES ('$operatorid', '$lhs', 'lhs');" >> /tmp/$RUNID.sql
- echo "INSERT INTO ds_in (fun_call_id, ds_id, parameter) VALUES ('$operatorid', '$rhs', 'rhs');" >> /tmp/$RUNID.sql
- echo "INSERT INTO ds_out (fun_call_id, ds_id, parameter) VALUES ('$operatorid', '$result', 'result');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO dataset_in (function_call_id, dataset_id, parameter) VALUES ('$operatorid', '$lhs', 'lhs');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO dataset_in (function_call_id, dataset_id, parameter) VALUES ('$operatorid', '$rhs', 'rhs');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO dataset_out (function_call_id, dataset_id, parameter) VALUES ('$operatorid', '$result', 'result');" >> /tmp/$RUNID.sql
done < operators.txt
echo " - Built-in function calls."
while read id name output; do
- fid=$(echo $id | awk -F ":" '{print $3}')
echo "INSERT INTO ds (id) VALUES ('$output');" >> /tmp/$RUNID.sql
- echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$fid', 'function', '$name', '$WF');" >> /tmp/$RUNID.sql
- echo "INSERT INTO ds_out (fun_call_id, ds_id, parameter) VALUES ('$fid', '$output', 'result');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$id', 'function', '$name', '$WF');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO dataset_out (function_call_id, dataset_id, parameter) VALUES ('$id', '$output', 'result');" >> /tmp/$RUNID.sql
done < functions.txt
while read id value; do
- fid=$(echo $id | awk -F ":" '{print $3}')
echo "INSERT INTO ds (id) VALUES ('$value');" >> /tmp/$RUNID.sql
- echo "INSERT INTO ds_in (fun_call_id, ds_id, parameter) VALUES ('$fid', '$value', 'undefined');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO dataset_in (function_call_id, dataset_id, parameter) VALUES ('$id', '$value', 'undefined');" >> /tmp/$RUNID.sql
done < function-inputs.txt
@@ -95,87 +96,99 @@
echo " - Compound functions."
while read start duration thread final_state procname ; do
if [ "$duration" != "last-event-line" ]; then
- echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$thread', 'compound', '$procname', '$WF');" >> /tmp/$RUNID.sql
+ compoundid=$WFID$thread
+ echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$compoundid', 'compound', '$procname', '$WF');" >> /tmp/$RUNID.sql
fi
done < compound.event
while read start duration thread final_state procname ; do
if [ "$duration" != "last-event-line" ]; then
- echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$thread', 'internal', '$procname', '$WF');" >> /tmp/$RUNID.sql
+ fqid=$WFID$thread
+ echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$fqid', 'internal', '$procname', '$WF');" >> /tmp/$RUNID.sql
fi
done < internalproc.event
while read t ; do
- echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$t', 'scope', 'scope', '$WF');" >> /tmp/$RUNID.sql
+ thread="${WFID}$t"
+ echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$thread', 'scope', 'scope', '$WF');" >> /tmp/$RUNID.sql
done < scopes.txt
-echo " - Variable consumption and production."
+echo " - Dataset consumption and production."
while read thread direction dataset variable rest; do
- fid=$(echo $thread | awk -F ":" '{print $3}')
if [ "$direction" == "input" ] ; then
- table=ds_in
+ table=dataset_in
else
- table=ds_out
+ table=dataset_out
fi
echo "INSERT INTO ds (id) VALUES ('$dataset');" >> /tmp/$RUNID.sql
- echo "INSERT INTO $table (fun_call_id, ds_id, parameter) VALUES ('$fid', '$dataset', '$variable');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO $table (function_call_id, dataset_id, parameter) VALUES ('$thread', '$dataset', '$variable');" >> /tmp/$RUNID.sql
done < tie-data-invocs.txt
echo " - Wrapper log resource consumption info."
if [ -f runtime.txt ]; then
while read execute2_id runtime; do
- #timestamp=$(echo $runtime | awk -F "," '{print $1}' | awk -F ":" '{print $2}')
- #cpu_usage=$(echo $runtime | awk -F "," '{print $2}' | awk -F ":" '{print $2}')
- #max_phys_mem=$(echo $runtime | awk -F "," '{print $3}' | awk -F ":" '{print $2}')
- #max_virtual_mem=$(echo $runtime | awk -F "," '{print $4}' | awk -F ":" '{print $2}')
- #io_read_bytes=$(echo $runtime | awk -F "," '{print $5}' | awk -F ":" '{print $2}')
- #io_write_bytes=$(echo $runtime | awk -F "," '{print $6}' | awk -F ":" '{print $2}')
- #echo "INSERT INTO rt_info (app_exec_id, tstamp, cpu_usage, max_phys_mem, max_virt_mem, io_read, io_write) VALUES ('$execute2_id', $timestamp, $cpu_usage, $max_phys_mem, $max_virtual_mem, $io_read_bytes, $io_write_bytes);" >> /tmp/$RUNID.sql
- maxrss=$(echo $runtime | awk -F "," '{print $1}' | awk -F ":" '{print $2}')
- walltime=$(echo $runtime | awk -F "," '{print $2}' | awk -F ":" '{print $2}')
- systime=$(echo $runtime | awk -F "," '{print $3}' | awk -F ":" '{print $2}')
- usertime=$(echo $runtime | awk -F "," '{print $4}' | awk -F ":" '{print $2}')
- cpu=$(echo $runtime | awk -F "," '{print $5}' | awk -F ":" '{print $2}' | awk -F "%" '{print $1}')
- fsin=$(echo $runtime | awk -F "," '{print $6}' | awk -F ":" '{print $2}')
- fsout=$(echo $runtime | awk -F "," '{print $7}' | awk -F ":" '{print $2}')
- timesswapped=$(echo $runtime | awk -F "," '{print $8}' | awk -F ":" '{print $2}')
- socketrecv=$(echo $runtime | awk -F "," '{print $9}' | awk -F ":" '{print $2}')
- socketsent=$(echo $runtime | awk -F "," '{print $10}' | awk -F ":" '{print $2}')
- majorpagefaults=$(echo $runtime | awk -F "," '{print $11}' | awk -F ":" '{print $2}')
- minorpagefaults=$(echo $runtime | awk -F "," '{print $12}' | awk -F ":" '{print $2}')
- contextswitchesinv=$(echo $runtime | awk -F "," '{print $13}' | awk -F ":" '{print $2}')
- contextswitchesvol=$(echo $runtime | awk -F "," '{print $14}' | awk -F ":" '{print $2}')
- echo "UPDATE app_exec SET maxrss=$maxrss, walltime=$walltime, systime=$systime, usertime=$usertime, cpu=$cpu, fsin=$fsin, fsout=$fsout, timesswpd=$timesswapped, socketrecv=$socketrecv, socketsent=$socketsent, majpfaults=$majorpagefaults, minpfaults=$minorpagefaults, ctxswinv=$contextswitchesinv, ctxswvol=$contextswitchesvol where id='$execute2_id' and run_id='$WF';" >> /tmp/$RUNID.sql
+ for key in $(echo maxrss walltime systime usertime cpu fsin fsout timesswapped socketrecv socketsent majorpagefaults minorpagefaults contextswitchesinv contextswitchesvol); do
+ value=$(echo $runtime | awk -F "," '{print $1}' | awk -F ":" '{print $2}')
+ echo "INSERT INTO annot_app_exec_num VALUES ('$execute2_id','$key',$value)" >> /tmp/$RUNID.sql
+ done
done < runtime.txt
fi
echo " - Function call names."
while read thread appname; do
- fid=$(echo $thread | awk -F ":" '{print $3}')
- echo "UPDATE fun_call SET name='$appname' WHERE id='$fid';" >> /tmp/$RUNID.sql
+ echo "UPDATE fun_call SET name='$appname' WHERE id='$thread';" >> /tmp/$RUNID.sql
done < invocation-procedure-names.txt
echo " - Wrapper log extra info."
if [ -f extrainfo.txt ]; then
while read execute2_id extrainfo; do
- eid=$(echo $execute2_id | awk -F ":" '{print $3}')
echo $extrainfo | awk -F ";" '{ for (i = 1; i <= NF; i++)
print $i
}' | awk -F "=" '{ print $1 " " $2 }' | awk -F ":" '{ print $1 " " $2 }' > fields.txt
- fid=$($SQLCMD --tuples-only -c "select app_fun_call_id from app_exec where id='$eid' and run_id='$WF';" | awk '{print $1}')
+ id=$($SQLCMD --tuples-only -c "select app_fun_call_id from app_exec where id='$execute2_id';" | awk '{print $1}')
while read name type value; do
if [ "$type" = "num" ]; then
- echo "INSERT INTO annot_app_exec_num (id, fun_call_id, run_id, name, value) VALUES ('$eid', '$fid', '$name', $value);" >> /tmp/$RUNID.sql
+ echo "INSERT INTO annot_app_exec_num (id, name, value) VALUES ('$id', '$name', $value);" >> /tmp/$RUNID.sql
fi
if [ "$type" = "txt" ]; then
- echo "INSERT INTO annot_app_exec_text (id, fun_call_id, run_id, name, value) VALUES ('$eid', '$fid', '$name', '$value');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO annot_app_exec_text (id, name, value) VALUES ('$id', '$name', '$value');" >> /tmp/$RUNID.sql
fi
done < fields.txt
done < extrainfo.txt
fi
+echo " - Prospective provenance (script, tc, sites)."
+script_hash=$(openssl dgst -sha1 script.txt | awk '{ print $2 }')
+EXISTING=$($SQLCMD --tuples-only -c "select count(*) from script where hash_value='$script_hash';")
+if [ "$EXISTING" -eq "0" ]; then
+ content=$(cat script.txt | sed "s/'/''/g")
+ echo "INSERT INTO script VALUES ('$script_hash', '$content');" >> /tmp/$RUNID.sql
+fi
+echo "UPDATE run SET script_hash='$script_hash' WHERE id='$WF';" >> /tmp/$RUNID.sql
+
+
+
+tc_hash=$(openssl dgst -sha1 tc.txt | awk '{ print $2 }')
+EXISTING=$($SQLCMD --tuples-only -c "select count(*) from tc_file where hash_value='$tc_hash';")
+if [ "$EXISTING" -eq "0" ]; then
+ content=$(cat tc.txt | sed "s/'/''/g")
+ echo "INSERT INTO tc_file VALUES ('$tc_hash', '$content');" >> /tmp/$RUNID.sql
+fi
+echo "UPDATE run SET tc_file_hash='$tc_hash' WHERE id='$WF';" >> /tmp/$RUNID.sql
+
+
+sites_hash=$(openssl dgst -sha1 sites.txt | awk '{ print $2 }')
+EXISTING=$($SQLCMD --tuples-only -c "select count(*) from sites_file where hash_value='$sites_hash';")
+if [ "$EXISTING" -eq "0" ]; then
+ content=$(cat sites.txt | sed "s/'/''/g")
+ echo "INSERT INTO sites_file VALUES ('$sites_hash', '$content');" >> /tmp/$RUNID.sql
+fi
+echo "UPDATE run SET sites_file_hash='$sites_hash' WHERE id='$WF';" >> /tmp/$RUNID.sql
+
+
+
echo "Finished SQL generation."
echo "Exporting provenance to database..."
$SQLCMD -f /tmp/$RUNID.sql 1> /dev/null 2> /tmp/$RUNID-provenancedb-error.log
Modified: provenancedb/swift-prov-import-all-logs
===================================================================
--- provenancedb/swift-prov-import-all-logs 2012-05-09 22:18:28 UTC (rev 5785)
+++ provenancedb/swift-prov-import-all-logs 2012-05-10 12:27:54 UTC (rev 5786)
@@ -31,16 +31,16 @@
$SQLCMD < $PROVDIR/prov-init.sql
fi
-while read start version filename; do
+while read start version cogversion filename; do
export IDIR=$(echo $filename | sed 's/\.log$/.d/')
COG_VERSION=$(grep -m 1 -E 'Swift .* swift-r[0-9]*' $filename | sed 's/.*Swift .* cog-r\([0-9]*\).*/\1/')
echo IDIR=$IDIR
if [ $version -ge 1538 ]; then
echo -n "Log: $filename ... "
-
# TODO: does not work in sqlite
- EXISTING=$($SQLCMD --tuples-only -c "select count(*) from run where log_filename='$filename';")
+ fname=$(echo $filename | sed -e 's:[^\`].*/::')
+ EXISTING=$($SQLCMD --tuples-only -c "select count(*) from run where log_filename='$fname';")
if [ "$EXISTING" -eq "0" ]; then
PROV_ENABLED=$(grep provenanceid $filename | wc -l)
@@ -54,11 +54,11 @@
fi
export RUNID=$(basename $filename .log)
-
+ source_file=$(echo $fname | sed "s/-[0-9]*-[0-9]*-[0-9a-z]*\.log$/\.swift/")
export WF="${RUNID}"
#echo "BEGIN TRANSACTION;" > /tmp/$WF.sql
- echo "INSERT INTO run (id, log_filename, swift_version, cog_version, final_state) VALUES ('$WF','$filename','$version', '$COG_VERSION', '$wfstatus');" >> /tmp/$WF.sql
+ echo "INSERT INTO run (id, log_filename, script_filename, swift_version, cog_version, final_state) VALUES ('$WF','$fname','$source_file','$version', '$COG_VERSION', '$wfstatus');" >> /tmp/$WF.sql
echo version $version in log file $filename
echo ============= will import =============
Added: provenancedb/swift_mod/_swiftwrap_runtime_aggregate
===================================================================
--- provenancedb/swift_mod/_swiftwrap_runtime_aggregate (rev 0)
+++ provenancedb/swift_mod/_swiftwrap_runtime_aggregate 2012-05-10 12:27:54 UTC (rev 5786)
@@ -0,0 +1,605 @@
+#!/bin/bash
+# this script must be invoked inside of bash, not plain sh
+# note that this script modifies $IFS
+
+# Toggle debugging output from debug()
+DEBUG=0
+
+infosection() {
+ echo >& "$INFO"
+ echo "_____________________________________________________________________________" >& "$INFO"
+ echo >& "$INFO"
+ echo " $1" >& "$INFO"
+ echo "_____________________________________________________________________________" >& "$INFO"
+ echo >& "$INFO"
+}
+
+gather_proc_info()
+{
+ #TIME_PID=$!
+ #sleep 0.1
+ ##EXEC_PID=$#(ps -o pid --ppid $TIME_PID --no-headers)
+ EXEC_PID=$!
+ SAMPLING_INTERVAL=0.1
+ while true
+ do
+
+ MAX_VIRTUAL_MEM=$(grep "VmPeak:" /proc/$EXEC_PID/status | awk '{print $2}')
+ if [ -z "$MAX_VIRTUAL_MEM" ]; then
+ #process finished
+ break
+ fi
+
+ MAX_PHYS_MEM=$(grep "VmHWM:" /proc/$EXEC_PID/status | awk '{print $2}')
+ if [ -z "$MAX_PHYS_MEM" ]; then
+ #process finished
+ break
+ fi
+
+ READ_BYTES=$(grep "read_bytes:" /proc/$EXEC_PID/io | awk '{print $2}')
+ if [ -z "$READ_BYTES" ]; then
+ #process finished
+ break
+ fi
+
+ WRITE_BYTES=$(grep "^write_bytes:" /proc/$EXEC_PID/io | awk '{print $2}')
+ if [ -z "$WRITE_BYTES" ]; then
+ #process finished
+ break
+ fi
+
+ STEP_DATE=$(date +%s).$(date +%N)
+ PSLINE=$(ps auxw | grep $EXEC_PID | grep -v grep)
+ if [ -z "$PSLINE" ]; then
+ #process finished
+ break
+ fi
+ CPU_USAGE=$(echo $PSLINE | awk '{print $3}')
+ log "RUNTIME_INFO=timestamp:$STEP_DATE,cpu_usage:$CPU_USAGE,max_phys_mem:$MAX_PHYS_MEM,max_virtual_mem:$MAX_VIRTUAL_MEM,io_read_bytes:$READ_BYTES,io_write_bytes:$WRITE_BYTES"
+ INT_SAMPLING_INTERVAL=$(echo "$SAMPLING_INTERVAL/1" | bc)
+ if [ "$INT_SAMPLING_INTERVAL" -lt 60 ]; then
+ SAMPLING_INTERVAL=$(echo "$SAMPLING_INTERVAL+0.1" | bc)
+ fi
+ sleep $SAMPLING_INTERVAL
+ done
+ wait $EXEC_PID
+}
+
+info() {
+ infosection "command line"
+ echo $COMMANDLINE 2>&1 >& "$INFO"
+ infosection "uname -a"
+ uname -a 2>&1 >& "$INFO"
+ infosection "id"
+ id 2>&1 >& "$INFO"
+ infosection "env"
+ env 2>&1 >& "$INFO"
+ infosection "df"
+ df 2>&1 >& "$INFO"
+ if [ -e "/proc/cpuinfo" ]; then
+ infosection "/proc/cpuinfo"
+ cat /proc/cpuinfo 2>&1 >& "$INFO"
+ fi
+ if [ -e "/proc/meminfo" ]; then
+ infosection "/proc/meminfo"
+ cat /proc/meminfo 2>&1 >& "$INFO"
+ fi
+ if [ -f "$STDOUT" ] ; then
+ infosection "stdout"
+ cat $STDOUT >& "$INFO"
+ fi
+ if [ -f "$STDERR" ] ; then
+ infosection "stderr"
+ cat $STDERR >& "$INFO"
+ fi
+}
+
+logstate() {
+ echo "Progress " `date +"%Y-%m-%d %H:%M:%S.%N%z"` " $@" >& "$INFO"
+}
+
+log() {
+ echo "$@" >& "$INFO"
+}
+
+debug() {
+ [[ $DEBUG == 1 ]] && echo "$@" >& "$INFO"
+}
+
+fail() {
+ EC=$1
+ shift
+
+ if [ "$STATUSMODE" != "files" ]; then
+ mkdir -p $WFDIR/status/$JOBDIR
+ fi
+
+ echo $@ >"$WFDIR/status/$JOBDIR/${ID}-error"
+
+ log $@
+ info
+ if [ "$STATUSMODE" = "files" ]; then
+ exit 0
+ else
+ exit $EC
+ fi
+}
+
+checkError() {
+ if [ "$?" != "0" ]; then
+ fail $@
+ fi
+}
+
+checkEmpty() {
+ if [ "$1" == "" ]; then
+ shift
+ fail 254 $@
+ fi
+}
+
+checkparamfile() {
+ log "checking for paramfile"
+ if [ "$1" == "-p" ]; then
+ JOBDIR=$2
+ PARAMFILE=${WFDIR}/parameters/${JOBDIR}/param-${ID}
+ fi
+ log "paramfile is: $PARAMFILE"
+}
+
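+# getarg: fetch argument NAME either from the command line (consuming "-name value ..."
+# tokens and recording how many were consumed in SHIFTCOUNT) or, when a parameter file is
+# in use, from the "NAME value" line of $PARAMFILE. The result is left in VALUE; a missing
+# argument is fatal.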
+getarg() {
+ NAME=$1
+ shift
+ VALUE=""
+ SHIFTCOUNT=0
+ if [ "$PARAMFILE" == "" ] && [ "$1" == "$NAME" ]; then
+ shift
+ let "SHIFTCOUNT=$SHIFTCOUNT+1"
+ while [ "${1:0:1}" != "-" ] && [ "$#" != "0" ]; do
+ VALUE="$VALUE $1"
+ shift
+ let "SHIFTCOUNT=$SHIFTCOUNT+1"
+ done
+ VALUE="${VALUE:1}"
+ elif [ "$PARAMFILE" != "" ] && grep -E "^$NAME " $PARAMFILE ; then
+ VALUE=$(grep -E "^$NAME " $PARAMFILE | cut -d ' ' -f 2-)
+ else
+ fail 254 "Missing $NAME argument"
+ fi
+}
+
+openinfo() {
+ exec 3<> $1
+ INFO=3
+}
+
+closeinfo() {
+ exec 3>&-
+}
+
+contains() {
+ ARRAY=$1
+ X=$2
+
+ for a in ${!ARRAY}
+ do
+ if [[ ${a} == ${X} ]]; then
+ return 0
+ fi
+ done
+ return 1
+}
+
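+# genScripts: write an executable run.sh in the current job directory that replays the
+# exact application command line with its stdout/stderr redirections (only generated when
+# SWIFT_GEN_SCRIPTS is set).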
+genScripts() {
+ echo "#!/bin/bash" > run.sh
+ echo -n "\"$EXEC\" " >> run.sh
+ for CMDARG in "${CMDARGS[@]}"; do
+ echo -n "\"$CMDARG\" " >> run.sh
+ done
+ echo "1>\"$STDOUT\" 2>\"$STDERR\"" >> run.sh
+ chmod +x run.sh
+}
+
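+# cdm_local_output: if a CDM policy file is configured and maps output file $1 to the
+# LOCAL policy, hand it to cdm_local_output_perform.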
+cdm_local_output()
+{
+ L=$1
+
+ if [[ $CDM_FILE == "" ]]; then
+ return
+ fi
+
+ CDM_POLICY=$( cdm_lookup shared/cdm.pl $CDM_FILE $L )
+ if [[ $CDM_POLICY == "LOCAL" ]]; then
+ cdm_local_output_perform $L $CDM_POLICY
+ fi
+}
+
+cdm_local_output_perform()
+{
+ L=$1
+ TOOL=$2
+ REMOTE_DIR=$3
+ FLAGS=$3
+ log "Copying $REMOTE_DIR/$FILE to $JOBDIR/$FILE"
+ mkdir -p $REMOTE_DIR
+ checkError 254 "CDM[LOCAL]: mkdir -p $REMOTE_DIR failed!"
+ $TOOL $FLAGS $JOBDIR/$FILE $REMOTE_DIR/$FILE
+ checkError 254 "CDM[LOCAL]: Tool failed!"
+}
+
+cdm_gather()
+{
+ GATHER_OUTPUT=${*}
+ if [[ $CDM_FILE == "" ]]; then
+ return
+ fi
+ if [[ $GATHER_OUTPUT == "" ]]; then
+ return
+ fi
+
+ cdm_gather_action $GATHER_MAX $GATHER_OUTPUT
+}
+
+COMMANDLINE=$@
+
+# get the parent directory of the directory containing _swiftwrap, to use
+# as the run directory
+# this assumes that _swiftwrap is being executed from the top level of
+# the shared directory, and that shared directory is in the top level
+# of the workflow run directory
+WFDIR=$(dirname $(dirname $0))
+
+cd $WFDIR
+
+# make the WFDIR absolute
+WFDIR=$(pwd)
+PARAMFILE=
+
+openinfo "wrapper.log"
+ID=$1
+checkEmpty "$ID" "Missing job ID"
+
+shift
+
+checkparamfile "$@"
+
+# JOBDIR might have been assigned through the -p option, or might
+# be a parameter here
+if [ "$JOBDIR" == "" ] ; then
+ getarg "-jobdir" "$@"
+ JOBDIR=$VALUE
+ shift $SHIFTCOUNT
+fi
+
+getarg "-scratch" "$@"
+SCRATCH=$VALUE
+shift $SHIFTCOUNT
+
+if [ "X$PROGRESSIVE_INFO" == "X" ] && [ "X$SCRATCH" != "X" ]; then
+ INFODIR=$SCRATCH
+else
+ INFODIR=$WFDIR/info/$JOBDIR
+fi
+checkEmpty "$JOBDIR" "Missing job directory prefix"
+mkdir -p $INFODIR
+closeinfo
+
+if [ -z $MPI_RANK ]; then
+ INFOFILE="$INFODIR/${ID}-info"
+else
+ INFOFILE="$INFODIR/${ID}-${MPI_RANK}-info"
+fi
+rm -f $INFOFILE
+openinfo "$INFOFILE"
+
+logstate "LOG_START"
+infosection "Wrapper (_swiftwrap)"
+
+getarg "-e" "$@"
+EXEC=$VALUE
+shift $SHIFTCOUNT
+
+getarg "-out" "$@"
+STDOUT=$VALUE
+shift $SHIFTCOUNT
+
+getarg "-err" "$@"
+STDERR=$VALUE
+shift $SHIFTCOUNT
+
+getarg "-i" "$@"
+STDIN=$VALUE
+shift $SHIFTCOUNT
+
+getarg "-d" "$@"
+DIRS=$VALUE
+shift $SHIFTCOUNT
+
+getarg "-if" "$@"
+INF=$VALUE
+shift $SHIFTCOUNT
+
+getarg "-of" "$@"
+OUTF=$VALUE
+shift $SHIFTCOUNT
+
+getarg "-k" "$@"
+KICKSTART=$VALUE
+shift $SHIFTCOUNT
+
+getarg "-cdmfile" "$@"
+CDM_FILE=
+if [ "X$VALUE" != "X" ]; then
+ CDM_FILE=shared/$VALUE
+fi
+shift $SHIFTCOUNT
+
+getarg "-status" "$@"
+STATUSMODE=$VALUE
+shift $SHIFTCOUNT
+
+declare -a CMDARGS
+if [ "$PARAMFILE" == "" ] && [ "$1" == "-a" ] ; then
+ shift
+ CMDARGS=("$@")
+elif [ "$PARAMFILE" != "" ] ; then
+ CMDARGS=()
+ FIRST=1
+ while read line ; do
+ if [ "$FIRST" == "1" ] ; then
+ CMDARGS=("$line")
+ FIRST=0
+ else
+ CMDARGS=("${CMDARGS[@]}" "$line")
+ fi
+ done < <(grep -E "^-a " $PARAMFILE | cut -d " " -f 2-)
+else
+ fail 254 "Missing arguments (-a option)"
+fi
+
+if [ "$STATUSMODE" = "files" ]; then
+ mkdir -p $WFDIR/status/$JOBDIR
+fi
+
+if [ "X$CDM_FILE" != "X" ]; then
+ logstate "SOURCE_CDM_LIB $WFDIR/shared/cdm_lib.sh"
+ source $WFDIR/shared/cdm_lib.sh
+ checkError 254 "Could not source: $WFDIR/shared/cdm_lib.sh"
+fi
+
+if [ "X$SCRATCH" != "X" ]; then
+ log "Job directory mode is: local copy"
+ DIR=$SCRATCH/$JOBDIR/$ID
+ COPYNOTLINK=1
+else
+ log "Job directory mode is: link on shared filesystem"
+ DIR=jobs/$JOBDIR/$ID
+ COPYNOTLINK=0
+fi
+
+PATH=$PATH:/bin:/usr/bin
+
+if [ "$PATHPREFIX" != "" ]; then
+ export PATH=$PATHPREFIX:$PATH
+fi
+
+if [ "$SWIFT_EXTRA_INFO" != "" ]; then
+ log "EXTRAINFO=$($SWIFT_EXTRA_INFO)"
+fi
+
+if [ "X${EXEC:0:1}" != "X/" ] ; then
+ export ORIGEXEC=$EXEC
+ export EXEC=$(which $EXEC)
+ if [ "X$EXEC" = "X" ] ; then
+ fail 254 "Cannot find executable $ORIGEXEC on site system path"
+ fi
+fi
+
+debug "PID=$$"
+log "PWD=$PWD"
+log "DIR=$DIR"
+log "EXEC=$EXEC"
+log "STDIN=$STDIN"
+log "STDOUT=$STDOUT"
+log "STDERR=$STDERR"
+log "DIRS=$DIRS"
+log "INF=$INF"
+log "OUTF=$OUTF"
+log "KICKSTART=$KICKSTART"
+log "CDM_FILE=$CDM_FILE"
+[ ! -z "$MPI_RANK" ] && log "MPI_RANK=$MPI_RANK"
+log "ARGS=$@"
+log "ARGC=$#"
+IFS="|"
+
+logstate "CREATE_JOBDIR"
+mkdir -p $DIR
+checkError 254 "Failed to create job directory $DIR"
+log "Created job directory: $DIR"
+
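+# Only non-MPI jobs (MPI_RANK unset) or MPI rank 0 set up the input directory tree and
+# stage (link or copy) the input files; other ranks skip straight to execution.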
+if [[ $MPI_RANK == "" || $MPI_RANK == 0 ]]; then
+
+logstate "CREATE_INPUTDIR"
+for D in $DIRS ; do
+ mkdir -p "$DIR/$D" 2>&1 >>"$INFO"
+ checkError 254 "Failed to create input directory $D"
+ log "Created output directory: $DIR/$D"
+done
+
+logstate "LINK_INPUTS"
+for L in $INF ; do
+ CDM_POLICY="DEFAULT"
+ if [[ $CDM_FILE != "" ]]; then
+ CDM_POLICY=$( cdm_lookup shared/cdm.pl $CDM_FILE $L )
+ fi
+ if [[ $CDM_POLICY != "DEFAULT" && $CDM_POLICY != "EXTERNAL"* ]]; then
+ log "CDM_POLICY: $L -> $CDM_POLICY"
+ eval cdm_action $DIR "INPUT" $L $CDM_POLICY
+ continue
+ fi
+ if [ $COPYNOTLINK = 1 ]; then
+ cp "$WFDIR/shared/$L" "$DIR/$L" 2>&1 >& $INFO
+ checkError 254 "Failed to copy input file $L"
+ log "Copied input: $WFDIR/shared/$L to $DIR/$L"
+ else
+ [ -f $WFDIR/shared/$L ]
+ checkError 254 "Could not locate input file: $L"
+ ln -s "$WFDIR/shared/$L" "$DIR/$L" 2>&1 >& $INFO
+ checkError 254 "Failed to link input file $L"
+ log "Linked input: $WFDIR/shared/$L to $DIR/$L"
+ fi
+done
+
+if [ ! -z $CDM_FILE ]; then
+ logstate "LINK_CDM_OUTPUTS"
+ SKIPPED_OUTPUT=()
+ GATHER_OUTPUT=()
+ for L in $OUTF ; do
+ CDM_POLICY=$( cdm_lookup shared/cdm.pl $CDM_FILE $L )
+ if [[ $CDM_POLICY != "DEFAULT" &&
+ $CDM_POLICY != "BROADCAST"* ]]; then
+ log "CDM_POLICY: $L -> $CDM_POLICY"
+ eval cdm_action $DIR "OUTPUT" $L $CDM_POLICY
+ SKIPPED_OUTPUT=( $SKIPPED_OUTPUT $L )
+ fi
+ if [ $CDM_POLICY == "GATHER" ]; then
+ GATHER_OUTPUT=( $GATHER_OUTPUT $L )
+ elif [ $CDM_POLICY == "LOCAL" ]; then
+ CDM_LOCAL_OUTPUT=( $CDM_LOCAL_OUTPUT $L )
+ fi
+ done
+fi
+
+fi # MPI_RANK==0
+
+debug "Moving to jobdir: $DIR"
+cd $DIR
+if [ $? != 0 ]; then
+ log "PWD: $PWD"
+ log $( find . )
+ fail 254 "Could not cd to: $DIR"
+fi
+logstate "EXECUTE"
+
+debug "Command line: $EXEC ${CMDARGS[@]}"
+
+if [ ! -f "$EXEC" ]; then
+ fail 254 "The executable $EXEC does not exist"
+fi
+if [ ! -x "$EXEC" ]; then
+ fail 254 "The executable $EXEC does not have the executable bit set"
+fi
+if [ "$KICKSTART" == "" ]; then
+ if [ "$STDIN" == "" ]; then
+ if [ "$SWIFT_GEN_SCRIPTS" != "" ]; then
+ genScripts
+ fi
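+ # Run the application under /usr/bin/time so an aggregate RUNTIME_AGGR record (peak RSS,
+ # wall/system/user time, CPU share, I/O and page-fault counters, context switches) is
+ # written to the application's stderr file; it is grepped back out below and appended to
+ # the wrapper info log.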
+ /usr/bin/time -f "RUNTIME_AGGR=maxrss:%M,walltime:%e,systime:%S,usertime:%U,cpu:%P,fsin:%I,fsout:%O,timesswapped:%W,socketrecv:%r,socketsent:%s,majorpagefaults:%F,minorpagefaults:%R,contextswitchesinv:%c,contextswitchesvol:%w" "$EXEC" "${CMDARGS[@]}" 1>"$STDOUT" 2>"$STDERR"
+ #"$EXEC" "${CMDARGS[@]}" 1>"$STDOUT" 2>"$STDERR" &
+ #gather_proc_info
+ RTAGGR=$(cat "$STDERR" | grep RUNTIME_AGGR)
+ log "$RTAGGR"
+ else
+ if [ "$SWIFT_GEN_SCRIPTS" != "" ]; then
+ genScripts
+ fi
+ /usr/bin/time -f "RUNTIME_AGGR=maxrss:%M,walltime:%e,systime:%S,usertime:%U,cpu:%P,fsin:%I,fsout:%O,timesswapped:%W,socketrecv:%r,socketsent:%s,majorpagefaults:%F,minorpagefaults:%R,contextswitchesinv:%c,contextswitchesvol:%w" "$EXEC" "${CMDARGS[@]}" 1>"$STDOUT" 2>"$STDERR" <"$STDIN"
+ #"$EXEC" "${CMDARGS[@]}" 1>"$STDOUT" 2>"$STDERR" <"$STDIN" &
+ #gather_proc_info
+ RTAGGR=$(cat "$STDERR" | grep RUNTIME_AGGR)
+ log "$RTAGGR"
+ fi
+ checkError $? "Application $EXEC failed with an exit code of $?"
+else
+ if [ ! -f "$KICKSTART" ]; then
+ log "Kickstart executable ($KICKSTART) not found"
+ fail 254 "The Kickstart executable ($KICKSTART) was not found"
+ elif [ ! -x "$KICKSTART" ]; then
+ log "Kickstart executable ($KICKSTART) is not executable"
+ fail 254 "The Kickstart executable ($KICKSTART) does not have the executable bit set"
+ else
+ mkdir -p $WFDIR/kickstart/$JOBDIR
+ log "Using Kickstart ($KICKSTART)"
+ if [ "$STDIN" == "" ]; then
+ "$KICKSTART" -H -o "$STDOUT" -e "$STDERR" "$EXEC" "$@" 1>kickstart.xml 2>"$STDERR"
+ else
+ "$KICKSTART" -H -o "$STDOUT" -i "$STDIN" -e "$STDERR" "$EXEC" "$@" 1>kickstart.xml 2>"$STDERR"
+ fi
+ export APPEXIT=$?
+ mv -f kickstart.xml "$WFDIR/kickstart/$JOBDIR/$ID-kickstart.xml" 2>&1 >& "$INFO"
+ checkError 254 "Failed to copy Kickstart record to shared directory"
+ if [ "$APPEXIT" != "0" ]; then
+ fail $APPEXIT "Application $EXEC failed with an exit code of $APPEXIT"
+ fi
+ fi
+fi
+
+log "Moving back to workflow directory $WFDIR"
+cd $WFDIR
+if [ $? != 0 ]; then
+ fail 254 "Could not cd to workflow directory: $WFDIR"
+fi
+
+logstate "EXECUTE_DONE"
+log "Job ran successfully"
+
+if [[ $MPI_RANK == "" || $MPI_RANK == 0 ]]; then
+
+MISSING=
+for O in $OUTF ; do
+ if [ ! -f "$DIR/$O" ]; then
+ if [ "$MISSING" == "" ]; then
+ MISSING=$O
+ else
+ MISSING="$MISSING, $O"
+ fi
+ fi
+done
+if [ "$MISSING" != "" ]; then
+ log $( find . )
+ fail 254 "The following output files were not created by the application: $MISSING"
+fi
+
+logstate "MOVING_OUTPUTS $OUTF"
+for O in $OUTF ; do
+ if ! contains SKIPPED_OUTPUT $O ; then
+ mv "$DIR/$O" "$WFDIR/shared/$O" 2>&1 >& "$INFO"
+ checkError 254 "Failed to move output file $O to shared directory"
+ fi
+done
+
+cdm_local_output $CDM_LOCAL_OUTPUT
+cdm_gather $GATHER_OUTPUT
+
+logstate "RM_JOBDIR"
+rm -rf "$DIR" 2>&1 >& "$INFO"
+checkError 254 "Failed to remove job directory $DIR"
+
+if [ "$STATUSMODE" = "files" ]; then
+ logstate "TOUCH_SUCCESS"
+ touch $WFDIR/status/${JOBDIR}/${ID}-success
+fi
+
+else
+ # Allow rank 0 to write output
+ sleep 1
+fi # MPI_RANK==0
+
+logstate "END"
+
+closeinfo
+
+if [ "X$PROGRESSIVE_INFO" == "X" ] && [ "X$SCRATCH" != "X" ]; then
+ mkdir -p "$WFDIR/info/$JOBDIR"
+ mv "$INFODIR/${ID}-info" "$WFDIR/info/$JOBDIR/${ID}-info"
+fi
+
+# ensure we exit with a 0 after a successful execution
+exit 0
+
+# Local Variables:
+# mode: sh
+# sh-basic-offset: 4
+# tab-width: 4
+# indent-tabs-mode: 1
+# End:
Property changes on: provenancedb/swift_mod/_swiftwrap_runtime_aggregate
___________________________________________________________________
Added: svn:executable
+ *
Added: provenancedb/swift_mod/_swiftwrap_runtime_snapshots
===================================================================
--- provenancedb/swift_mod/_swiftwrap_runtime_snapshots (rev 0)
+++ provenancedb/swift_mod/_swiftwrap_runtime_snapshots 2012-05-10 12:27:54 UTC (rev 5786)
@@ -0,0 +1,595 @@
+#!/bin/bash
+# this script must be invoked inside of bash, not plain sh
+# note that this script modifies $IFS
+
+# Toggle debugging output from debug()
+DEBUG=0
+
+infosection() {
+ echo >& "$INFO"
+ echo "_____________________________________________________________________________" >& "$INFO"
+ echo >& "$INFO"
+ echo " $1" >& "$INFO"
+ echo "_____________________________________________________________________________" >& "$INFO"
+ echo >& "$INFO"
+}
+
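+# gather_proc_info: periodically sample the backgrounded application (PID taken from $!)
+# through /proc/<pid>/status and /proc/<pid>/io, logging one RUNTIME_INFO snapshot per
+# pass. Unlike the aggregate variant, sampling starts at 1s and the interval grows by 1s
+# per sample (capped at 60s); the final wait returns the application's exit status to the
+# caller.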
+gather_proc_info()
+{
+ EXEC_PID=$!
+ SAMPLING_INTERVAL=1
+ while true
+ do
+ sleep $SAMPLING_INTERVAL
+
+ MAX_VIRTUAL_MEM=$(grep "VmPeak:" /proc/$EXEC_PID/status | awk '{print $2}')
+ if [ -z "$MAX_VIRTUAL_MEM" ]; then
+ #process finished
+ break
+ fi
+
+ MAX_PHYS_MEM=$(grep "VmHWM:" /proc/$EXEC_PID/status | awk '{print $2}')
+ if [ -z "$MAX_PHYS_MEM" ]; then
+ #process finished
+ break
+ fi
+
+ READ_BYTES=$(grep "read_bytes:" /proc/$EXEC_PID/io | awk '{print $2}')
+ if [ -z "$READ_BYTES" ]; then
+ #process finished
+ break
+ fi
+
+ WRITE_BYTES=$(grep "^write_bytes:" /proc/$EXEC_PID/io | awk '{print $2}')
+ if [ -z "$WRITE_BYTES" ]; then
+ #process finished
+ break
+ fi
+
+ STEP_DATE=$(date +%s)
+ PSLINE=$(ps auxw | grep $EXEC_PID | grep -v grep)
+ if [ -z "$PSLINE" ]; then
+ #process finished
+ break
+ fi
+ CPU_USAGE=$(echo $PSLINE | awk '{print $3}')
+ log "RUNTIME_INFO=timestamp:$STEP_DATE,cpu_usage:$CPU_USAGE,max_phys_mem:$MAX_PHYS_MEM,max_virtual_mem:$MAX_VIRTUAL_MEM,io_read_bytes:$READ_BYTES,io_write_bytes:$WRITE_BYTES"
+ if [ "$SAMPLING_INTERVAL" -lt 60 ]; then
+ let "SAMPLING_INTERVAL=$SAMPLING_INTERVAL+1"
+ fi
+ done
+ wait $EXEC_PID
+}
+
+info() {
+ infosection "command line"
+ echo $COMMANDLINE 2>&1 >& "$INFO"
+ infosection "uname -a"
+ uname -a 2>&1 >& "$INFO"
+ infosection "id"
+ id 2>&1 >& "$INFO"
+ infosection "env"
+ env 2>&1 >& "$INFO"
+ infosection "df"
+ df 2>&1 >& "$INFO"
+ if [ -e "/proc/cpuinfo" ]; then
+ infosection "/proc/cpuinfo"
+ cat /proc/cpuinfo 2>&1 >& "$INFO"
+ fi
+ if [ -e "/proc/meminfo" ]; then
+ infosection "/proc/meminfo"
+ cat /proc/meminfo 2>&1 >& "$INFO"
+ fi
+ if [ -f "$STDOUT" ] ; then
+ infosection "stdout"
+ cat $STDOUT >& "$INFO"
+ fi
+ if [ -f "$STDERR" ] ; then
+ infosection "stderr"
+ cat $STDERR >& "$INFO"
+ fi
+}
+
+logstate() {
+ echo "Progress " `date +"%Y-%m-%d %H:%M:%S.%N%z"` " $@" >& "$INFO"
+}
+
+log() {
+ echo "$@" >& "$INFO"
+}
+
+debug() {
+ [[ $DEBUG == 1 ]] && echo "$@" >& "$INFO"
+}
+
+fail() {
+ EC=$1
+ shift
+
+ if [ "$STATUSMODE" != "files" ]; then
+ mkdir -p $WFDIR/status/$JOBDIR
+ fi
+
+ echo $@ >"$WFDIR/status/$JOBDIR/${ID}-error"
+
+ log $@
+ info
+ if [ "$STATUSMODE" = "files" ]; then
+ exit 0
+ else
+ exit $EC
+ fi
+}
+
+checkError() {
+ if [ "$?" != "0" ]; then
+ fail $@
+ fi
+}
+
+checkEmpty() {
+ if [ "$1" == "" ]; then
+ shift
+ fail 254 $@
+ fi
+}
+
+checkparamfile() {
+ log "checking for paramfile"
+ if [ "$1" == "-p" ]; then
+ JOBDIR=$2
+ PARAMFILE=${WFDIR}/parameters/${JOBDIR}/param-${ID}
+ fi
+ log "paramfile is: $PARAMFILE"
+}
+
+getarg() {
+ NAME=$1
+ shift
+ VALUE=""
+ SHIFTCOUNT=0
+ if [ "$PARAMFILE" == "" ] && [ "$1" == "$NAME" ]; then
+ shift
+ let "SHIFTCOUNT=$SHIFTCOUNT+1"
+ while [ "${1:0:1}" != "-" ] && [ "$#" != "0" ]; do
+ VALUE="$VALUE $1"
+ shift
+ let "SHIFTCOUNT=$SHIFTCOUNT+1"
+ done
+ VALUE="${VALUE:1}"
+ elif [ "$PARAMFILE" != "" ] && grep -E "^$NAME " $PARAMFILE ; then
+ VALUE=$(grep -E "^$NAME " $PARAMFILE | cut -d ' ' -f 2-)
+ else
+ fail 254 "Missing $NAME argument"
+ fi
+}
+
+openinfo() {
+ exec 3<> $1
+ INFO=3
+}
+
+closeinfo() {
+ exec 3>&-
+}
+
+contains() {
+ ARRAY=$1
+ X=$2
+
+ for a in ${!ARRAY}
+ do
+ if [[ ${a} == ${X} ]]; then
+ return 0
+ fi
+ done
+ return 1
+}
+
+genScripts() {
+ echo "#!/bin/bash" > run.sh
+ echo -n "\"$EXEC\" " >> run.sh
+ for CMDARG in "${CMDARGS[@]}"; do
+ echo -n "\"$CMDARG\" " >> run.sh
+ done
+ echo "1>\"$STDOUT\" 2>\"$STDERR\"" >> run.sh
+ chmod +x run.sh
+}
+
+cdm_local_output()
+{
+ L=$1
+
+ if [[ $CDM_FILE == "" ]]; then
+ return
+ fi
+
+ CDM_POLICY=$( cdm_lookup shared/cdm.pl $CDM_FILE $L )
+ if [[ $CDM_POLICY == "LOCAL" ]]; then
+ cdm_local_output_perform $L $CDM_POLICY
+ fi
+}
+
+cdm_local_output_perform()
+{
+ L=$1
+ TOOL=$2
+ REMOTE_DIR=$3
+ FLAGS=$3
+ log "Copying $REMOTE_DIR/$FILE to $JOBDIR/$FILE"
+ mkdir -p $REMOTE_DIR
+ checkError 254 "CDM[LOCAL]: mkdir -p $REMOTE_DIR failed!"
+ $TOOL $FLAGS $JOBDIR/$FILE $REMOTE_DIR/$FILE
+ checkError 254 "CDM[LOCAL]: Tool failed!"
+}
+
+cdm_gather()
+{
+ GATHER_OUTPUT=${*}
+ if [[ $CDM_FILE == "" ]]; then
+ return
+ fi
+ if [[ $GATHER_OUTPUT == "" ]]; then
+ return
+ fi
+
+ cdm_gather_action $GATHER_MAX $GATHER_OUTPUT
+}
+
+COMMANDLINE=$@
+
+# get the parent directory of the directory containing _swiftwrap, to use
+# as the run directory
+# this assumes that _swiftwrap is being executed from the top level of
+# the shared directory, and that shared directory is in the top level
+# of the workflow run directory
+WFDIR=$(dirname $(dirname $0))
+
+cd $WFDIR
+
+# make the WFDIR absolute
+WFDIR=$(pwd)
+PARAMFILE=
+
+openinfo "wrapper.log"
+ID=$1
+checkEmpty "$ID" "Missing job ID"
+
+shift
+
+checkparamfile "$@"
+
+# JOBDIR might have been assigned through the -p option, or might
+# be a parameter here
+if [ "$JOBDIR" == "" ] ; then
+ getarg "-jobdir" "$@"
+ JOBDIR=$VALUE
+ shift $SHIFTCOUNT
+fi
+
+getarg "-scratch" "$@"
+SCRATCH=$VALUE
+shift $SHIFTCOUNT
+
+if [ "X$PROGRESSIVE_INFO" == "X" ] && [ "X$SCRATCH" != "X" ]; then
+ INFODIR=$SCRATCH
+else
+ INFODIR=$WFDIR/info/$JOBDIR
+fi
+checkEmpty "$JOBDIR" "Missing job directory prefix"
+mkdir -p $INFODIR
+closeinfo
+
+if [ -z $MPI_RANK ]; then
+ INFOFILE="$INFODIR/${ID}-info"
+else
+ INFOFILE="$INFODIR/${ID}-${MPI_RANK}-info"
+fi
+rm -f $INFOFILE
+openinfo "$INFOFILE"
+
+logstate "LOG_START"
+infosection "Wrapper (_swiftwrap)"
+
+getarg "-e" "$@"
+EXEC=$VALUE
+shift $SHIFTCOUNT
+
+getarg "-out" "$@"
+STDOUT=$VALUE
+shift $SHIFTCOUNT
+
+getarg "-err" "$@"
+STDERR=$VALUE
+shift $SHIFTCOUNT
+
+getarg "-i" "$@"
+STDIN=$VALUE
+shift $SHIFTCOUNT
+
+getarg "-d" "$@"
+DIRS=$VALUE
+shift $SHIFTCOUNT
+
+getarg "-if" "$@"
+INF=$VALUE
+shift $SHIFTCOUNT
+
+getarg "-of" "$@"
+OUTF=$VALUE
+shift $SHIFTCOUNT
+
+getarg "-k" "$@"
+KICKSTART=$VALUE
+shift $SHIFTCOUNT
+
+getarg "-cdmfile" "$@"
+CDM_FILE=
+if [ "X$VALUE" != "X" ]; then
+ CDM_FILE=shared/$VALUE
+fi
+shift $SHIFTCOUNT
+
+getarg "-status" "$@"
+STATUSMODE=$VALUE
+shift $SHIFTCOUNT
+
+declare -a CMDARGS
+if [ "$PARAMFILE" == "" ] && [ "$1" == "-a" ] ; then
+ shift
+ CMDARGS=("$@")
+elif [ "$PARAMFILE" != "" ] ; then
+ CMDARGS=()
+ FIRST=1
+ while read line ; do
+ if [ "$FIRST" == "1" ] ; then
+ CMDARGS=("$line")
+ FIRST=0
+ else
+ CMDARGS=("${CMDARGS[@]}" "$line")
+ fi
+ done < <(grep -E "^-a " $PARAMFILE | cut -d " " -f 2-)
+else
+ fail 254 "Missing arguments (-a option)"
+fi
+
+if [ "$STATUSMODE" = "files" ]; then
+ mkdir -p $WFDIR/status/$JOBDIR
+fi
+
+if [ "X$CDM_FILE" != "X" ]; then
+ logstate "SOURCE_CDM_LIB $WFDIR/shared/cdm_lib.sh"
+ source $WFDIR/shared/cdm_lib.sh
+ checkError 254 "Could not source: $WFDIR/shared/cdm_lib.sh"
+fi
+
+if [ "X$SCRATCH" != "X" ]; then
+ log "Job directory mode is: local copy"
+ DIR=$SCRATCH/$JOBDIR/$ID
+ COPYNOTLINK=1
+else
+ log "Job directory mode is: link on shared filesystem"
+ DIR=jobs/$JOBDIR/$ID
+ COPYNOTLINK=0
+fi
+
+PATH=$PATH:/bin:/usr/bin
+
+if [ "$PATHPREFIX" != "" ]; then
+ export PATH=$PATHPREFIX:$PATH
+fi
+
+if [ "$SWIFT_EXTRA_INFO" != "" ]; then
+ log "EXTRAINFO=$($SWIFT_EXTRA_INFO)"
+fi
+
+if [ "X${EXEC:0:1}" != "X/" ] ; then
+ export ORIGEXEC=$EXEC
+ export EXEC=$(which $EXEC)
+ if [ "X$EXEC" = "X" ] ; then
+ fail 254 "Cannot find executable $ORIGEXEC on site system path"
+ fi
+fi
+
+debug "PID=$$"
+log "PWD=$PWD"
+log "DIR=$DIR"
+log "EXEC=$EXEC"
+log "STDIN=$STDIN"
+log "STDOUT=$STDOUT"
+log "STDERR=$STDERR"
+log "DIRS=$DIRS"
+log "INF=$INF"
+log "OUTF=$OUTF"
+log "KICKSTART=$KICKSTART"
+log "CDM_FILE=$CDM_FILE"
+[ ! -z "$MPI_RANK" ] && log "MPI_RANK=$MPI_RANK"
+log "ARGS=$@"
+log "ARGC=$#"
+IFS="|"
+
+logstate "CREATE_JOBDIR"
+mkdir -p $DIR
+checkError 254 "Failed to create job directory $DIR"
+log "Created job directory: $DIR"
+
+if [[ $MPI_RANK == "" || $MPI_RANK == 0 ]]; then
+
+logstate "CREATE_INPUTDIR"
+for D in $DIRS ; do
+ mkdir -p "$DIR/$D" 2>&1 >>"$INFO"
+ checkError 254 "Failed to create input directory $D"
+ log "Created output directory: $DIR/$D"
+done
+
+logstate "LINK_INPUTS"
+for L in $INF ; do
+ CDM_POLICY="DEFAULT"
+ if [[ $CDM_FILE != "" ]]; then
+ CDM_POLICY=$( cdm_lookup shared/cdm.pl $CDM_FILE $L )
+ fi
+ if [[ $CDM_POLICY != "DEFAULT" && $CDM_POLICY != "EXTERNAL"* ]]; then
+ log "CDM_POLICY: $L -> $CDM_POLICY"
+ eval cdm_action $DIR "INPUT" $L $CDM_POLICY
+ continue
+ fi
+ if [ $COPYNOTLINK = 1 ]; then
+ cp "$WFDIR/shared/$L" "$DIR/$L" 2>&1 >& $INFO
+ checkError 254 "Failed to copy input file $L"
+ log "Copied input: $WFDIR/shared/$L to $DIR/$L"
+ else
+ [ -f $WFDIR/shared/$L ]
+ checkError 254 "Could not locate input file: $L"
+ ln -s "$WFDIR/shared/$L" "$DIR/$L" 2>&1 >& $INFO
+ checkError 254 "Failed to link input file $L"
+ log "Linked input: $WFDIR/shared/$L to $DIR/$L"
+ fi
+done
+
+if [ ! -z $CDM_FILE ]; then
+ logstate "LINK_CDM_OUTPUTS"
+ SKIPPED_OUTPUT=()
+ GATHER_OUTPUT=()
+ for L in $OUTF ; do
+ CDM_POLICY=$( cdm_lookup shared/cdm.pl $CDM_FILE $L )
+ if [[ $CDM_POLICY != "DEFAULT" &&
+ $CDM_POLICY != "BROADCAST"* ]]; then
+ log "CDM_POLICY: $L -> $CDM_POLICY"
+ eval cdm_action $DIR "OUTPUT" $L $CDM_POLICY
+ SKIPPED_OUTPUT=( $SKIPPED_OUTPUT $L )
+ fi
+ if [ $CDM_POLICY == "GATHER" ]; then
+ GATHER_OUTPUT=( $GATHER_OUTPUT $L )
+ elif [ $CDM_POLICY == "LOCAL" ]; then
+ CDM_LOCAL_OUTPUT=( $CDM_LOCAL_OUTPUT $L )
+ fi
+ done
+fi
+
+fi # MPI_RANK==0
+
+debug "Moving to jobdir: $DIR"
+cd $DIR
+if [ $? != 0 ]; then
+ log "PWD: $PWD"
+ log $( find . )
+ fail 254 "Could not cd to: $DIR"
+fi
+logstate "EXECUTE"
+
+debug "Command line: $EXEC ${CMDARGS[@]}"
+
+if [ ! -f "$EXEC" ]; then
+ fail 254 "The executable $EXEC does not exist"
+fi
+if [ ! -x "$EXEC" ]; then
+ fail 254 "The executable $EXEC does not have the executable bit set"
+fi
+if [ "$KICKSTART" == "" ]; then
+ if [ "$STDIN" == "" ]; then
+ if [ "$SWIFT_GEN_SCRIPTS" != "" ]; then
+ genScripts
+ fi
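+ # Launch the application in the background and sample it until it exits;
+ # gather_proc_info's final wait propagates the application's exit status to checkError.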
+ "$EXEC" "${CMDARGS[@]}" 1>"$STDOUT" 2>"$STDERR" &
+ gather_proc_info
+ else
+ if [ "$SWIFT_GEN_SCRIPTS" != "" ]; then
+ genScripts
+ fi
+ "$EXEC" "${CMDARGS[@]}" 1>"$STDOUT" 2>"$STDERR" <"$STDIN" &
+ gather_proc_info
+ fi
+ checkError $? "Application $EXEC failed with an exit code of $?"
+else
+ if [ ! -f "$KICKSTART" ]; then
+ log "Kickstart executable ($KICKSTART) not found"
+ fail 254 "The Kickstart executable ($KICKSTART) was not found"
+ elif [ ! -x "$KICKSTART" ]; then
+ log "Kickstart executable ($KICKSTART) is not executable"
+ fail 254 "The Kickstart executable ($KICKSTART) does not have the executable bit set"
+ else
+ mkdir -p $WFDIR/kickstart/$JOBDIR
+ log "Using Kickstart ($KICKSTART)"
+ if [ "$STDIN" == "" ]; then
+ "$KICKSTART" -H -o "$STDOUT" -e "$STDERR" "$EXEC" "$@" 1>kickstart.xml 2>"$STDERR"
+ else
+ "$KICKSTART" -H -o "$STDOUT" -i "$STDIN" -e "$STDERR" "$EXEC" "$@" 1>kickstart.xml 2>"$STDERR"
+ fi
+ export APPEXIT=$?
+ mv -f kickstart.xml "$WFDIR/kickstart/$JOBDIR/$ID-kickstart.xml" 2>&1 >& "$INFO"
+ checkError 254 "Failed to copy Kickstart record to shared directory"
+ if [ "$APPEXIT" != "0" ]; then
+ fail $APPEXIT "Application $EXEC failed with an exit code of $APPEXIT"
+ fi
+ fi
+fi
+
+log "Moving back to workflow directory $WFDIR"
+cd $WFDIR
+if [ $? != 0 ]; then
+ fail 254 "Could not cd to workflow directory: $WFDIR"
+fi
+
+logstate "EXECUTE_DONE"
+log "Job ran successfully"
+
+if [[ $MPI_RANK == "" || $MPI_RANK == 0 ]]; then
+
+MISSING=
+for O in $OUTF ; do
+ if [ ! -f "$DIR/$O" ]; then
+ if [ "$MISSING" == "" ]; then
+ MISSING=$O
+ else
+ MISSING="$MISSING, $O"
+ fi
+ fi
+done
+if [ "$MISSING" != "" ]; then
+ log $( find . )
+ fail 254 "The following output files were not created by the application: $MISSING"
+fi
+
+logstate "MOVING_OUTPUTS $OUTF"
+for O in $OUTF ; do
+ if ! contains SKIPPED_OUTPUT $O ; then
+ mv "$DIR/$O" "$WFDIR/shared/$O" 2>&1 >& "$INFO"
+ checkError 254 "Failed to move output file $O to shared directory"
+ fi
+done
+
+cdm_local_output $CDM_LOCAL_OUTPUT
+cdm_gather $GATHER_OUTPUT
+
+logstate "RM_JOBDIR"
+rm -rf "$DIR" 2>&1 >& "$INFO"
+checkError 254 "Failed to remove job directory $DIR"
+
+if [ "$STATUSMODE" = "files" ]; then
+ logstate "TOUCH_SUCCESS"
+ touch $WFDIR/status/${JOBDIR}/${ID}-success
+fi
+
+else
+ # Allow rank 0 to write output
+ sleep 1
+fi # MPI_RANK==0
+
+logstate "END"
+
+closeinfo
+
+if [ "X$PROGRESSIVE_INFO" == "X" ] && [ "X$SCRATCH" != "X" ]; then
+ mkdir -p "$WFDIR/info/$JOBDIR"
+ mv "$INFODIR/${ID}-info" "$WFDIR/info/$JOBDIR/${ID}-info"
+fi
+
+# ensure we exit with a 0 after a successful execution
+exit 0
+
+# Local Variables:
+# mode: sh
+# sh-basic-offset: 4
+# tab-width: 4
+# indent-tabs-mode: 1
+# End:
Property changes on: provenancedb/swift_mod/_swiftwrap_runtime_snapshots
___________________________________________________________________
Added: svn:executable
+ *
Added: provenancedb/swift_mod/create-everylog-vs-versions-data
===================================================================
--- provenancedb/swift_mod/create-everylog-vs-versions-data (rev 0)
+++ provenancedb/swift_mod/create-everylog-vs-versions-data 2012-05-10 12:27:54 UTC (rev 5786)
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+echo Creating log/version data file: everylog-vs-versions.data
+
+rm -f everylog-vs-versions.data
+
+
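+# For each Swift run log under $1, extract the Swift and cog revision numbers and the run
+# start time (first log line converted to seconds by the iso-to-secs helper), then append
+# a "start_secs swift_rev cog_rev logfile" line to everylog-vs-versions.data.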
+for logfile in `find $1 -name \*.log -and -not -name cluster-\* -and -not -name swift.log $EXTRALOGRESTRICTION`; do
+ SWIFT_VERSION=$(grep -m 1 -E 'Swift .* swift-r[0-9]*' $logfile | sed 's/.*Swift .* swift-r\([0-9]*\).*/\1/')
+ COG_VERSION=$(grep -m 1 -E 'Swift .* swift-r[0-9]*' $logfile | sed 's/.*Swift .* cog-r\([0-9]*\).*/\1/')
+ START=$(head -n1 < $logfile | iso-to-secs | cut -f 1 -d ' ')
+ if [ "X$SWIFT_VERSION" == "X" ]; then
+ SWIFT_VERSION=na
+ COG_VERSION=na
+ fi
+ if [ "X$START" != "X" ]; then
+ echo $START $SWIFT_VERSION $COG_VERSION $logfile >> everylog-vs-versions.data
+ fi
+
+done
+
+echo Finished creating log/version data file
Property changes on: provenancedb/swift_mod/create-everylog-vs-versions-data
___________________________________________________________________
Added: svn:executable
+ *