[Swift-commit] r7530 - provenancedb
lgadelha at ci.uchicago.edu
lgadelha at ci.uchicago.edu
Thu Jan 30 17:29:44 CST 2014
Author: lgadelha
Date: 2014-01-30 17:29:44 -0600 (Thu, 30 Jan 2014)
New Revision: 7530
Modified:
provenancedb/prepare-provenance-chart
provenancedb/prov-init.sql
provenancedb/prov-to-sql.sh
Log:
Gathering staged-in and staged-out files
Added some views for queries about file staging
Modified: provenancedb/prepare-provenance-chart
===================================================================
--- provenancedb/prepare-provenance-chart 2014-01-30 23:20:58 UTC (rev 7529)
+++ provenancedb/prepare-provenance-chart 2014-01-30 23:29:44 UTC (rev 7530)
@@ -14,6 +14,8 @@
cat $1 | grep ' CREATEARRAY MEMBER ' | sed 's/^.* array=\([^ ]*\) index=\([^ ]*\) member=\([^ ]*\).*$/\1 \2 \3/' > createarray-members.txt
cat $1 | grep ' ARRAYRANGE ' | sed 's/^.* thread=\([^ ]*\).*$/\1/' > arrayrange.txt
cat $1 | grep ' SCOPE ' | sed 's/^.* thread=\([^ ]*\).*/\1/' > scopes.txt
+cat $1 | grep ' FILE_STAGE_OUT_START ' | sed "s/^.* srcname=\([^ ]*\).*desthost=\([^ ]*\).*jobid=\([^ ]*\).*\$/\1 \2 \3/" > stage-outs.txt # one line per stage-out event: srcname desthost jobid
+cat $1 | grep ' Staging in files ' | sed "s/^.* jobid=\([^ ]*\).*Staging in files \([^$]*\).*\$/\1 \2/" > stage-ins.txt # one line per stage-in event: jobid, then the text that follows "Staging in files "
awk '/BEGIN SWIFTSCRIPT/,/END SWIFTSCRIPT/{if (!/BEGIN SWIFTSCRIPT/&&!/END SWIFTSCRIPT/)print}' $1 > script.txt
awk '/BEGIN SITES/,/END SITES/{if (!/BEGIN SITES/&&!/END SITES/)print}' $1 > sites.txt
awk '/BEGIN TC/,/END TC/{if (!/BEGIN TC/&&!/END TC/)print}' $1 > tc.txt
Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql 2014-01-30 23:20:58 UTC (rev 7529)
+++ provenancedb/prov-init.sql 2014-01-30 23:29:44 UTC (rev 7530)
@@ -30,6 +30,8 @@
drop table primitive cascade;
drop table dataset_containment cascade;
drop table ds cascade;
+drop table stage_in cascade; -- cascade, like the drops above, so views built on the staging tables cannot block the drop
+drop table stage_out cascade;
drop view script_and_fun_call;
drop view script_to_app_fun_call;
drop view script_to_app_exec;
@@ -161,6 +163,18 @@
exit_status numeric
);
+create table stage_in ( -- one row per file staged in for an application execution
+ app_exec_id varchar(256) references app_exec (id), -- app_exec row the file was staged in for
+ filename varchar(1024),
+ primary key (app_exec_id, filename) -- a given file is recorded at most once per execution
+);
+
+create table stage_out ( -- one row per file staged out for an application execution
+ app_exec_id varchar(256) references app_exec (id), -- app_exec row the file was staged out for
+ filename varchar(1024),
+ primary key (app_exec_id, filename) -- a given file is recorded at most once per execution
+);
+
create view application_execution as
select id, app_fun_call_id as function_call_id, to_timestamp(start_time) as start_time, duration, final_state, site
from app_exec;
@@ -500,4 +514,37 @@
select script_run_id, script_filename, input_dataset_id, input_dataset_type, input_parameter_name, input_dataset_value,
input_dataset_filename, function_call_id, function_call_type, function_call_name, output_dataset_id, output_parameter_name,
output_dataset_type, output_dataset_value, output_dataset_filename
-from provenance_all;
\ No newline at end of file
+from provenance_all;
+
+create view app_exec_stage_in as -- each staged-in file next to the app_exec row it was staged in for
+select si.filename as staged_in_filename, ae.id as app_exec_id, ae.app_fun_call_id, ae.start_time as app_exec_start_time,
+ ae.duration as app_exec_duration, ae.final_state as app_exec_final_state, ae.site as app_exec_site, ae.real_secs as app_exec_real_secs,
+ ae.kernel_secs as app_exec_kernel_secs, ae.user_secs as app_exec_user_secs, ae.percent_cpu as app_exec_percent_cpu, ae.max_rss as app_exec_max_rss,
+ ae.avg_rss as app_exec_avg_rss, ae.avg_tot_vm as app_exec_avg_tot_vm, ae.avg_priv_data as app_exec_avg_priv_data, ae.avg_priv_stack as app_exec_avg_priv_stack,
+ ae.avg_shared_text as app_exec_avg_shared_text, ae.page_size as app_exec_page_size, ae.major_pgfaults as app_exec_major_pgfaults, ae.minor_pgfaults as app_exec_minor_pgfaults,
+ ae.swaps as app_exec_swaps, ae.invol_context_switches as app_exec_invol_context_switches, ae.vol_waits as app_exec_vol_waits,
+ ae.fs_reads as app_exec_fs_reads, ae.fs_writes as app_exec_fs_writes, ae.sock_recv as app_exec_sock_recv,
+ ae.sock_send as app_exec_sock_send, ae.signals as app_exec_signals, ae.exit_status as app_exec_exit_status
+from stage_in as si inner join app_exec as ae on si.app_exec_id = ae.id;
+
+create view app_exec_stage_out as -- each staged-out file next to the app_exec row it was staged out for
+select filename as staged_out_filename, id as app_exec_id, app_fun_call_id, start_time as app_exec_start_time, duration as app_exec_duration,
+ final_state as app_exec_final_state , site as app_exec_site, real_secs as app_exec_real_secs, kernel_secs as app_exec_kernel_secs,
+ user_secs as app_exec_user_secs, percent_cpu as app_exec_percent_cpu, max_rss as app_exec_max_rss, avg_rss as app_exec_avg_rss,
+ avg_tot_vm as app_exec_avg_tot_vm, avg_priv_data as app_exec_avg_priv_data, avg_priv_stack as app_exec_avg_priv_stack,
+ avg_shared_text as app_exec_avg_shared_text, page_size as app_exec_page_size, major_pgfaults as app_exec_major_pgfaults,
+ minor_pgfaults as app_exec_minor_pgfaults, swaps as app_exec_swaps, invol_context_switches as app_exec_invol_context_switches,
+ vol_waits as app_exec_vol_waits, fs_reads as app_exec_fs_reads, fs_writes as app_exec_fs_writes, sock_recv as app_exec_sock_recv,
+ sock_send as app_exec_sock_send, signals as app_exec_signals, exit_status as app_exec_exit_status
+from stage_out inner join app_exec on stage_out.app_exec_id=app_exec.id; -- join on stage_out: stage_in is not part of this view
+
+create view app_exec_staging as -- pairs every staged-in file with every staged-out file of the same execution; executions lacking either kind yield no rows
+select stage_in.filename as staged_in_filename, id as app_exec_id, app_fun_call_id, start_time as app_exec_start_time, duration as app_exec_duration,
+ final_state as app_exec_final_state , site as app_exec_site, real_secs as app_exec_real_secs, kernel_secs as app_exec_kernel_secs,
+ user_secs as app_exec_user_secs, percent_cpu as app_exec_percent_cpu, max_rss as app_exec_max_rss, avg_rss as app_exec_avg_rss,
+ avg_tot_vm as app_exec_avg_tot_vm, avg_priv_data as app_exec_avg_priv_data, avg_priv_stack as app_exec_avg_priv_stack,
+ avg_shared_text as app_exec_avg_shared_text, page_size as app_exec_page_size, major_pgfaults as app_exec_major_pgfaults,
+ minor_pgfaults as app_exec_minor_pgfaults, swaps as app_exec_swaps, invol_context_switches as app_exec_invol_context_switches,
+ vol_waits as app_exec_vol_waits, fs_reads as app_exec_fs_reads, fs_writes as app_exec_fs_writes, sock_recv as app_exec_sock_recv,
+ sock_send as app_exec_sock_send, signals as app_exec_signals, exit_status as app_exec_exit_status, stage_out.filename as staged_out_filename
+from app_exec inner join stage_in on stage_in.app_exec_id=app_exec.id inner join stage_out on stage_out.app_exec_id=app_exec.id;
Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh 2014-01-30 23:20:58 UTC (rev 7529)
+++ provenancedb/prov-to-sql.sh 2014-01-30 23:29:44 UTC (rev 7530)
@@ -131,7 +131,14 @@
echo "INSERT INTO $table (function_call_id, dataset_id, parameter) VALUES ('$thread', '$dataset', '$variable');" >> /tmp/$RUNID.sql
done < tie-data-invocs.txt
+#echo " - Dataset stage-ins and stage-outs."
+#while read filename execute; do
+# thread=$(grep $execute execute2.event | cut -f 5 -d " ")
+# dataset_id=$(grep "$thread " tie-data-invocs.txt | grep output | cut -f 3 -d " ")
+# echo "INSERT INTO mapped values ('$dataset_id', '$filename');" >> /tmp/$RUNID.sql
+#done < stage-outs.txt
+
echo " - Wrapper log resource consumption info."
if [ -f runtime.txt ]; then
while read execute2_id runtime; do
@@ -192,6 +199,21 @@
done < extrainfo.txt
fi
+echo " - Stage ins and stage outs."
+while read execute2_id stage_in; do # each line: job id, then the bracketed, comma-separated list of staged-in files
+ app_exec_id=$WFID$execute2_id
+ for isid in $(echo $stage_in); do
+ filename=$(echo $isid | sed "s/^\[//" | sed "s/\,$//" | sed "s/\]$//" | sed "s/\/\.\//\//g" | sed "s/'/''/g"); # strip list punctuation, collapse "/./", double single quotes so the SQL literal stays valid
+ echo "INSERT INTO stage_in (app_exec_id, filename) VALUES ('$app_exec_id', '$filename');" >> /tmp/$RUNID.sql
+ done;
+done < stage-ins.txt
+
+while read stage_out desthost execute2_id; do # each line: source name, destination host, job id
+ app_exec_id=$WFID$execute2_id
+ filename="filename://"$desthost"/"$(echo $stage_out | sed "s/\/\.\//\//g" | sed "s/'/''/g"); # collapse "/./" and double single quotes so the SQL literal stays valid
+ echo "INSERT INTO stage_out (app_exec_id, filename) VALUES ('$app_exec_id', '$filename');" >> /tmp/$RUNID.sql
+done < stage-outs.txt
+
echo " - Prospective provenance (script, tc, sites)."
script_hash=$(openssl dgst -sha1 script.txt | awk '{ print $2 }')
EXISTING=$($SQLCMD --tuples-only -c "select count(*) from script where hash_value='$script_hash';")
@@ -221,7 +243,6 @@
echo "UPDATE run SET sites_file_hash='$sites_hash' WHERE id='$WF';" >> /tmp/$RUNID.sql
-
echo "Finished SQL generation."
echo "Exporting provenance to database..."
$SQLCMD -f /tmp/$RUNID.sql 1> /dev/null 2> /tmp/$RUNID-provenancedb-error.log
More information about the Swift-commit
mailing list