[Swift-commit] r7530 - provenancedb

lgadelha at ci.uchicago.edu lgadelha at ci.uchicago.edu
Thu Jan 30 17:29:44 CST 2014


Author: lgadelha
Date: 2014-01-30 17:29:44 -0600 (Thu, 30 Jan 2014)
New Revision: 7530

Modified:
   provenancedb/prepare-provenance-chart
   provenancedb/prov-init.sql
   provenancedb/prov-to-sql.sh
Log:
Gathering staged-in and staged-out files
Added some views for queries about file staging


Modified: provenancedb/prepare-provenance-chart
===================================================================
--- provenancedb/prepare-provenance-chart	2014-01-30 23:20:58 UTC (rev 7529)
+++ provenancedb/prepare-provenance-chart	2014-01-30 23:29:44 UTC (rev 7530)
@@ -14,6 +14,8 @@
 cat $1 | grep ' CREATEARRAY MEMBER ' | sed 's/^.* array=\([^ ]*\) index=\([^ ]*\) member=\([^ ]*\).*$/\1 \2 \3/' > createarray-members.txt
 cat $1 | grep ' ARRAYRANGE ' | sed 's/^.* thread=\([^ ]*\).*$/\1/' > arrayrange.txt
 cat $1 | grep ' SCOPE ' | sed 's/^.* thread=\([^ ]*\).*/\1/' > scopes.txt
+cat $1 | grep ' FILE_STAGE_OUT_START ' | sed "s/^.* srcname=\([^ ]*\).*desthost=\([^ ]*\).*jobid=\([^ ]*\).*\$/\1 \2 \3/" > stage-outs.txt
+cat $1 | grep ' Staging in files ' | sed "s/^.* jobid=\([^ ]*\).*Staging in files \([^$]*\).*\$/\1 \2/" > stage-ins.txt
 awk '/BEGIN SWIFTSCRIPT/,/END SWIFTSCRIPT/{if (!/BEGIN SWIFTSCRIPT/&&!/END SWIFTSCRIPT/)print}' $1 > script.txt
 awk '/BEGIN SITES/,/END SITES/{if (!/BEGIN SITES/&&!/END SITES/)print}' $1 > sites.txt
 awk '/BEGIN TC/,/END TC/{if (!/BEGIN TC/&&!/END TC/)print}' $1 > tc.txt

Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql	2014-01-30 23:20:58 UTC (rev 7529)
+++ provenancedb/prov-init.sql	2014-01-30 23:29:44 UTC (rev 7530)
@@ -30,6 +30,8 @@
 drop table primitive cascade;
 drop table dataset_containment cascade;
 drop table ds cascade;
+drop table stage_in cascade;   -- cascade: views app_exec_stage_in and app_exec_staging select from this table
+drop table stage_out cascade;  -- cascade: views app_exec_stage_out and app_exec_staging select from this table
 drop view script_and_fun_call;
 drop view script_to_app_fun_call;
 drop view script_to_app_exec;
@@ -161,6 +163,18 @@
      exit_status		numeric
 );
 
+create table stage_in ( -- files staged in for an application execution; rows are inserted by prov-to-sql.sh
+     app_exec_id	varchar(256) references app_exec (id), -- the app_exec row the file was staged in for
+     filename		varchar(1024), -- staged-in file name after prov-to-sql.sh strips list punctuation and "/./"
+     primary key	(app_exec_id, filename) -- at most one row per (execution, file) pair
+);
+
+create table stage_out ( -- files staged out by an application execution; rows are inserted by prov-to-sql.sh
+     app_exec_id	varchar(256) references app_exec (id), -- the app_exec row the file was staged out from
+     filename		varchar(1024), -- built by prov-to-sql.sh as "filename://<desthost>/<srcname>"
+     primary key	(app_exec_id, filename) -- at most one row per (execution, file) pair
+);
+
 create view application_execution as
   select id, app_fun_call_id as function_call_id, to_timestamp(start_time) as start_time, duration, final_state, site
   from   app_exec;
@@ -500,4 +514,37 @@
 select script_run_id, script_filename, input_dataset_id, input_dataset_type, input_parameter_name, input_dataset_value, 
        input_dataset_filename, function_call_id, function_call_type, function_call_name, output_dataset_id, output_parameter_name, 
        output_dataset_type, output_dataset_value, output_dataset_filename 
-from   provenance_all;
\ No newline at end of file
+from   provenance_all;
+
+create view app_exec_stage_in as -- every staged-in file together with the columns of its app_exec row
+select si.filename as staged_in_filename, ae.id as app_exec_id, ae.app_fun_call_id, ae.start_time as app_exec_start_time,
+       ae.duration as app_exec_duration, ae.final_state as app_exec_final_state, ae.site as app_exec_site, ae.real_secs as app_exec_real_secs,
+       ae.kernel_secs as app_exec_kernel_secs, ae.user_secs as app_exec_user_secs, ae.percent_cpu as app_exec_percent_cpu, ae.max_rss as app_exec_max_rss,
+       ae.avg_rss as app_exec_avg_rss, ae.avg_tot_vm as app_exec_avg_tot_vm, ae.avg_priv_data as app_exec_avg_priv_data, ae.avg_priv_stack as app_exec_avg_priv_stack,
+       ae.avg_shared_text as app_exec_avg_shared_text, ae.page_size as app_exec_page_size, ae.major_pgfaults as app_exec_major_pgfaults, ae.minor_pgfaults as app_exec_minor_pgfaults,
+       ae.swaps as app_exec_swaps, ae.invol_context_switches as app_exec_invol_context_switches, ae.vol_waits as app_exec_vol_waits, ae.fs_reads as app_exec_fs_reads,
+       ae.fs_writes as app_exec_fs_writes, ae.sock_recv as app_exec_sock_recv, ae.sock_send as app_exec_sock_send, ae.signals as app_exec_signals,
+       ae.exit_status as app_exec_exit_status
+from   stage_in as si inner join app_exec as ae on si.app_exec_id = ae.id;
+
+create view app_exec_stage_out as -- every staged-out file together with the columns of its app_exec row; joins on stage_out
+select so.filename as staged_out_filename, ae.id as app_exec_id, ae.app_fun_call_id, ae.start_time as app_exec_start_time,
+       ae.duration as app_exec_duration, ae.final_state as app_exec_final_state, ae.site as app_exec_site, ae.real_secs as app_exec_real_secs,
+       ae.kernel_secs as app_exec_kernel_secs, ae.user_secs as app_exec_user_secs, ae.percent_cpu as app_exec_percent_cpu, ae.max_rss as app_exec_max_rss,
+       ae.avg_rss as app_exec_avg_rss, ae.avg_tot_vm as app_exec_avg_tot_vm, ae.avg_priv_data as app_exec_avg_priv_data, ae.avg_priv_stack as app_exec_avg_priv_stack,
+       ae.avg_shared_text as app_exec_avg_shared_text, ae.page_size as app_exec_page_size, ae.major_pgfaults as app_exec_major_pgfaults, ae.minor_pgfaults as app_exec_minor_pgfaults,
+       ae.swaps as app_exec_swaps, ae.invol_context_switches as app_exec_invol_context_switches, ae.vol_waits as app_exec_vol_waits, ae.fs_reads as app_exec_fs_reads,
+       ae.fs_writes as app_exec_fs_writes, ae.sock_recv as app_exec_sock_recv, ae.sock_send as app_exec_sock_send, ae.signals as app_exec_signals,
+       ae.exit_status as app_exec_exit_status
+from   stage_out as so inner join app_exec as ae on so.app_exec_id = ae.id;
+
+create view app_exec_staging as -- one row per (staged-in file, staged-out file) pair of an execution; executions lacking either kind are omitted
+select si.filename as staged_in_filename, ae.id as app_exec_id, ae.app_fun_call_id, ae.start_time as app_exec_start_time,
+       ae.duration as app_exec_duration, ae.final_state as app_exec_final_state, ae.site as app_exec_site, ae.real_secs as app_exec_real_secs,
+       ae.kernel_secs as app_exec_kernel_secs, ae.user_secs as app_exec_user_secs, ae.percent_cpu as app_exec_percent_cpu, ae.max_rss as app_exec_max_rss,
+       ae.avg_rss as app_exec_avg_rss, ae.avg_tot_vm as app_exec_avg_tot_vm, ae.avg_priv_data as app_exec_avg_priv_data, ae.avg_priv_stack as app_exec_avg_priv_stack,
+       ae.avg_shared_text as app_exec_avg_shared_text, ae.page_size as app_exec_page_size, ae.major_pgfaults as app_exec_major_pgfaults, ae.minor_pgfaults as app_exec_minor_pgfaults,
+       ae.swaps as app_exec_swaps, ae.invol_context_switches as app_exec_invol_context_switches, ae.vol_waits as app_exec_vol_waits, ae.fs_reads as app_exec_fs_reads,
+       ae.fs_writes as app_exec_fs_writes, ae.sock_recv as app_exec_sock_recv, ae.sock_send as app_exec_sock_send, ae.signals as app_exec_signals,
+       ae.exit_status as app_exec_exit_status, so.filename as staged_out_filename
+from   app_exec as ae inner join stage_in as si on si.app_exec_id = ae.id inner join stage_out as so on so.app_exec_id = ae.id;

Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh	2014-01-30 23:20:58 UTC (rev 7529)
+++ provenancedb/prov-to-sql.sh	2014-01-30 23:29:44 UTC (rev 7530)
@@ -131,7 +131,14 @@
     echo "INSERT INTO $table (function_call_id, dataset_id, parameter) VALUES ('$thread', '$dataset', '$variable');"  >> /tmp/$RUNID.sql
 done < tie-data-invocs.txt
 
+#echo "    - Dataset stage-ins and stage-outs."
+#while read filename execute; do
+#    thread=$(grep $execute execute2.event | cut -f 5 -d " ")
+#    dataset_id=$(grep "$thread " tie-data-invocs.txt | grep output | cut -f 3 -d " ")
+#    echo "INSERT INTO mapped values ('$dataset_id', '$filename');"  >> /tmp/$RUNID.sql
+#done < stage-outs.txt
 
+
 echo "    - Wrapper log resource consumption info."
 if [ -f runtime.txt ]; then
     while read execute2_id runtime; do
@@ -192,6 +199,21 @@
     done < extrainfo.txt
 fi
 
+echo "    - Stage ins and stage outs."
+# stage-ins.txt lines: "<jobid> <file list>"; each list word loses a leading "[" and trailing ","/"]", "/./" becomes "/", and "'" is doubled for SQL.
+while read -r execute2_id stage_in; do
+    app_exec_id=$WFID$execute2_id
+    for isid in $stage_in; do
+	filename=$(printf '%s\n' "$isid" | sed -e "s/^\[//" -e "s/,$//" -e "s/\]$//" -e "s/\/\.\//\//g" -e "s/'/''/g")
+	echo "INSERT INTO stage_in (app_exec_id, filename) VALUES ('$app_exec_id', '$filename');"  >> /tmp/$RUNID.sql
+    done
+done < stage-ins.txt
+# stage-outs.txt lines: "<srcname> <desthost> <jobid>". NOTE(review): confirm the "filename://" scheme is intended rather than "file://".
+while read -r stage_out desthost execute2_id; do
+    filename="filename://"$desthost"/"$(printf '%s\n' "$stage_out" | sed -e "s/\/\.\//\//g" -e "s/'/''/g")
+    echo "INSERT INTO stage_out (app_exec_id, filename) VALUES ('$WFID$execute2_id', '$filename');"  >> /tmp/$RUNID.sql
+done < stage-outs.txt
+
 echo "    - Prospective provenance (script, tc, sites)."
 script_hash=$(openssl dgst -sha1  script.txt | awk  '{ print $2 }')
 EXISTING=$($SQLCMD --tuples-only -c "select count(*) from script where hash_value='$script_hash';")
@@ -221,7 +243,6 @@
 echo "UPDATE run SET sites_file_hash='$sites_hash' WHERE id='$WF';" >> /tmp/$RUNID.sql
 
 
-
 echo "Finished SQL generation."
 echo "Exporting provenance to database..."
 $SQLCMD -f /tmp/$RUNID.sql 1> /dev/null 2> /tmp/$RUNID-provenancedb-error.log




More information about the Swift-commit mailing list