[Swift-commit] r7454 - provenancedb
lgadelha at ci.uchicago.edu
lgadelha at ci.uchicago.edu
Fri Dec 27 19:24:17 CST 2013
Author: lgadelha
Date: 2013-12-27 19:24:17 -0600 (Fri, 27 Dec 2013)
New Revision: 7454
Modified:
provenancedb/prov-init.sql
provenancedb/prov-to-sql.sh
provenancedb/swift-prov-import-all-logs
Log:
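Adjust to the new ID format in the log file: run and function-call IDs are now prefixed with the script name taken from the log's "source file" line.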
Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql 2013-12-26 04:00:11 UTC (rev 7453)
+++ provenancedb/prov-init.sql 2013-12-28 01:24:17 UTC (rev 7454)
@@ -360,11 +360,11 @@
select entity_id, entity_type, key, numeric_value, NULL as text_value from annotation_numeric;
create view script_and_fun_call as
- select script_run_summary.id as script_run_id, log_filename, script_filename, swift_version, cog_version,
+ select script_run_summary.id as script_run_id, script_filename, swift_version, cog_version,
script_run_summary.final_state as script_run_final_state,
script_run_summary.start_time as script_run_start_time,
script_run_summary.duration as script_run_duration,
- script_filename, fun_call.id as function_call_id,
+ fun_call.id as function_call_id,
fun_call.type as function_call_type,
fun_call.name as function_call_name
from script_run_summary,fun_call
Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh 2013-12-26 04:00:11 UTC (rev 7453)
+++ provenancedb/prov-to-sql.sh 2013-12-28 01:24:17 UTC (rev 7454)
@@ -1,6 +1,8 @@
#!/bin/bash
-export RUNID=$(basename $1 .log)
+source_file=$(grep "source file" $1 | awk '{print $5}' | awk -F ":" '{print $1}')
+source_file_prefix=$(echo $source_file | awk -F "." '{print $1}')
+export RUNID="${source_file_prefix}-$(basename $1 .log)"
export WFID="${RUNID}:"
@@ -16,7 +18,7 @@
echo " - Function calls."
while read time duration thread localthread endstate tr_name scratch; do
- id=$(echo "$thread" | sed "s/execute\://")
+ id="${source_file_prefix}-$(echo "$thread" | sed "s/execute\://")"
echo "INSERT INTO fun_call (id, type, run_id) VALUES ('$id', 'execute', '$WF');" >> /tmp/$RUNID.sql
echo "INSERT INTO app_fun_call (id, name, start_time, duration, final_state, scratch) VALUES ('$id', '$tr_name', $time, $duration, '$endstate', '$scratch');" >> /tmp/$RUNID.sql
done < execute.global.event
@@ -75,14 +77,16 @@
echo " - Built-in function calls."
while read id name output; do
+ fid="${source_file_prefix}-${id}"
echo "INSERT INTO ds (id) VALUES ('$output');" >> /tmp/$RUNID.sql
- echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$id', 'function', '$name', '$WF');" >> /tmp/$RUNID.sql
- echo "INSERT INTO dataset_out (function_call_id, dataset_id, parameter) VALUES ('$id', '$output', 'result');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO fun_call (id, type, name, run_id) VALUES ('$fid', 'function', '$name', '$WF');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO dataset_out (function_call_id, dataset_id, parameter) VALUES ('$fid', '$output', 'result');" >> /tmp/$RUNID.sql
done < functions.txt
while read id value; do
+ fid="${source_file_prefix}-${id}"
echo "INSERT INTO ds (id) VALUES ('$value');" >> /tmp/$RUNID.sql
- echo "INSERT INTO dataset_in (function_call_id, dataset_id, parameter) VALUES ('$id', '$value', 'undefined');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO dataset_in (function_call_id, dataset_id, parameter) VALUES ('$fid', '$value', 'undefined');" >> /tmp/$RUNID.sql
done < function-inputs.txt
@@ -116,6 +120,7 @@
echo " - Dataset consumption and production."
while read thread direction dataset variable rest; do
+ fid="${source_file_prefix}-${thread}"
if [ "$direction" == "input" ] ; then
table=dataset_in
else
@@ -123,7 +128,7 @@
fi
echo "INSERT INTO ds (id) VALUES ('$dataset');" >> /tmp/$RUNID.sql
- echo "INSERT INTO $table (function_call_id, dataset_id, parameter) VALUES ('$thread', '$dataset', '$variable');" >> /tmp/$RUNID.sql
+ echo "INSERT INTO $table (function_call_id, dataset_id, parameter) VALUES ('$fid', '$dataset', '$variable');" >> /tmp/$RUNID.sql
done < tie-data-invocs.txt
@@ -153,7 +158,7 @@
signals=$(echo $runtime | awk -F "," '{print $20}' | awk -F ":" '{print $2}')
exit_status=$(echo $runtime | awk -F "," '{print $21}' | awk -F ":" '{print $2}')
- echo "UPDATE app_exec SET real_secs='$real_secs', kernel_secs='$kernel_secs', user_secs='$user_secs', percent_cpu='$percent_cpu', max_rss='$max_rss', avg_rss='$avg_rss', avg_tot_vm='$avg_tot_vm', avg_priv_data='$avg_priv_data', avg_priv_stack='$avg_priv_stack', avg_shared_text='$avg_shared_text', page_size='$page_size', major_pgfaults='$major_pgfaults', minor_pgfaults='$minor_pgfaults', swaps='$swaps', invol_context_switches='$invol_context_switches', vol_waits='$vol_waits', fs_reads='$fs_reads', fs_writes='$fs_writes', sock_recv='$sock_recv', sock_send='$sock_send', signals='$signals', exit_status='$exit_status' WHERE id='$execute2_id';" >> /tmp/$RUNID.sql
+ echo "UPDATE app_exec SET real_secs='$real_secs', kernel_secs='$kernel_secs', user_secs='$user_secs', percent_cpu='$percent_cpu', max_rss='$max_rss', avg_rss='$avg_rss', avg_tot_vm='$avg_tot_vm', avg_priv_data='$avg_priv_data', avg_priv_stack='$avg_priv_stack', avg_shared_text='$avg_shared_text', page_size='$page_size', major_pgfaults='$major_pgfaults', minor_pgfaults='$minor_pgfaults', swaps='$swaps', invol_context_switches='$invol_context_switches', vol_waits='$vol_waits', fs_reads='$fs_reads', fs_writes='$fs_writes', sock_recv='$sock_recv', sock_send='$sock_send', signals='$signals', exit_status='$exit_status' WHERE id='${source_file_prefix}-$execute2_id';" >> /tmp/$RUNID.sql
#echo "INSERT INTO rt_info (app_exec_id, timestamp, cpu_usage, max_phys_mem, max_virt_mem, io_read, io_write) VALUES ('$execute2_id', $timestamp, $cpu_usage, $max_phys_mem, $max_virtual_mem, $io_read_bytes, $io_write_bytes);" >> /tmp/$RUNID.sql
# for key in $(echo maxrss walltime systime usertime cpu fsin fsout timesswapped socketrecv socketsent majorpagefaults minorpagefaults contextswitchesinv contextswitchesvol); do
@@ -165,7 +170,8 @@
echo " - Function call names."
while read thread appname; do
- echo "UPDATE fun_call SET name='$appname' WHERE id='$thread';" >> /tmp/$RUNID.sql
+ fid="${source_file_prefix}-${thread}"
+ echo "UPDATE fun_call SET name='$appname' WHERE id='$fid';" >> /tmp/$RUNID.sql
done < invocation-procedure-names.txt
echo " - Wrapper log extra info."
@@ -174,7 +180,7 @@
echo $extrainfo | awk -F ";" '{ for (i = 1; i <= NF; i++)
print $i
}' | awk -F "=" '{ print $1 " " $2 }' | awk -F ":" '{ print $1 " " $2 }' > fields.txt
- id=$($SQLCMD --tuples-only -c "select app_fun_call_id from app_exec where id='$execute2_id';" | awk '{print $1}')
+ id=$($SQLCMD --tuples-only -c "select app_fun_call_id from app_exec where id='${source_file_prefix}-$execute2_id';" | awk '{print $1}')
while read name type value; do
if [ "$type" = "num" ]; then
echo "INSERT INTO annot_app_exec_num (id, name, value) VALUES ('$id', '$name', $value);" >> /tmp/$RUNID.sql
Modified: provenancedb/swift-prov-import-all-logs
===================================================================
--- provenancedb/swift-prov-import-all-logs 2013-12-26 04:00:11 UTC (rev 7453)
+++ provenancedb/swift-prov-import-all-logs 2013-12-28 01:24:17 UTC (rev 7454)
@@ -35,12 +35,14 @@
export IDIR=$(echo $filename | sed 's/\.log$/.d/')
COG_VERSION=$(grep -m 1 -E 'Swift .* swift-r[0-9]*' $filename | sed 's/.*Swift .* cog-r\([0-9]*\).*/\1/')
+ SCRIPT_FILENAME=$(grep "source file" $filename | awk '{print $5}' | awk -F ":" '{print $1}')
+
echo IDIR=$IDIR
if [ $version -ge 1538 ]; then
echo -n "Log: $filename ... "
# TODO: does not work in sqlite
- fname=$(echo $filename | sed -e 's:[^\`].*/::')
- EXISTING=$($SQLCMD --tuples-only -c "select count(*) from run where log_filename='$fname';")
+ # fname=$(echo $filename | sed -e 's:[^\`].*/::')
+ EXISTING=$($SQLCMD --tuples-only -c "select count(*) from run where log_filename='$filename';")
if [ "$EXISTING" -eq "0" ]; then
PROV_ENABLED=$(grep provenanceid $filename | wc -l)
@@ -54,11 +56,13 @@
fi
export RUNID=$(basename $filename .log)
- source_file=$(echo $fname | sed "s/-[0-9]*-[0-9]*-[0-9a-z]*\.log$/\.swift/")
- export WF="${RUNID}"
+ source_file=$(grep "source file" $filename | awk '{print $5}' | awk -F ":" '{print $1}')
+ source_file_prefix=$(echo $source_file | awk -F "." '{print $1}')
+ export WF="${source_file_prefix}-${RUNID}"
+ echo $WF
- #echo "BEGIN TRANSACTION;" > /tmp/$WF.sql
- echo "INSERT INTO run (id, log_filename, script_filename, swift_version, cog_version, final_state) VALUES ('$WF','$fname','$source_file','$version', '$COG_VERSION', '$wfstatus');" >> /tmp/$WF.sql
+
+ echo "INSERT INTO run (id, log_filename, script_filename, swift_version, cog_version, final_state) VALUES ('$WF','$filename','$source_file','$version', '$COG_VERSION', '$wfstatus');" >> /tmp/$WF.sql
echo version $version in log file $filename
echo ============= will import =============
@@ -83,7 +87,3 @@
fi
done < /tmp/everylog-vs-versions.data
-# now pull the XML data into eXist, in as few runs as possible to avoid
-# JVM startup.
-# ls `pwd`/*.log.xml | time xargs ~/work/eXist/bin/client.sh -m /db/prov -ouri=xmldb:exist:// -p
-
More information about the Swift-commit mailing list