[Swift-commit] r5715 - in provenancedb: . apps apps/SciColSim
lgadelha at ci.uchicago.edu
lgadelha at ci.uchicago.edu
Wed Mar 14 14:44:33 CDT 2012
Author: lgadelha
Date: 2012-03-14 14:44:33 -0500 (Wed, 14 Mar 2012)
New Revision: 5715
Added:
provenancedb/apps/SciColSim/
provenancedb/apps/SciColSim/SciColSim_extractor.sh
Modified:
provenancedb/prov-to-sql.sh
provenancedb/swift-prov-import-all-logs
Log:
SciColSim annotation extractor, minor updates.
Added: provenancedb/apps/SciColSim/SciColSim_extractor.sh
===================================================================
--- provenancedb/apps/SciColSim/SciColSim_extractor.sh (rev 0)
+++ provenancedb/apps/SciColSim/SciColSim_extractor.sh 2012-03-14 19:44:33 UTC (rev 5715)
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+# Annotation extractor for the SciColSim application
+#
+ # For every annealing-*-*-*.log under $SCICOLSIM_HOME with no matching script_run row: imports the run, then inserts run-level and call-level annotations via $SQLCMD.
+PROVDB_HOME=~/provenancedb
+SCICOLSIM_HOME=/home/lgadelha/SciColSim  # hard-coded, user-specific location that is searched for run logs
+IMPORT_HOME=/tmp  # NOTE(review): not referenced again in this script; /tmp is hard-coded below
+
+#cp $PROVDB_HOME/etc/provenance.config.ci $PROVDB_HOME/etc/provenance.config
+source $PROVDB_HOME/etc/provenance.config  # SQLCMD and EXECUTE2PREFIX are never assigned here; presumably they come from this file - verify
+
+cd $SCICOLSIM_HOME
+for k in $(find . -name "annealing-*-*-*.log");  # unquoted $(find ...) is word-split, so log paths containing whitespace would break
+do
+ run_dir=$(echo "$k" | awk -F "annealing-" '{print $1}')  # part of the path that precedes "annealing-"
+ cd $SCICOLSIM_HOME/$run_dir
+ full_dir=$(pwd)  # absolute form of the run directory, written out as LOGREPO below
+ log_suffix=$(echo "$k" | awk -F "^." '{print $2}')  # separator regex "^." consumes the first character (the "." find prints), leaving "/<path>"
+ count=$($SQLCMD --tuples-only -c "select count(*) from script_run where log_filename like '%$log_suffix';" | awk '{print $1}')  # rows whose log_filename ends with this path
+ if [ "$count" -eq "0" ]; then  # only logs with no script_run row yet are processed
+ echo "export LOGREPO=$full_dir" > $PROVDB_HOME/etc/provenance.config  # NOTE: ">" overwrites the config file that was sourced above
+ echo "export SQLCMD=\"$SQLCMD\"" >> $PROVDB_HOME/etc/provenance.config  # keeps the SQLCMD value currently in effect
+ cd /tmp
+ $PROVDB_HOME/swift-prov-import-all-logs  # presumably reads the provenance.config just rewritten - verify
+ SCICOLSIM_RUNID=$(echo "$run_dir" | awk -F "/" '{print $2}')  # second "/"-separated field; the first is the leading "."
+ SCRIPT_RUN_ID=$($SQLCMD --tuples-only -c "SELECT id FROM script_run WHERE log_filename like '%$log_suffix';" | awk '{print $1}')  # same LIKE pattern as the count query above
+ echo "Annotating script run $SCRIPT_RUN_ID with key-value(text) pair ('scicolsim_run_id', '$SCICOLSIM_RUNID')"
+ $SQLCMD -c "INSERT INTO a_run_t VALUES ('$SCRIPT_RUN_ID', 'scicolsim_run_id', '$SCICOLSIM_RUNID')" 1> /dev/null  # stdout discarded; stderr still shown
+ cd $SCICOLSIM_HOME/$run_dir
+ while read key value rest  # first two whitespace-separated fields of each paramfile line
+ do
+ if [ -n "$key" ]; then  # skip lines with no first field
+ echo "Annotating script run $SCRIPT_RUN_ID with key-value(numeric) pair ('$key', $value)."
+ $SQLCMD -c "INSERT INTO a_run_n VALUES ('$SCRIPT_RUN_ID', '$key', $value)" 1> /dev/null  # $value is spliced in unquoted, i.e. as an SQL numeric literal
+ fi
+ done < paramfile  # resolved relative to $SCICOLSIM_HOME/$run_dir (cd just above)
+ # Second pass: one iteration per line of execute2.event, annotating the matching function calls.
+ cd /tmp
+ while read t d id rest; do  # only the third field (id) is used below
+ cd $SCICOLSIM_HOME
+ record=$(find "$run_dir" -name ${id}-info | grep -v swiftwork)  # <id>-info files under the run dir, dropping paths that contain "swiftwork"
+ globalid=$EXECUTE2PREFIX$id
+ if [ -n "$record" -a -f "$record" ] ; then  # -f fails if find returned more than one path (multi-line value)
+ outf=$(grep '^OUTF=' $record | awk -F "=" '{print $2}' | awk -F "|" '{print $1}')  # OUTF value up to the first "|"
+ while read keyc value rest
+ do
+ key=$(echo $keyc | awk -F ":" '{print $1'})  # text before the first ":"; the "}" sits outside the quotes but still concatenates to {print $1}
+ if [ "$key" != "$keyc" -a "$key" != "multi_loss" ]; then  # first field must contain ":" and the key must not be multi_loss
+ PROC_ID=$($SQLCMD --tuples-only -c "SELECT app_inv_id FROM app_exec WHERE id='execute2:$SCRIPT_RUN_ID:$globalid';" | awk '{print $1}')  # query is re-run for every matching line
+ if [ "$key" == "Operation" ]; then  # Operation goes to the text table, every other key to the numeric table
+ echo "Annotating function call $PROC_ID with key-value(text) pair ('$key', '$value')."
+ $SQLCMD -c "INSERT INTO a_proc_t VALUES ('$PROC_ID', '$key', '$value')" 1> /dev/null
+ else
+ echo "Annotating function call $PROC_ID with key-value(numeric) pair ('$key', $value)."
+ $SQLCMD -c "INSERT INTO a_proc_n VALUES ('$PROC_ID', '$key', $value)" 1> /dev/null  # $value unquoted: must be a valid SQL number
+ fi
+ fi
+ done < $SCICOLSIM_HOME/$run_dir/$outf  # file named by the OUTF= entry of the info record
+ else
+ echo no wrapper log for $id >&2  # diagnostic goes to stderr; the loop continues
+ fi
+ done < execute2.event  # opened before the loop body runs, so it resolves against /tmp (cd above)
+ # End of the branch taken for logs that were not yet imported.
+ fi
+ cd $SCICOLSIM_HOME  # back to the search root before the next relative path
+done
Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh 2012-03-14 16:49:49 UTC (rev 5714)
+++ provenancedb/prov-to-sql.sh 2012-03-14 19:44:33 UTC (rev 5715)
@@ -7,18 +7,20 @@
# TODO is there already a URI form for identifying workflows?
export WF="${RUNID}"
-echo Generating SQL for $RUNID
+echo "Generating SQL for $RUNID:"
# this gives a distinction between the root process for a workflow and the
# workflow itself. perhaps better to model the workflow as a process
-#echo "BEGIN TRANSACTION;" > /tmp/$RUNID.sql
+echo " - Root thread."
echo "INSERT INTO proc (id, type, name, run_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" >> /tmp/$RUNID.sql
+echo " - Function calls."
while read time duration thread localthread endstate tr_name scratch; do
echo "INSERT INTO proc (id, type, run_id) VALUES ('$thread', 'execute', '$WF');" >> /tmp/$RUNID-1.sql
echo "INSERT INTO app_inv (id, proc_name, start_time, duration, final_state, scratch) VALUES ('$thread', '$tr_name', $time, $duration, '$endstate', '$scratch');" >> /tmp/$RUNID-2.sql
done < execute.global.event
+echo " - Application executions."
while read start_time duration globalid id endstate thread site scratch; do
# cut off the last component of the thread, so that we end up at the
# parent thread id which should correspond with the execute-level ID
@@ -26,17 +28,19 @@
echo "INSERT INTO app_exec (id, app_inv_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');" >> /tmp/$RUNID-3.sql
done < execute2.global.event
-
+echo " - Mapped variables."
while read dataset filename; do
echo "INSERT INTO ds (id) VALUES ('$dataset');" >> /tmp/$RUNID-4.sql
echo "INSERT INTO file (id, name) VALUES ('$dataset', '$filename');" >> /tmp/$RUNID-5.sql
done < dataset-filenames.txt
+echo " - Primitive variables."
while read dataset idtype equal value rest; do
echo "INSERT INTO ds (id) VALUES ('$dataset');" >> /tmp/$RUNID-4.sql
echo "INSERT INTO in_mem (id, value) VALUES ('$dataset', '$value');" >> /tmp/$RUNID-5.sql
done < dataset-values.txt
+echo " - Arrays and structures."
while read outer inner; do
echo "INSERT INTO ds (id) VALUES ('$outer');" >> /tmp/$RUNID-4.sql
echo "INSERT INTO ds (id) VALUES ('$inner');" >> /tmp/$RUNID-4.sql
@@ -46,7 +50,7 @@
echo "INSERT INTO ds_out (proc_id, ds_id, param) VALUES ('${WFID}constructor:$outer', '$outer', 'collection');" >> /tmp/$RUNID-5.sql
done < tie-containers.txt
-
+echo " - Operators."
while read col1 col2 col3 col4 col5 thread name lhs rhs result; do
thread=$(echo $thread | awk 'BEGIN { FS = "=" }; {print $2}')
name=$(echo $name | awk 'BEGIN { FS = "=" }; {print $2}')
@@ -65,6 +69,7 @@
echo "INSERT INTO ds_out (proc_id, ds_id, param) VALUES ('$operatorid', '$result', 'result');" >> /tmp/$RUNID-5.sql
done < operators.txt
+echo " - Built-in functions."
while read id name output; do
echo "INSERT INTO ds (id) VALUES ('$output');" >> /tmp/$RUNID-4.sql
echo "INSERT INTO proc (id, type, name, run_id) VALUES ('$id', 'function', '$name', '$WF');" >> /tmp/$RUNID-1.sql
@@ -76,11 +81,12 @@
echo "INSERT INTO ds_in (proc_id, ds_id, param) VALUES ('$id', '$value', 'undefined');" >> /tmp/$RUNID-5.sql
done < function-inputs.txt
-
+echo " - Function call names."
while read thread appname; do
echo "UPDATE proc SET name='$appname' WHERE id='$thread';" >> /tmp/$RUNID-3.sql
done < invocation-procedure-names.txt
+echo " - Script run events."
while read start duration wfid rest; do
echo "UPDATE run SET start_time=$start WHERE id='$WF';" >> /tmp/$RUNID-1.sql
echo "UPDATE run SET duration=$duration WHERE id='$WF';" >> /tmp/$RUNID-1.sql
@@ -88,6 +94,7 @@
# TODO this could merge with other naming tables
+echo " - Compound functions."
while read start duration thread final_state procname ; do
if [ "$duration" != "last-event-line" ]; then
compoundid=$WFID$thread
@@ -107,6 +114,7 @@
echo "INSERT INTO proc (id, type, name, run_id) VALUES ('$thread', 'scope', 'scope', '$WF');" >> /tmp/$RUNID-1.sql
done < scopes.txt
+echo " - Variable consumption and production."
while read thread direction dataset variable rest; do
if [ "$direction" == "input" ] ; then
table=ds_in
@@ -118,6 +126,7 @@
echo "INSERT INTO $table (proc_id, ds_id, param) VALUES ('$thread', '$dataset', '$variable');" >> /tmp/$RUNID-5.sql
done < tie-data-invocs.txt
+echo " - Wrapper log extra info."
if [ -f extrainfo.txt ]; then
while read execute2_id extrainfo; do
echo $extrainfo | awk -F ";" '{ for (i = 1; i <= NF; i++)
@@ -135,6 +144,7 @@
done < extrainfo.txt
fi
+echo " - Wrapper log resource consumption info."
if [ -f runtime.txt ]; then
while read execute2_id runtime; do
#timestamp=$(echo $runtime | awk -F "," '{print $1}' | awk -F ":" '{print $2}')
@@ -168,7 +178,10 @@
rm /tmp/$RUNID-$i.sql
done
-echo "COMMIT;" >> /tmp/$RUNID.sql
-$SQLCMD -f /tmp/$RUNID.sql
+#echo "COMMIT;" >> /tmp/$RUNID.sql
+
+echo "Finished SQL generation."
+echo "Exporting provenance to database..."
+$SQLCMD -f /tmp/$RUNID.sql 1> /tmp/$RUNID-provenancedb-import.log
rm /tmp/$RUNID.sql
-echo Finished sending SQL to DB
\ No newline at end of file
+echo "Finished exporting provenance to database."
\ No newline at end of file
Modified: provenancedb/swift-prov-import-all-logs
===================================================================
--- provenancedb/swift-prov-import-all-logs 2012-03-14 16:49:49 UTC (rev 5714)
+++ provenancedb/swift-prov-import-all-logs 2012-03-14 19:44:33 UTC (rev 5715)
@@ -57,7 +57,7 @@
export WF="${RUNID}"
- echo "BEGIN TRANSACTION;" > /tmp/$WF.sql
+ #echo "BEGIN TRANSACTION;" > /tmp/$WF.sql
echo "INSERT INTO run (id, log_filename, swift_version, cog_version, final_state) VALUES ('$WF','$filename','$version', '$COG_VERSION', '$wfstatus');" >> /tmp/$WF.sql
echo version $version in log file $filename
More information about the Swift-commit
mailing list