[Swift-commit] r5715 - in provenancedb: . apps apps/SciColSim

lgadelha at ci.uchicago.edu lgadelha at ci.uchicago.edu
Wed Mar 14 14:44:33 CDT 2012


Author: lgadelha
Date: 2012-03-14 14:44:33 -0500 (Wed, 14 Mar 2012)
New Revision: 5715

Added:
   provenancedb/apps/SciColSim/
   provenancedb/apps/SciColSim/SciColSim_extractor.sh
Modified:
   provenancedb/prov-to-sql.sh
   provenancedb/swift-prov-import-all-logs
Log:
SciColSim annotation extractor, minor updates.


Added: provenancedb/apps/SciColSim/SciColSim_extractor.sh
===================================================================
--- provenancedb/apps/SciColSim/SciColSim_extractor.sh	                        (rev 0)
+++ provenancedb/apps/SciColSim/SciColSim_extractor.sh	2012-03-14 19:44:33 UTC (rev 5715)
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+# Annotation extractor for the SciColSim application
+# 
+
+PROVDB_HOME=~/provenancedb
+SCICOLSIM_HOME=/home/lgadelha/SciColSim
+IMPORT_HOME=/tmp
+
+#cp $PROVDB_HOME/etc/provenance.config.ci $PROVDB_HOME/etc/provenance.config
+source $PROVDB_HOME/etc/provenance.config
+
+cd $SCICOLSIM_HOME
+for k in $(find . -name "annealing-*-*-*.log");
+do
+    run_dir=$(echo "$k" | awk -F "annealing-" '{print $1}')
+    cd $SCICOLSIM_HOME/$run_dir
+    full_dir=$(pwd)
+    log_suffix=$(echo "$k" | awk -F "^." '{print $2}')
+    count=$($SQLCMD --tuples-only -c "select count(*) from script_run where log_filename like '%$log_suffix';" | awk '{print $1}')
+    if [ "$count" -eq "0" ]; then
+	echo "export LOGREPO=$full_dir" > $PROVDB_HOME/etc/provenance.config
+	echo "export SQLCMD=\"$SQLCMD\"" >> $PROVDB_HOME/etc/provenance.config
+	cd /tmp
+	$PROVDB_HOME/swift-prov-import-all-logs
+	SCICOLSIM_RUNID=$(echo "$run_dir" | awk -F "/" '{print $2}')
+	SCRIPT_RUN_ID=$($SQLCMD --tuples-only -c "SELECT id FROM script_run WHERE log_filename like '%$log_suffix';" | awk '{print $1}')
+	echo "Annotating script run $SCRIPT_RUN_ID with key-value(text) pair ('scicolsim_run_id', '$SCICOLSIM_RUNID')"
+	$SQLCMD -c "INSERT INTO a_run_t VALUES ('$SCRIPT_RUN_ID', 'scicolsim_run_id', '$SCICOLSIM_RUNID')" 1> /dev/null
+	cd $SCICOLSIM_HOME/$run_dir
+	while read key value rest
+	do
+	    if [ -n "$key" ]; then
+		echo "Annotating script run $SCRIPT_RUN_ID with key-value(numeric) pair ('$key', $value)."
+		$SQLCMD -c "INSERT INTO a_run_n VALUES ('$SCRIPT_RUN_ID', '$key', $value)" 1> /dev/null
+	    fi
+	done < paramfile
+	
+	cd /tmp
+	while read t d id rest; do
+	    cd $SCICOLSIM_HOME
+	    record=$(find "$run_dir" -name ${id}-info | grep -v swiftwork)
+	    globalid=$EXECUTE2PREFIX$id
+	    if [ -n "$record" -a -f "$record" ] ; then
+		outf=$(grep '^OUTF=' $record | awk -F "=" '{print $2}' | awk -F "|" '{print $1}')
+		while read keyc value rest
+		do
+		    key=$(echo $keyc | awk -F ":" '{print $1'})
+		    if [ "$key" != "$keyc" -a "$key" != "multi_loss" ]; then
+			PROC_ID=$($SQLCMD --tuples-only -c "SELECT app_inv_id FROM app_exec WHERE id='execute2:$SCRIPT_RUN_ID:$globalid';" | awk '{print $1}')
+			if [ "$key" == "Operation" ]; then
+			    echo "Annotating function call $PROC_ID with key-value(text) pair ('$key', '$value')."
+			    $SQLCMD -c "INSERT INTO a_proc_t VALUES ('$PROC_ID', '$key', '$value')" 1> /dev/null
+			else
+			    echo "Annotating function call $PROC_ID with key-value(numeric) pair ('$key', $value)."
+			    $SQLCMD -c "INSERT INTO a_proc_n VALUES ('$PROC_ID', '$key', $value)" 1> /dev/null
+			fi
+		    fi	
+		done < $SCICOLSIM_HOME/$run_dir/$outf
+	    else
+		echo no wrapper log for $id >&2
+	    fi
+	done < execute2.event
+	
+    fi
+    cd $SCICOLSIM_HOME
+done

Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh	2012-03-14 16:49:49 UTC (rev 5714)
+++ provenancedb/prov-to-sql.sh	2012-03-14 19:44:33 UTC (rev 5715)
@@ -7,18 +7,20 @@
 # TODO is there already a URI form for identifying workflows?
 export WF="${RUNID}"
 
-echo Generating SQL for $RUNID
+echo "Generating SQL for $RUNID:"
 
 # this gives a distinction between the root process for a workflow and the
 # workflow itself. perhaps better to model the workflow as a process
-#echo "BEGIN TRANSACTION;" > /tmp/$RUNID.sql
+echo "    - Root thread."
 echo "INSERT INTO proc (id, type, name, run_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" >> /tmp/$RUNID.sql
 
+echo "    - Function calls."
 while read time duration thread localthread endstate tr_name scratch; do
     echo "INSERT INTO proc (id, type, run_id) VALUES ('$thread', 'execute', '$WF');"  >> /tmp/$RUNID-1.sql
     echo "INSERT INTO app_inv (id, proc_name, start_time, duration, final_state, scratch) VALUES ('$thread', '$tr_name', $time, $duration, '$endstate', '$scratch');"   >> /tmp/$RUNID-2.sql
 done < execute.global.event
 
+echo "    - Application executions."
 while read start_time duration globalid id endstate thread site scratch; do
     # cut off the last component of the thread, so that we end up at the
     # parent thread id which should correspond with the execute-level ID
@@ -26,17 +28,19 @@
     echo  "INSERT INTO app_exec (id, app_inv_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');"  >> /tmp/$RUNID-3.sql
 done < execute2.global.event
 
-
+echo "    - Mapped variables."
 while read dataset filename; do
     echo "INSERT INTO ds (id) VALUES ('$dataset');"  >> /tmp/$RUNID-4.sql
     echo "INSERT INTO file (id, name) VALUES ('$dataset', '$filename');"  >> /tmp/$RUNID-5.sql
 done < dataset-filenames.txt
 
+echo "    - Primitive variables."
 while read dataset idtype equal value rest; do
     echo "INSERT INTO ds (id) VALUES ('$dataset');"  >> /tmp/$RUNID-4.sql
     echo "INSERT INTO in_mem (id, value) VALUES ('$dataset', '$value');"  >> /tmp/$RUNID-5.sql
 done < dataset-values.txt
 
+echo "    - Arrays and structures."
 while read outer inner; do
     echo  "INSERT INTO ds (id) VALUES ('$outer');"  >> /tmp/$RUNID-4.sql
     echo  "INSERT INTO ds (id) VALUES ('$inner');"  >> /tmp/$RUNID-4.sql
@@ -46,7 +50,7 @@
     echo  "INSERT INTO ds_out (proc_id, ds_id, param) VALUES ('${WFID}constructor:$outer', '$outer', 'collection');"  >> /tmp/$RUNID-5.sql
 done < tie-containers.txt
 
-
+echo "    - Operators."
 while read col1 col2 col3 col4 col5 thread name lhs rhs result; do
     thread=$(echo $thread | awk 'BEGIN { FS = "=" }; {print $2}')
     name=$(echo $name | awk 'BEGIN { FS = "=" }; {print $2}')
@@ -65,6 +69,7 @@
     echo  "INSERT INTO ds_out (proc_id, ds_id, param) VALUES ('$operatorid', '$result', 'result');"  >> /tmp/$RUNID-5.sql
 done < operators.txt
 
+echo "    - Built-in functions."
 while read id name output; do
     echo  "INSERT INTO ds (id) VALUES ('$output');"  >> /tmp/$RUNID-4.sql
     echo  "INSERT INTO proc (id, type, name, run_id) VALUES ('$id', 'function', '$name', '$WF');"  >> /tmp/$RUNID-1.sql
@@ -76,11 +81,12 @@
     echo  "INSERT INTO ds_in (proc_id, ds_id, param) VALUES ('$id', '$value', 'undefined');"  >> /tmp/$RUNID-5.sql
 done < function-inputs.txt
 
-
+echo "    - Function call names."
 while read thread appname; do
     echo  "UPDATE proc SET name='$appname' WHERE id='$thread';"  >> /tmp/$RUNID-3.sql
 done < invocation-procedure-names.txt
 
+echo "    - Script run events."
 while read start duration wfid rest; do
     echo "UPDATE run SET start_time=$start WHERE id='$WF';"  >> /tmp/$RUNID-1.sql
     echo "UPDATE run SET duration=$duration WHERE id='$WF';"  >> /tmp/$RUNID-1.sql
@@ -88,6 +94,7 @@
 
 
 # TODO this could merge with other naming tables
+echo "    - Compound functions."
 while read start duration thread final_state procname ; do
     if [ "$duration" != "last-event-line" ]; then
 	compoundid=$WFID$thread
@@ -107,6 +114,7 @@
     echo "INSERT INTO proc (id, type, name, run_id) VALUES ('$thread', 'scope', 'scope', '$WF');"  >> /tmp/$RUNID-1.sql
 done < scopes.txt
 
+echo "    - Variable consumption and production."
 while read thread direction dataset variable rest; do 
     if [ "$direction" == "input" ] ; then
 	table=ds_in
@@ -118,6 +126,7 @@
     echo "INSERT INTO $table (proc_id, ds_id, param) VALUES ('$thread', '$dataset', '$variable');"  >> /tmp/$RUNID-5.sql
 done < tie-data-invocs.txt
 
+echo "    - Wrapper log extra info."
 if [ -f extrainfo.txt ]; then
     while read execute2_id extrainfo; do
 	echo $extrainfo | awk -F ";"  '{ for (i = 1; i <= NF; i++)
@@ -135,6 +144,7 @@
     done < extrainfo.txt
 fi
 
+echo "    - Wrapper log resource consumption info."
 if [ -f runtime.txt ]; then
     while read execute2_id runtime; do
 	#timestamp=$(echo $runtime | awk -F "," '{print $1}' | awk -F ":" '{print $2}')
@@ -168,7 +178,10 @@
 	rm /tmp/$RUNID-$i.sql
 done
 
-echo "COMMIT;" >> /tmp/$RUNID.sql
-$SQLCMD -f /tmp/$RUNID.sql
+#echo "COMMIT;" >> /tmp/$RUNID.sql
+
+echo "Finished SQL generation."
+echo "Exporting provenance to database..."
+$SQLCMD -f /tmp/$RUNID.sql 1> /tmp/$RUNID-provenancedb-import.log
 rm /tmp/$RUNID.sql
-echo Finished sending SQL to DB
\ No newline at end of file
+echo "Finished exporting provenance to database."
\ No newline at end of file

Modified: provenancedb/swift-prov-import-all-logs
===================================================================
--- provenancedb/swift-prov-import-all-logs	2012-03-14 16:49:49 UTC (rev 5714)
+++ provenancedb/swift-prov-import-all-logs	2012-03-14 19:44:33 UTC (rev 5715)
@@ -57,7 +57,7 @@
 		
 		export WF="${RUNID}"
 		
-		echo "BEGIN TRANSACTION;" > /tmp/$WF.sql
+		#echo "BEGIN TRANSACTION;" > /tmp/$WF.sql
 		echo "INSERT INTO run (id, log_filename, swift_version, cog_version, final_state) VALUES ('$WF','$filename','$version', '$COG_VERSION', '$wfstatus');" >> /tmp/$WF.sql
 		
 		echo version $version in log file $filename




More information about the Swift-commit mailing list