[Swift-commit] r4229 - provenancedb

lgadelha at ci.uchicago.edu lgadelha at ci.uchicago.edu
Tue Mar 29 23:14:03 CDT 2011


Author: lgadelha
Date: 2011-03-29 23:14:02 -0500 (Tue, 29 Mar 2011)
New Revision: 4229

Added:
   provenancedb/info-to-runtime
Modified:
   provenancedb/prepare-provenance-chart
   provenancedb/prov-init.sql
   provenancedb/prov-to-sql.sh
   provenancedb/swift-prov-import-all-logs
Log:
Import /proc runtime info into the provenance database.


Added: provenancedb/info-to-runtime
===================================================================
--- provenancedb/info-to-runtime	                        (rev 0)
+++ provenancedb/info-to-runtime	2011-03-30 04:14:02 UTC (rev 4229)
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# For every job listed in execute2.event (job id = third field), print each
+# RUNTIME_INFO= entry of its <id>-info wrapper file as "<global id> <info>".
+# $EXECUTE2PREFIX and $IDIR must be set appropriately by the containing script.
+
+while read -r t d id rest; do
+    echo "processing wrapper file for job $id" >&2
+    # first match only, so the -f test below always sees exactly one path;
+    # the search falls back to the current directory when $IDIR is empty
+    record=$(find "${IDIR:-.}" -name "${id}-info" | head -n 1)
+    globalid=$EXECUTE2PREFIX$id
+    if [ -f "$record" ]; then
+	# the id travels through the environment so it is never parsed as program text
+	gid="$globalid" awk 'sub(/^RUNTIME_INFO=/, "") { print ENVIRON["gid"], $0 }' "$record"
+    else
+	echo "no wrapper log for $id" >&2
+    fi
+done < execute2.event

Modified: provenancedb/prepare-provenance-chart
===================================================================
--- provenancedb/prepare-provenance-chart	2011-03-29 22:08:35 UTC (rev 4228)
+++ provenancedb/prepare-provenance-chart	2011-03-30 04:14:02 UTC (rev 4229)
@@ -27,6 +27,8 @@
 
 info-to-extrainfo > extrainfo.txt
 
+info-to-runtime > runtime.txt
+
 cat $1 | grep ' OPERATOR ' | sed 's/^.*thread=\([^ ]*\) operator="\([^ ]*\)" lhs=\([^ ]*\) rhs=\([^ ]*\) result=\([^ ]*\).*$/\1 \2 \3 \4 \5/' > operators.txt
 
 

Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql	2011-03-29 22:08:35 UTC (rev 4228)
+++ provenancedb/prov-init.sql	2011-03-30 04:14:02 UTC (rev 4229)
@@ -28,7 +28,7 @@
     (id            varchar(256) primary key,
      log_filename  varchar(2048),
      swift_version varchar(16),
-     import_status varchar(16),
+     final_state  varchar(16),
      start_time    numeric,
      duration      numeric
     );
@@ -40,9 +40,9 @@
 -- has an entry for this process.
 -- process types: internal, rootthread, execute, function, compound, scope, operator
 create table process
-    (id          varchar(256) primary key, 
-     type        varchar(16),
-     name	     varchar(256), -- in the case of an execute this refers to the transformation name in tc.data
+    (id     varchar(256) primary key, 
+     type   varchar(16),
+     name   varchar(256), -- in the case of an execute this refers to the transformation name in tc.data
      run_id varchar(256) references run (id) on delete cascade   -- normalize: workflow_id of sub-procedure determined
           	 	      		 	       	  	 	   -- by compound procedure 
     );
@@ -63,25 +63,25 @@
 -- perform an execution. the execute2 id is tied to per-execution-attempt
 -- information such as wrapper logs
 create table app_execution
-    (id          varchar(256) primary key,
-     app_invocation_id  varchar(256) references app_invocation (id) on delete cascade, 
-     start_time  numeric,
-     duration    numeric,
-     final_state varchar(16),
-     site        varchar(256)
+    (id                varchar(256) primary key,
+     app_invocation_id varchar(256) references app_invocation (id) on delete cascade, 
+     start_time        numeric,
+     duration          numeric,
+     final_state       varchar(16),
+     site              varchar(256)
     );
 
 -- app execution runtime info extracted from the /proc filesystem (assumes the app executed
 -- in a Linux host) 
 create table runtime_info
    ( app_execution_id   varchar(256) references app_execution (id) on delete cascade, 
-     event_timestamp	numeric,
+     tstamp		numeric,
      cpu_usage          numeric,
      max_phys_mem	numeric,
      max_virtual_mem	numeric,
      io_read_bytes	numeric,
      io_write_bytes	numeric,
-     primary key (app_execution_id, event_timestamp)
+     primary key (app_execution_id, tstamp)
    );
 
 -- ds stores all dataset identifiers.
@@ -194,7 +194,7 @@
 
 
 CREATE VIEW run_date AS
-  SELECT id, log_filename, swift_version, import_status, 
+  SELECT id, log_filename, swift_version, final_state, 
          to_timestamp(start_time) as start_time, duration
   FROM   run;
 

Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh	2011-03-29 22:08:35 UTC (rev 4228)
+++ provenancedb/prov-to-sql.sh	2011-03-30 04:14:02 UTC (rev 4229)
@@ -8,7 +8,7 @@
 export WF="${RUNID}"
 
 echo Generating SQL for $RUNID
-rm -f tmp-u.sql tmp-ds.sql tmp-p.sql tmp-e.sql tmp-e2.sql tmp-dsu.sql tmp-dsc.sql tmp-f.sql tmp-v.sql tmp-import.sql import.sql extrainfo.sql fields.txt
+rm -f tmp-u.sql tmp-ds.sql tmp-p.sql tmp-e.sql tmp-e2.sql tmp-dsu.sql tmp-dsc.sql tmp-f.sql tmp-v.sql tmp-import.sql import.sql extrainfo.sql fields.txt runtime.sql
 
 
 # this gives a distinction between the root process for a workflow and the
@@ -181,5 +181,17 @@
 
 $SQLCMD < extrainfo.sql
 
+# Build runtime.sql: one INSERT INTO runtime_info per "<execute2 id> <info>"
+# line of runtime.txt. <info> is a comma-separated list of key:value pairs;
+# the value of pair N (text between its first and second ':') fills column N
+# of the list below. Redirecting the whole loop creates runtime.sql even when
+# runtime.txt has no rows, so the $SQLCMD call never hits a missing file.
+while read -r execute2_id runtime; do
+    printf '%s\n' "$runtime" | id="$execute2_id" awk -F ',' '{ for (i = 1; i <= 6; i++) { split($i, kv, ":"); v[i] = kv[2] }
+        printf "INSERT INTO runtime_info (app_execution_id, tstamp, cpu_usage, max_phys_mem, max_virtual_mem, io_read_bytes, io_write_bytes) VALUES (\047%s\047, %s, %s, %s, %s, %s, %s);\n", ENVIRON["id"], v[1], v[2], v[3], v[4], v[5], v[6] }'
+done < runtime.txt > runtime.sql
+
+$SQLCMD < runtime.sql
+
 echo Finished sending SQL to DB
 

Modified: provenancedb/swift-prov-import-all-logs
===================================================================
--- provenancedb/swift-prov-import-all-logs	2011-03-29 22:08:35 UTC (rev 4228)
+++ provenancedb/swift-prov-import-all-logs	2011-03-30 04:14:02 UTC (rev 4229)
@@ -45,7 +45,7 @@
 	    if [ $PROV_ENABLED -gt 0 ]; then
 		echo IMPORTING
 		
-		if grep --silent "DEBUG Loader Swift finished with no errors" $filename; then
+		if grep --silent "INFO  Loader Swift finished with no errors" $filename; then
 		    wfstatus="SUCCESS"
     		else
 		    wfstatus="FAIL"
@@ -55,7 +55,7 @@
 		
 		export WF="${RUNID}"
 		
-		echo "INSERT INTO run (id, log_filename, swift_version, import_status) VALUES ('$WF','$filename','$version','$wfstatus');" | $SQLCMD
+		echo "INSERT INTO run (id, log_filename, swift_version, final_state) VALUES ('$WF','$filename','$version','$wfstatus');" | $SQLCMD
 		
 		echo version $version in log file $filename
 		echo ============= will import =============




More information about the Swift-commit mailing list