[Swift-commit] r7460 - trunk/bin

davidk at ci.uchicago.edu davidk at ci.uchicago.edu
Wed Jan 8 15:57:26 CST 2014


Author: davidk
Date: 2014-01-08 15:57:26 -0600 (Wed, 08 Jan 2014)
New Revision: 7460

Modified:
   trunk/bin/swiftlog
Log:
Updated swiftlog script: rewritten from Perl to Python


Modified: trunk/bin/swiftlog
===================================================================
--- trunk/bin/swiftlog	2014-01-03 22:31:17 UTC (rev 7459)
+++ trunk/bin/swiftlog	2014-01-08 21:57:26 UTC (rev 7460)
@@ -1,187 +1,98 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env python
 
-use strict;
-use warnings;
-use File::Basename;
-use Class::Struct;
+import sys
+import os
+import operator
 
-# Task structure
-struct Task          => { 
-    app              => '$',
-    arguments        => '$',
-    host             => '$',
-    replicationGroup => '$',
-    stageIn          => '$',
-    stageOut         => '$',
-    startTime        => '$',
-    stopTime         => '$',
-    taskNumber       => '$',
-    thread           => '$',
-    workdir          => '$',
-};
+if len(sys.argv) != 2:
+    sys.exit("Usage: %s <run_directory>" % sys.argv[0])
 
-# Hash for storing all tasks
-my %tasks = ();
-my $taskCounter = 1;
+# Open log file
+log_directory = os.path.normpath(sys.argv[1])
+log_filename = os.path.join(log_directory, os.path.basename(log_directory) + ".log")
+log_file = open(log_filename, "r")
 
-# Print basic usage info
-sub usage() {
-   &crash("Usage: $0 <logdir>");
-}
+# Class definition for a single Task
+class Task:
+    app        = ''
+    arguments  = ''
+    host       = ''
+    stageIn    = ''
+    stageOut   = ''
+    startTime  = ''
+    stopTime   = ''
+    taskNumber = ''
+    thread     = ''
+    workdir    = ''
 
-# Print error message and exit
-sub crash() {
-   print STDERR "@_\n";
-   exit(1);
-}
+# Dictionary containing all tasks
+tasks = {}
+taskCounter = 1
 
-# Get an existing task of a given jobid, or create a new one and return it
-sub getTask() {
-   if(defined($tasks{$_[0]})) {
-      return $tasks{$_[0]};
-   } else {
-      my $t = Task->new();
-      $tasks{$_[0]} = $t;
-      return $t;
-   }
-}
+# Retrieve Task from dictionary, or create new
+def getTask(taskid):
+    if taskid in tasks:
+        return tasks[taskid]
+    else:
+        t = Task()
+        tasks[taskid] = t
+        return tasks[taskid]
 
-# Record that a task has started
-sub taskStarted() {
-   # 2013-12-17 16:42:13,135+0000 DEBUG swift JOB_START jobid=sleep-k5t2pajl tr=sleep arguments=[1] tmpdir=sleep-run002/jobs/k/sleep-k5t2pajl host=westmere
-   $_[0] =~ s/jobid=|tr=|arguments=|tmpdir=|host=//g;
-   my @entryArray = split(/\s+/, $_[0]);
+# In a log entry, find values that start with value=<nnn>
+def getValue(entry, value):
+    entry_array = entry.split()
+    value += '='
+    for word in entry_array:
+        if word.startswith(value):
+            return word.split(value, 1)[1]
 
-   my $date      = $entryArray[1];
-   my $taskid    = $entryArray[5]; 
-   my $app       = $entryArray[6];
-   my $workdir   = $entryArray[-2];
-   my $host      = $entryArray[-1];
-   my $arguments = &getBracketedText($_[0]);
+# Get timestamp of a log entry
+def getTime(entry):
+   timestamp = entry.split()[1]
+   return timestamp.split(',')[0]
 
-   my $t = &getTask($taskid);
-   $t->taskNumber($taskCounter);
-   $t->app($app);
-   $t->workdir($workdir);
-   $t->arguments($arguments);
-   $t->startTime($date);
-   $t->host($host);
-   $tasks{$taskid} = $t; 
-   $taskCounter += 1;
-}   
+# Get all text between [ and ]
+def getBracketedText(entry):
+   return entry.partition('[')[-1].rpartition(']')[0]
 
-sub taskEnded() {
-   # 2013-12-17 16:42:38,520+0000 DEBUG swift JOB_END jobid=bash-1vngpnjl
-   $_[0] =~ s/jobid=//g;
-   my @entryArray = split(/\s+/, $_[0]);
+# Parse log
+for line in iter(log_file):
 
-   my $date   = $entryArray[1];
-   my $taskid = $entryArray[-1];
- 
-   my $t = &getTask($taskid);
-   $t->stopTime($date);
-}
+    if 'JOB_START' in line:
+        taskid          = getValue(line, "jobid")
+        task            = getTask(taskid)
+        task.app        = getValue(line, "tr")
+        task.startTime  = getTime(line)
+        task.workdir    = getValue(line, "tmpdir")
+        task.host       = getValue(line, "host")
+        task.arguments  = getBracketedText(line)
+        task.taskNumber = taskCounter
+        taskCounter     = taskCounter+1
 
-sub taskStagingIn() {
-   # 2013-12-18 17:38:23,372+0000 INFO  swift START jobid=cat-a3mafpjl - Staging in files [file://localhost/data.txt, file://localhost/data2.txt]
-   $_[0] =~ s/jobid=//g;
-   my @entryArray = split(/\s+/, $_[0]);
-   my $date   = $entryArray[1];
-   my $taskid = $entryArray[5];
-   my $files  = &getBracketedText($_[0]);
-   my $t = &getTask($taskid);
-   $t->stageIn($files);
-}
+    elif 'JOB_END' in line:
+        taskid        = getValue(line, "jobid")
+        task          = getTask(taskid)
+        task.stopTime = getTime(line)
 
-sub taskStagingOut() {
-   # 2013-12-18 17:38:23,349+0000 DEBUG swift FILE_STAGE_OUT_START srcname=catsn.0008.out srcdir=catsn-run016/shared/output srchost=westmere destdir=output desthost=localhost provider=file jobid=cat-83mafpjl
-   $_[0] =~ s/jobid=|srcname=//g;
-   my @entryArray = split(/\s+/, $_[0]);
+    elif "Staging in files" in line:
+        taskid       = getValue(line, "jobid")
+        task         = getTask(taskid)
+        task.stageIn = getBracketedText(line)
 
-   my $taskid = $entryArray[-1];
-   my $file   = $entryArray[5];
- 
-   my $t = &getTask($taskid);
-   if(defined($t->stageOut())) {
-      $t->stageOut($t->stageOut() . "$file ");
-   } else {
-      $t->stageOut("$file");
-   }
-}
+    elif "FILE_STAGE_OUT_START" in line:
+        taskid         = getValue(line, "jobid")
+        task           = getTask(taskid)
+        file_out       = getValue(line, "srcname")
+        task.stageOut += file_out + " "
 
-sub getBracketedText() {
-   my $result = "";
-   $_[0] =~ /\[([^\]]*)\]/x;
-   if(defined($1)) {
-      $result = $1;
-      $result =~ s/,//g;
-   }
-   return $result;
-}
-
-sub printTasks() {
-   no warnings;
-   foreach my $key (sort { $tasks{$a}->taskNumber <=> $tasks{$b}->taskNumber } keys %tasks) {
-      my $value = $tasks{$key}; 
-      printf "Task %s\n" .
-             "\tApp name = %s\n" . 
-             "\tCommand line arguments = %s\n" .
-             "\tHost = %s\n" .
-             "\tStart time = %s\n" .
-             "\tStop time = %s\n" .
-             "\tWork directory = %s\n" .
-             "\tStaged in files = %s\n" .
-             "\tStaged out files = %s\n\n",
-             $value->taskNumber, 
-             $value->app, 
-             $value->arguments,
-             $value->host, 
-             $value->startTime,
-             $value->stopTime,
-             $value->workdir,
-             $value->stageIn,
-             $value->stageOut,
-   }
-}
-
-# Check usage
-if ( !$ARGV[0] ) {
-   &usage();
-}
-
-# Verify $run_directory
-my $run_directory = $ARGV[0];
-if ( ! -d "$run_directory" ) {
-   &crash("Directory $run_directory does not exist!");
-}
-
-# Open Swift log
-my $swift_log_name = "$run_directory/" . basename($run_directory) . ".log";
-open(SWIFTLOG, $swift_log_name) || &crash("Unable to open log file $swift_log_name");
-
-# Read log, send entries we care about to the right place
-while(my $line = <SWIFTLOG>) {
-
-   if ( $line =~ m/JOB_START/o ) {
-      &taskStarted($line);
-      next;
-   }
-
-   elsif ( $line =~ m/JOB_END/o ) {
-      &taskEnded($line);
-      next;
-   }
-
-   elsif ( $line =~ m/Staging in files/o ) {
-      &taskStagingIn($line);
-      next;
-   }
-
-   elsif ( $line =~ m/FILE_STAGE_OUT_START/o ) { 
-      &taskStagingOut($line);
-      next;
-   }
-}
-
 # Print tasks
-&printTasks();
+for t in sorted(tasks.values(), key=operator.attrgetter('taskNumber')):
+    print "Task: %s" % t.taskNumber
+    print "\tApp name: %s" % t.app
+    print "\tCommand line arguments: %s" % t.arguments
+    print "\tHost: %s" % t.host
+    print "\tStart time: %s" % t.startTime
+    print "\tStop time: %s" % t.stopTime
+    print "\tWork directory: %s" % t.workdir
+    print "\tStaged in: %s" % t.stageIn
+    print "\tStaged out: %s\n" % t.stageOut




More information about the Swift-commit mailing list