[Swift-commit] r7461 - branches/release-0.95/bin
davidk at ci.uchicago.edu
Wed Jan 8 15:57:59 CST 2014
Author: davidk
Date: 2014-01-08 15:57:59 -0600 (Wed, 08 Jan 2014)
New Revision: 7461
Modified:
branches/release-0.95/bin/swiftlog
Log:
Updated swiftlog script
Modified: branches/release-0.95/bin/swiftlog
===================================================================
--- branches/release-0.95/bin/swiftlog 2014-01-08 21:57:26 UTC (rev 7460)
+++ branches/release-0.95/bin/swiftlog 2014-01-08 21:57:59 UTC (rev 7461)
@@ -1,187 +1,98 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env python
-use strict;
-use warnings;
-use File::Basename;
-use Class::Struct;
+import sys
+import os
+import operator
-# Task structure
-struct Task => {
- app => '$',
- arguments => '$',
- host => '$',
- replicationGroup => '$',
- stageIn => '$',
- stageOut => '$',
- startTime => '$',
- stopTime => '$',
- taskNumber => '$',
- thread => '$',
- workdir => '$',
-};
+if len(sys.argv) != 2:
+ sys.exit("Usage: %s <run_directory>" % sys.argv[0])
-# Hash for storing all tasks
-my %tasks = ();
-my $taskCounter = 1;
+# Open log file
+log_directory = os.path.normpath(sys.argv[1])
+log_filename = os.path.join(log_directory, os.path.basename(log_directory) + ".log")
+log_file = open(log_filename, "r")
-# Print basic usage info
-sub usage() {
- &crash("Usage: $0 <logdir>");
-}
+# Class definition for a single Task
+class Task:
+ app = ''
+ arguments = ''
+ host = ''
+ stageIn = ''
+ stageOut = ''
+ startTime = ''
+ stopTime = ''
+ taskNumber = ''
+ thread = ''
+ workdir = ''
-# Print error message and exit
-sub crash() {
- print STDERR "@_\n";
- exit(1);
-}
+# Dictionary containing all tasks
+tasks = {}
+taskCounter = 1
-# Get an existing task of a given jobid, or create a new one and return it
-sub getTask() {
- if(defined($tasks{$_[0]})) {
- return $tasks{$_[0]};
- } else {
- my $t = Task->new();
- $tasks{$_[0]} = $t;
- return $t;
- }
-}
+# Retrieve Task from dictionary, or create new
+def getTask(taskid):
+ if taskid in tasks:
+ return tasks[taskid]
+ else:
+ t = Task()
+ tasks[taskid] = t
+ return tasks[taskid]
-# Record that a task has started
-sub taskStarted() {
- # 2013-12-17 16:42:13,135+0000 DEBUG swift JOB_START jobid=sleep-k5t2pajl tr=sleep arguments=[1] tmpdir=sleep-run002/jobs/k/sleep-k5t2pajl host=westmere
- $_[0] =~ s/jobid=|tr=|arguments=|tmpdir=|host=//g;
- my @entryArray = split(/\s+/, $_[0]);
+# In a log entry, find values that start with value=<nnn>
+def getValue(entry, value):
+ entry_array = entry.split()
+ value += '='
+ for word in entry_array:
+ if word.startswith(value):
+ return word.split(value, 1)[1]
- my $date = $entryArray[1];
- my $taskid = $entryArray[5];
- my $app = $entryArray[6];
- my $workdir = $entryArray[-2];
- my $host = $entryArray[-1];
- my $arguments = &getBracketedText($_[0]);
+# Get timestamp of a log entry
+def getTime(entry):
+ timestamp = entry.split()[1]
+ return timestamp.split(',')[0]
- my $t = &getTask($taskid);
- $t->taskNumber($taskCounter);
- $t->app($app);
- $t->workdir($workdir);
- $t->arguments($arguments);
- $t->startTime($date);
- $t->host($host);
- $tasks{$taskid} = $t;
- $taskCounter += 1;
-}
+# Get all text between [ and ]
+def getBracketedText(entry):
+ return entry.partition('[')[-1].rpartition(']')[0]
-sub taskEnded() {
- # 2013-12-17 16:42:38,520+0000 DEBUG swift JOB_END jobid=bash-1vngpnjl
- $_[0] =~ s/jobid=//g;
- my @entryArray = split(/\s+/, $_[0]);
+# Parse log
+for line in iter(log_file):
- my $date = $entryArray[1];
- my $taskid = $entryArray[-1];
-
- my $t = &getTask($taskid);
- $t->stopTime($date);
-}
+ if 'JOB_START' in line:
+ taskid = getValue(line, "jobid")
+ task = getTask(taskid)
+ task.app = getValue(line, "tr")
+ task.startTime = getTime(line)
+ task.workdir = getValue(line, "tmpdir")
+ task.host = getValue(line, "host")
+ task.arguments = getBracketedText(line)
+ task.taskNumber = taskCounter
+ taskCounter = taskCounter+1
-sub taskStagingIn() {
- # 2013-12-18 17:38:23,372+0000 INFO swift START jobid=cat-a3mafpjl - Staging in files [file://localhost/data.txt, file://localhost/data2.txt]
- $_[0] =~ s/jobid=//g;
- my @entryArray = split(/\s+/, $_[0]);
- my $date = $entryArray[1];
- my $taskid = $entryArray[5];
- my $files = &getBracketedText($_[0]);
- my $t = &getTask($taskid);
- $t->stageIn($files);
-}
+ elif 'JOB_END' in line:
+ taskid = getValue(line, "jobid")
+ task = getTask(taskid)
+ task.stopTime = getTime(line)
-sub taskStagingOut() {
- # 2013-12-18 17:38:23,349+0000 DEBUG swift FILE_STAGE_OUT_START srcname=catsn.0008.out srcdir=catsn-run016/shared/output srchost=westmere destdir=output desthost=localhost provider=file jobid=cat-83mafpjl
- $_[0] =~ s/jobid=|srcname=//g;
- my @entryArray = split(/\s+/, $_[0]);
+ elif "Staging in files" in line:
+ taskid = getValue(line, "jobid")
+ task = getTask(taskid)
+ task.stageIn = getBracketedText(line)
- my $taskid = $entryArray[-1];
- my $file = $entryArray[5];
-
- my $t = &getTask($taskid);
- if(defined($t->stageOut())) {
- $t->stageOut($t->stageOut() . "$file ");
- } else {
- $t->stageOut("$file");
- }
-}
+ elif "FILE_STAGE_OUT_START" in line:
+ taskid = getValue(line, "jobid")
+ task = getTask(taskid)
+ file_out = getValue(line, "srcname")
+ task.stageOut += file_out + " "
-sub getBracketedText() {
- my $result = "";
- $_[0] =~ /\[([^\]]*)\]/x;
- if(defined($1)) {
- $result = $1;
- $result =~ s/,//g;
- }
- return $result;
-}
-
-sub printTasks() {
- no warnings;
- foreach my $key (sort { $tasks{$a}->taskNumber <=> $tasks{$b}->taskNumber } keys %tasks) {
- my $value = $tasks{$key};
- printf "Task %s\n" .
- "\tApp name = %s\n" .
- "\tCommand line arguments = %s\n" .
- "\tHost = %s\n" .
- "\tStart time = %s\n" .
- "\tStop time = %s\n" .
- "\tWork directory = %s\n" .
- "\tStaged in files = %s\n" .
- "\tStaged out files = %s\n\n",
- $value->taskNumber,
- $value->app,
- $value->arguments,
- $value->host,
- $value->startTime,
- $value->stopTime,
- $value->workdir,
- $value->stageIn,
- $value->stageOut,
- }
-}
-
-# Check usage
-if ( !$ARGV[0] ) {
- &usage();
-}
-
-# Verify $run_directory
-my $run_directory = $ARGV[0];
-if ( ! -d "$run_directory" ) {
- &crash("Directory $run_directory does not exist!");
-}
-
-# Open Swift log
-my $swift_log_name = "$run_directory/" . basename($run_directory) . ".log";
-open(SWIFTLOG, $swift_log_name) || &crash("Unable to open log file $swift_log_name");
-
-# Read log, send entries we care about to the right place
-while(my $line = <SWIFTLOG>) {
-
- if ( $line =~ m/JOB_START/o ) {
- &taskStarted($line);
- next;
- }
-
- elsif ( $line =~ m/JOB_END/o ) {
- &taskEnded($line);
- next;
- }
-
- elsif ( $line =~ m/Staging in files/o ) {
- &taskStagingIn($line);
- next;
- }
-
- elsif ( $line =~ m/FILE_STAGE_OUT_START/o ) {
- &taskStagingOut($line);
- next;
- }
-}
-
# Print tasks
-&printTasks();
+for t in sorted(tasks.values(), key=operator.attrgetter('taskNumber')):
+ print "Task: %s" % t.taskNumber
+ print "\tApp name: %s" % t.app
+ print "\tCommand line arguments: %s" % t.arguments
+ print "\tHost: %s" % t.host
+ print "\tStart time: %s" % t.startTime
+ print "\tStop time: %s" % t.stopTime
+ print "\tWork directory: %s" % t.workdir
+ print "\tStaged in: %s" % t.stageIn
+ print "\tStaged out: %s\n" % t.stageOut
More information about the Swift-commit mailing list