[Swift-commit] r4226 - in SwiftApps/SwiftR: . perftools

tga at ci.uchicago.edu tga at ci.uchicago.edu
Mon Mar 28 17:28:46 CDT 2011


Author: tga
Date: 2011-03-28 17:28:46 -0500 (Mon, 28 Mar 2011)
New Revision: 4226

Added:
   SwiftApps/SwiftR/perftools/
   SwiftApps/SwiftR/perftools/parselog.py
Log:
Checking in swift log analysis code for python: building tools for scraping events from logs and visualising.


Added: SwiftApps/SwiftR/perftools/parselog.py
===================================================================
--- SwiftApps/SwiftR/perftools/parselog.py	                        (rev 0)
+++ SwiftApps/SwiftR/perftools/parselog.py	2011-03-28 22:28:46 UTC (rev 4226)
@@ -0,0 +1,135 @@
+import re
+import itertools as its
+import datetime
+from operator import itemgetter
+
+
+def runlength_enc(xs):
+    '''Run-length encode the stream xs.
+
+    Every maximal run of equal, adjacent items is collapsed into a single
+    (count, x) pair.  The pairs are produced lazily, in stream order.
+
+    >>> ys = runlength_enc('AAABBCCC')
+    >>> next(ys)
+    (3, 'A')
+    >>> list(ys)
+    [(2, 'B'), (3, 'C')]
+    '''
+    # groupby hands back (value, iterator-over-the-run); the run has no
+    # len(), so it is counted by walking it with ilen.
+    runs = its.groupby(xs)
+    return ((ilen(run), value) for value, run in runs)
+
+def ilen(it):
+    '''Count the items produced by iterating over it.
+
+    Works for any iterable, including ones that have no len(); an
+    iterator argument is consumed by the count.
+
+    >>> ilen(range(7))
+    7
+    '''
+    count = 0
+    for _item in it:
+        count += 1
+    return count
+
+
+# Log file read by log_iter().
+fname = "rserver2.log"
+
+
+# Regular expression for one log record.  Its seven groups are, in order:
+# date, time, milliseconds, the digits after the "+", priority, type and
+# message (the rest of the line, leading whitespace included).
+# NOTE(review): the digits after "+" look like a UTC offset -- confirm.  A
+# timestamp written with a "-" offset would not match the literal "\+".
+pattern = r"\s+".join([
+    r"^([-\d]+)",                   # date
+    r"([:\d]+),([\d]+)\+([\d]+)",   # time "," milliseconds "+" digits
+    r"([\S]+)",                     # priority
+    r"([\S]+)" + r"(.+)$",          # type, directly followed by message
+])
+
+
+compiled = re.compile(pattern)
+
+time_rex = r"(\d\d):(\d\d):(\d\d)"
+def parse_time(timestr):
+    '''Split a time string that starts with "HH:MM:SS" into integers.
+
+    Returns the tuple (hours, minutes, seconds).  Only the start of timestr
+    is examined, so any trailing text is ignored.
+
+    Raises ValueError if timestr does not start with "HH:MM:SS".
+    '''
+    m = re.match(time_rex, timestr)
+    if m is None:
+        # Name the offending text in the error rather than failing with an
+        # opaque AttributeError on m.group below.
+        raise ValueError("unparseable time: %r" % (timestr,))
+    return int(m.group(1)), int(m.group(2)), int(m.group(3))
+    
+date_rex = r"(\d\d\d\d)-(\d\d)-(\d\d)"
+def parse_date(datestr):
+    '''Split a date string that starts with "YYYY-MM-DD" into integers.
+
+    Returns the tuple (year, month, day).  Only the start of datestr is
+    examined, so any trailing text is ignored.
+
+    Raises ValueError if datestr does not start with "YYYY-MM-DD".
+    '''
+    m = re.match(date_rex, datestr)
+    if m is None:
+        # Name the offending text in the error rather than printing it and
+        # then failing with an opaque AttributeError on m.group below.
+        raise ValueError("unparseable date: %r" % (datestr,))
+    return int(m.group(1)), int(m.group(2)), int(m.group(3))
+
+def log_iter(filter_type=None, message_rex=None):
+    '''Iterate over the parsed records of the log file named by fname.
+
+    Each line that matches the module-level compiled pattern is yielded as
+    the 7-tuple (date, time, milliseconds, something, priority, type,
+    message), every field a string.  Lines that do not match the pattern
+    are skipped silently.
+
+    filter_type -- if not None, only records whose type equals it are
+                   yielded.
+    message_rex -- if not None, a pattern (string or compiled) that must
+                   match at the start of the message, as with re.match.
+    '''
+    # Read the whole file up front, but close it straight away instead of
+    # leaving the open handle for the garbage collector.
+    with open(fname) as logfile:
+        lines = logfile.readlines()
+
+    for line in lines:
+        matched = compiled.match(line)
+        if matched is None:
+            continue
+        record = matched.groups()
+        rec_type, message = record[5], record[6]
+        if filter_type is not None and filter_type != rec_type:
+            continue
+        if message_rex is not None and re.match(message_rex, message) is None:
+            continue
+        yield record
+
+
+def time_converted(recs, basetime=None):
+    '''Replace the textual timestamp of each record with a real time value.
+
+    recs is an iterable of 7-tuples shaped like those yielded by log_iter.
+    For every record a 4-tuple (when, priority, type, message) is yielded,
+    where when is
+
+      * a datetime.datetime if basetime is None, or
+      * the datetime.timedelta since basetime otherwise.  Records stamped
+        earlier than basetime are dropped in that case.
+    '''
+    for rec in recs:
+        date, clock, millis, _unused, priority, kind, message = rec
+        ymd = parse_date(date)
+        hms = parse_time(clock)
+        micros = int(millis) * 1000     # datetime wants microseconds
+        stamp = datetime.datetime(*(ymd + hms + (micros,)))
+
+        if basetime is None:
+            yield (stamp, priority, kind, message)
+        elif stamp >= basetime:
+            yield (stamp - basetime, priority, kind, message)
+
+
+def get_cmd_times(filter_type, message_rex=None):
+    '''Run-length encode the (date, time, type) keys of the selected records.
+
+    Records are selected with log_iter(filter_type, message_rex).  Runs of
+    consecutive records sharing the same date, time (milliseconds are not
+    part of the key) and type are merged, giving a list of
+    (count, (date, time, type)) pairs.
+    '''
+    records = log_iter(filter_type, message_rex)
+    keys = ((rec[0], rec[1], rec[5]) for rec in records)
+    return list(runlength_enc(keys))
+
+
+
+def handle(count, date, time, type, message=None):
+    '''Print one summary line of the form "count date time type".
+
+    If message is not None it is printed first, on the same line.
+    '''
+    if message is None:
+        print count, date, time, type
+    else:
+        print message, count, date, time, type
+
+
+#for count, tup in get_cmd_times("PutFileCommand"):
+#    handle(count, *tup)
+
+#for count, tup in get_cmd_times("GridExec"):
+#    handle(count, *tup)
+
+#for count, tup in get_cmd_times("Cpu", re.compile("^.*jobTerminated.*$")):
+#    handle(count, tup[0], tup[1], tup[2], message="jobTerminated")
+
+
+# Only "vdl:execute" events at or after basetime are kept; time_converted
+# then expresses each event time as an offset from basetime.
+basetime = datetime.datetime(2011, 3, 18, 13, 22, 52, 231*1000)
+exec_events = list(time_converted(log_iter("vdl:execute"), basetime))
+
+
+def _events_containing(marker, events):
+    '''Return the events whose message contains marker.
+
+    Each returned event is the original (t, priority, type, message) tuple
+    extended with a fifth field: the second whitespace-separated word of
+    its message.  Doing this inside a function keeps the loop variables
+    (notably one named after the builtin type) out of the module namespace,
+    where a Python 2 list comprehension would leave them behind.
+    '''
+    return [(t, priority, kind, message, message.split()[1])
+            for t, priority, kind, message in events
+            if marker in message]
+
+
+starts = _events_containing("START", exec_events)
+ends = _events_containing("END_SUCCESS", exec_events)
+
+# Order both lists by the fifth field so that entries at the same index can
+# be paired up.
+# NOTE(review): this presumes the field identifies one execution and that
+# every START has exactly one END_SUCCESS -- confirm; otherwise the pairs
+# drift out of step.
+starts.sort(key=itemgetter(4))
+ends.sort(key=itemgetter(4))
+
+# Show the first ten pairs as a sanity check.
+for s, e in zip(starts[:10], ends[:10]):
+    print s, e
+
+def to_s(td):
+    '''Convert a datetime.timedelta to a float number of seconds.
+
+    All three timedelta components are included.  The days part matters
+    for spans of 24 hours or more and for negative timedeltas, which are
+    stored as negative days plus non-negative seconds and microseconds.
+    '''
+    return (td.days * 86400.0 + float(td.seconds)
+            + float(td.microseconds) / 1000000.0)
+
+# One (row number, start offset, end offset) triple per start/end pair; the
+# pairing simply matches entries at the same index of starts and ends.
+paired = [(i, s[0], e[0]) for i, (s, e) in enumerate(zip(starts, ends))]
+
+# Draw every pair as a horizontal segment: x runs from the start to the end
+# offset in seconds, y is the row number.
+import pylab
+for i, s, e in paired:
+    begin_s, end_s = to_s(s), to_s(e)
+    pylab.plot((begin_s, end_s), (i, i), 'r')
+
+pylab.show()




More information about the Swift-commit mailing list