[Darshan-commits] [Git][darshan/darshan][master] 4 commits: updated for dxt_analyzer from LBNL

Fri Sep 22 10:33:06 CDT 2017

Shane Snyder pushed to branch master at darshan / darshan


Commits:
d76496b2 by asim at 2017-09-18T11:20:42-07:00
updated for dxt_analyzer from LBNL

- - - - -
565e5b24 by asim at 2017-09-18T11:21:45-07:00
updated for dxt_analyzer from LBNL

- - - - -
774b5d94 by asim at 2017-09-18T12:02:32-07:00
second update for dxt_analyzer from LBNL

- - - - -
09531f17 by Shane Snyder at 2017-09-22T10:33:02-05:00
Merge branch 'master' into 'master'

dxt_analyzer

See merge request !16
- - - - -


3 changed files:

- darshan-util/Makefile.in
- + darshan-util/doc/dxt_analyzer.txt
- + darshan-util/dxt_analyzer.py


Changes:

=====================================
darshan-util/Makefile.in
=====================================

--- a/darshan-util/Makefile.in
+++ b/darshan-util/Makefile.in
@@ -144,6 +144,7 @@ install:: all
 	install -m 755 darshan-dxt-parser $(bindir)
 	install -m 755 darshan-merge $(bindir)
 	install -m 755 $(srcdir)/darshan-summary-per-file.sh $(bindir)
+	install -m 755 dxt_analyzer.py $(bindir)
 	install -m 755 libdarshan-util.a $(libdir)
 ifeq ($(DARSHAN_ENABLE_SHARED),1)
 	install -m 755 libdarshan-util.so $(libdir)


=====================================
darshan-util/doc/dxt_analyzer.txt
=====================================
--- /dev/null
+++ b/darshan-util/doc/dxt_analyzer.txt
@@ -0,0 +1,47 @@
+# Email questions to SDMSUPPORT at LBL.GOV
+# Scientific Data Management Research Group
+# Lawrence Berkeley National Laboratory
+#
+# last update on Mon Aug  7 08:47:28 PDT 2017
+
+
+To plot the read or write activity from Darshan Extended Trace (DXT) logs.
+
+% ./dxt_analyzer.py --help
+usage: dxt_analyzer.py [-h] -i DXT_LOGNAME [-o SAVEFIG] [--show] [--read]
+                            [--filemode] [-f FNAME]
+
+io activity plot from dxt log
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -i DXT_LOGNAME, --input DXT_LOGNAME
+                        dxt log path
+  -o SAVEFIG, --save SAVEFIG
+                        output file name for the plot
+  --show                Show the plot rather than saving to a PDF
+  --read                READ I/O action to be plotted. 
+                        Default is False for WRITE mode.
+  --filemode            Single file mode (must be used with --fname). 
+                        Default is False for all files
+  -f FNAME, --fname FNAME
+                        name of file to be plotted (must use with --filemode)
+
+Example runs:
+% python dxt_analyzer.py -i darshan_dxt-a.txt 
+
+% python dxt_analyzer.py -i darshan_dxt-a.txt \
+        --filemode -f /global/cscratch1/sd/asim/amrex/a24/plt00000.hdf5
+
+% python dxt_analyzer.py -i darshan_dxt-d.txt \
+	-o dxt-d.pdf
+
+% python dxt_analyzer.py -i darshan_dxt-df.txt 
+
+% python dxt_analyzer.py -i darshan_dxt-c.txt 
+
+% python dxt_analyzer.py -i darshan_dxt-v.txt 
+
+
+For more information on creating DXT logs, see:
+http://www.mcs.anl.gov/research/projects/darshan/docs/darshan3-util.html#_darshan_dxt_parser 


=====================================
darshan-util/dxt_analyzer.py
=====================================
--- /dev/null
+++ b/darshan-util/dxt_analyzer.py
@@ -0,0 +1,300 @@
+#!/usr/bin/env python2
+
+###
+#
+# *** Copyright Notice ***
+#
+# dxt_analyzer.py 
+# Copyright (c) 2017, The Regents of the University of California, 
+# through Lawrence Berkeley National Laboratory (subject to receipt
+# of any required approvals from the U.S. Dept. of Energy).  
+# All rights reserved.
+#
+# If you have questions about your rights to use or distribute this software, 
+# please contact Berkeley Lab's Innovation & Partnerships Office 
+# at IPO at lbl.gov.
+#
+# NOTICE.  This software was developed under funding from the 
+# U.S. Department of Energy.  As such, the U.S. Government has been granted 
+# for itself and others acting on its behalf a paid-up, nonexclusive, 
+# irrevocable, worldwide license in the Software to reproduce, prepare 
+# derivative works, and perform publicly and display publicly.  Beginning 
+# five (5) years after the date permission to assert copyright is obtained 
+# from the U.S. Department of Energy, and subject to any subsequent five (5) 
+# year renewals, the U.S. Government is granted for itself and others acting 
+# on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in 
+# the Software to reproduce, prepare derivative works, distribute copies to 
+# the public, perform publicly and display publicly, and to permit others to 
+# do so.
+#
+# Email questions to SDMSUPPORT at LBL.GOV
+# Scientific Data Management Research Group
+# Lawrence Berkeley National Laboratory
+#
+# last update on Tue Aug  8 08:16:49 PDT 2017
+#
+
+
+'''
+dxt_analyzer.py
+To plot the read or write activity from Darshan Extended Trace (DXT) logs.
+
+For more information on creating DXT logs, see:
+http://www.mcs.anl.gov/research/projects/darshan/docs/darshan3-util.html#_darshan_dxt_parser 
+
+% ./dxt_analyzer.py --help
+usage: dxt_analyzer.py [-h] -i DXT_LOGNAME [-o SAVEFIG] [--show] [--read]
+                            [--filemode] [-f FNAME]
+
+io activity plot from dxt log
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -i DXT_LOGNAME, --input DXT_LOGNAME
+                        dxt log path
+  -o SAVEFIG, --save SAVEFIG
+                        output file name for the plot
+  --show                Show the plot rather than saving to a PDF
+  --read                READ I/O action to be plotted. 
+                        Default is False for WRITE mode.
+  --filemode            Single file mode (must be used with --fname)
+                        Default is False for all files
+  -f FNAME, --fname FNAME
+                        name of file to be plotted (must use with --filemode)
+
+e.g.
+python dxt_analyzer.py -i darshan_dxt-a.txt
+python dxt_analyzer.py -i darshan_dxt-a.txt \
+        --filemode -f /global/cscratch1/sd/asim/amrex/a24/plt00000.hdf5
+python dxt_analyzer.py -i darshan_dxt-c.txt
+python dxt_analyzer.py -i darshan_dxt-d.txt -o dxt-d.pdf
+python dxt_analyzer.py -i darshan_dxt-df.txt
+python dxt_analyzer.py -i darshan_dxt-v.txt
+
+'''
+
+import numpy as np
+import matplotlib
+matplotlib.use('PDF')
+import matplotlib.pyplot as plt
+import re
+import argparse
+
+#------------------------------------------------------------------------------
+# Regular expression and helper function definitions
+#------------------------------------------------------------------------------
+global_finfo = {} 
+#'filename':{'rw':[], 'mount':'', 'fs':'', 'stripe_size':-1, 'stripe_width':-1, 'OSTlist':[]}
+#                    Module          rank         write/read      segment               offset                   length                 start                     end                    OST
+POSIX_LOG_PATTERN = ' (X_POSIX)\s+([+-]?\d+(?:\.\d+)?)\s+(\S+)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+\[\s*([+-]?\d+(?:\.\d+)?)\]\.*'
+POSIX_LOG_NO_OSTS = ' (X_POSIX)\s+([+-]?\d+(?:\.\d+)?)\s+(\S+)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)'
+
+#                    Module          rank         write/read      segment               length                   start                     end 
+MPIIO_LOG_PATTERN = ' (X_MPIIO)\s+([+-]?\d+(?:\.\d+)?)\s+(\S+)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)'
+
+HEADER_PATTERN = '(#\s+)(\S+):(\s+)(\d+)'
+COMMENT_PATTERN_DXT = '# \.*'
+USEFUL_COMMENT_PATTERN = '# DXT,\.*'
+LISTNUM_PATTERN = '(\d+.*)'
+
+header = re.compile(HEADER_PATTERN)
+validposix = re.compile(POSIX_LOG_PATTERN)
+posix_no_ost = re.compile(POSIX_LOG_NO_OSTS)
+validmpiio = re.compile(MPIIO_LOG_PATTERN)
+comment_dxt = re.compile(COMMENT_PATTERN_DXT)
+use_comment = re.compile(USEFUL_COMMENT_PATTERN)
+listnumpattern = re.compile(LISTNUM_PATTERN)
+
+
+def store_useful_comment(pieces, finfo_dict, curr_fname):
+    '''updates a file's dictionary entry using the pieces of a comment/header line from a DXT log'''
+    attribute = pieces[0].replace('# DXT, ', '')
+    if attribute=='file_id':
+        curr_fname = pieces[2].replace(' ','').rstrip('\n')
+        if curr_fname not in finfo_dict.keys():
+            finfo_dict[curr_fname] = {'mount':'', 'fs':'', 'stripe_size':-1, 'stripe_width':-1, 'OST_list':[]}
+        return curr_fname
+    elif attribute=='mnt_pt':
+        finfo_dict[curr_fname]['mount'] = pieces[1].replace(', fs_type', '').replace(' ', '').rstrip('\n')
+        finfo_dict[curr_fname]['fs'] = pieces[2].replace(' ', '').rstrip('\n')
+    elif attribute=='Lustre stripe_size':
+        finfo_dict[curr_fname]['stripe_size'] = int(pieces[1].replace(', Lustre stripe_count', '').replace(' ', ''))
+        finfo_dict[curr_fname]['stripe_width'] = int(pieces[2].replace(' ',''))
+    elif attribute=='Lustre OST obdidx':
+        liststr = pieces[1][1:]
+        listnums = listnumpattern.match(liststr).groups()[0]
+        OSTlist = [int(x) for x in re.findall(r'\d+', listnums)]
+        finfo_dict[curr_fname]['OST_list'] = OSTlist
+    return curr_fname
+
+
+def parse_dxt_log_line(line, curr_fname, finfo_dict):
+    '''takes a DXT log line and stores pertinent file metadata into a dictionary, and parses read/write trace data into a tuple'''
+    if not line: return (curr_fname, (),  -1)
+    pieces = line.split(":")
+    if comment_dxt.match(pieces[0]):
+        if use_comment.match(pieces[0]):
+            curr_fname = store_useful_comment(pieces, finfo_dict, curr_fname)
+            return (curr_fname, (), -1)
+        header_match = header.match(line)
+        if header_match:
+            return (curr_fname, (header_match.group(2),int(header_match.group(4))), -11)
+        else:
+            return (curr_fname, (), -1)
+    data = validposix.match(line)
+    if data:
+        return (curr_fname,
+            (
+            data.group(1).replace('X_', '')+'_'+(data.group(3).upper()), 
+            (
+                int(data.group(2)), #rank
+                int(data.group(6)), #length
+                float(data.group(7)), #start
+                float(data.group(8)), #end
+                int(data.group(9))  #OST
+            )
+        ), 1)
+    data = posix_no_ost.match(line)
+    if data:
+        return (curr_fname,
+            (
+            data.group(1).replace('X_', '')+'_'+(data.group(3).upper()), 
+            (
+                int(data.group(2)), #rank
+                int(data.group(6)), #length
+                float(data.group(7)), #start
+                float(data.group(8)), #end
+                -1 # No OST
+            )
+        ), 1)
+    data = validmpiio.match(line)
+    if data:
+        return (curr_fname,
+            (
+            data.group(1).replace('X_', '')+'_'+(data.group(3).upper()), 
+            (
+                int(data.group(2)), #rank
+                int(data.group(5)), #length
+                float(data.group(6)), #start
+                float(data.group(7)), #end
+            )
+        ), 1)
+    return (curr_fname, (), -1)
+
+
+def get_verts_file(data, module, fname, action):
+    '''make a list of rectangle vertices to plot the posix/mpi read/write activity for each rank for a specific file'''
+    keyword = module+'_'+action
+    filedata = map(lambda x: x[1], filter(lambda x: x[0]==fname, data))
+    filtered = map(lambda x: x[1], filter(lambda x: x[0]==keyword, filedata))
+    activities = map(lambda x: (x[0], (x[2], x[3])), filtered)
+    verts = np.zeros((1,4,2))
+    for entry in activities:
+        lx,rx,by,ty = entry[1][0], entry[1][1], entry[0] - 0.5 , entry[0] + 0.5
+        newverts = np.array([((lx,by), (lx,ty), (rx, ty), (rx, by))])
+        verts = np.concatenate((verts, newverts))
+    return verts[1:]
+
+
+def get_verts_all(data, module, action):
+    '''make a list of rectangle vertices to plot the posix/mpi read/write activity for each rank'''
+    keyword = module+'_'+action
+    IOdata = map(lambda x: x[1], data)
+    filtered = map(lambda x: x[1], filter(lambda x: x[0]==keyword, IOdata))
+    activities = map(lambda x: (x[0], (x[2], x[3])), filtered)
+    verts = np.zeros((1,4,2))
+    for entry in activities:
+        lx,rx,by,ty = entry[1][0], entry[1][1], entry[0] - 0.5 , entry[0] + 0.5
+        newverts = np.array([((lx,by), (lx,ty), (rx, ty), (rx, by))])
+        verts = np.concatenate((verts, newverts))
+    return verts[1:]
+
+
+#------------------------------------------------------------------------------
+# Set variables and create plots
+#------------------------------------------------------------------------------
+
+# Variables to be set by user
+# name of DXT log file
+dxt_logname = './darshan_dxt-5967365.txt' 
+# create plot for a specific file (mode='file') or for all files (mode='all')
+singlefile_mode = False # for 'all' 
+mode = 'all' 
+# name of file to be plotted (must use with mode='file')
+fname = '/global/cscratch1/sd/junmin/heatTxf.n64.s32/ph5.4096.n128s64.t1.h5'
+# FLAG for READ I/O action to be plotted.
+read_flag = False  # for READ action 
+action = 'WRITE' 
+# filename to save the plot (must end with .pdf).
+# showflag takes priority over savefig.
+showflag = False # If you do not want to save, set this to True. 
+savefig = 'dxt_plot.pdf' 
+
+parser = argparse.ArgumentParser(description='io activity plot from dxt log')
+parser.add_argument("-i", "--input", action="store", dest="dxt_logname", required=True, help="dxt log path")
+parser.add_argument("-o", "--save", action="store", dest="savefig", required=False, help="output file name for the plot")
+parser.add_argument("--show", action="store_true", dest="showflag", required=False, help="Show the plot rather than saving to a PDF")
+parser.add_argument("--read", action="store_true", dest="read_flag", required=False, help="READ I/O action to be plotted. Default is False for WRITE mode.")
+parser.add_argument("--filemode", action="store_true", dest="singlefile_mode", required=False, help="Single file mode (must be used with --fname). Default is False for all files")
+parser.add_argument("-f", "--fname", action="store", dest="fname", required=False, help="name of file to be plotted (must use with --filemode)")
+
+#args = parser.parse_args(['-i', '/Users/asim/Desktop/simcodes/vpic/runs/ttest/junmin-darshan_dxt-5967365.txt', 
+#'--save', 'dxt_plot.pdf'])
+args = parser.parse_args()     # uncomment this line for general use
+
+dxt_logname = args.dxt_logname
+if (args.savefig):
+    savefig = args.savefig
+if (args.showflag):
+    showflag = True
+if (args.read_flag): 
+    read_flag = True
+    action = 'READ'
+if (args.singlefile_mode): 
+    singlefile_mode = True
+    mode = 'file'
+    fname = args.fname
+    
+
+with open(dxt_logname) as infile:
+    finfo_dict = {}
+    curr_fname = ''
+    logdata = []
+    for line in infile:
+        curr_fname, data, flag = parse_dxt_log_line(line, curr_fname, finfo_dict)
+        if flag == -11:
+            (k,v) = data
+            if k == 'jobid': jobid = v
+            elif k == 'start_time': start_time = v
+            elif k == 'end_time': end_time = v
+            elif k == 'nprocs': nprocs = v
+        elif flag == 1:
+            logdata.append((curr_fname, data))
+
+fig, ax = plt.subplots(dpi=150)
+
+if mode=='file':
+    mpiio_verts = get_verts_file(logdata, 'MPIIO', fname, action)
+    mpiio_collec = matplotlib.collections.PolyCollection(mpiio_verts, facecolor='blue', edgecolor='blue')
+    posix_verts = get_verts_file(logdata, 'POSIX', fname, action)
+    posix_collec = matplotlib.collections.PolyCollection(posix_verts, facecolor='red', edgecolor='red')
+    title = str(jobid)+'_'+fname.split('/').pop()+'_'+action+'_activity'
+else :  # mode=='all'
+    mpiio_verts = get_verts_all(logdata, 'MPIIO', action)
+    mpiio_collec = matplotlib.collections.PolyCollection(mpiio_verts, facecolor='blue', edgecolor='blue')
+    posix_verts = get_verts_all(logdata, 'POSIX', action)
+    posix_collec = matplotlib.collections.PolyCollection(posix_verts, facecolor='red', edgecolor='red')
+    title = str(jobid)+'_'+action+'_activity'
+
+
+ax.add_collection(mpiio_collec)
+ax.add_collection(posix_collec)
+ax.autoscale()
+plt.ylabel("MPI rank")
+plt.xlabel("Time (s)")
+plt.title(title)
+if (showflag):
+    plt.show()
+else:
+    plt.savefig(savefig, format='pdf')
+



View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/compare/2d14002d939f478b24b9556946c3b5c78fd2fc6f...09531f17e874a362950a9c43dde69a2401abcffd

---
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/compare/2d14002d939f478b24b9556946c3b5c78fd2fc6f...09531f17e874a362950a9c43dde69a2401abcffd
You're receiving this email because of your account on xgitlab.cels.anl.gov.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20170922/61a36942/attachment-0001.html>