[Darshan-commits] [Git][darshan/darshan][python-job-summary] Initial WIP progress on python based darshan-job-summary. Uses an HTML...
Kevin Harms
xgitlab at cels.anl.gov
Mon Apr 5 16:50:22 CDT 2021
Kevin Harms pushed to branch python-job-summary at darshan / darshan
Commits:
01e7d4ed by Kevin Harms at 2021-04-05T21:50:09+00:00
Initial WIP progress on python based darshan-job-summary. Uses an HTML template and Genshi to generate an HTML report with embedded graphs.
- - - - -
3 changed files:
- + darshan-util/pydarshan/darshan/cli/summary.py
- + darshan-util/pydarshan/darshan/templates/__init__.py
- + darshan-util/pydarshan/darshan/templates/summary.html
Changes:
=====================================
darshan-util/pydarshan/darshan/cli/summary.py
=====================================
@@ -0,0 +1,221 @@
+import argparse
+import base64
+import datetime
+import genshi.template
+import importlib.resources as pkg_resources
+import io
+import matplotlib
+import matplotlib.pyplot as pyplot
+import numpy
+import pandas
+import pytz
+import sys
+
+import darshan
+import darshan.templates
+
def setup_parser(parser):
    """Register the job-summary command-line arguments on *parser*.

    Args:
        parser: An ``argparse.ArgumentParser`` (or sub-parser); it is
            modified in place.

    The positional ``input`` is declared with ``nargs='?'`` and ``--output``
    has no default, so either may be ``None`` after parsing.
    """
    parser.add_argument('input', help='darshan log file', nargs='?')
    parser.add_argument('--output', action='store', help='output file name')
    parser.add_argument('--verbose', action='store_true',
                        help='enable verbose output')
    parser.add_argument('--debug', action='store_true',
                        help='print the parsed arguments before running')
+
def plot_io_cost(posix_df, mpiio_df, stdio_df, runtime, nprocs):
    """Render the stacked "Average I/O cost per process" bar chart.

    For every module whose records are supplied, the summed read, write and
    metadata times are expressed as a percentage of ``runtime * nprocs``;
    whatever is left over is plotted as "Other".

    Args:
        posix_df, mpiio_df, stdio_df: Per-module record tables, or a falsy
            value (e.g. ``None``) to leave that module out.  Each one is
            indexed with ``['fcounters']`` and the result is summed with
            ``.sum(axis=0)``; the sum must provide the
            ``<PREFIX>_F_READ_TIME``, ``<PREFIX>_F_WRITE_TIME`` and
            ``<PREFIX>_F_META_TIME`` entries.
        runtime: Job runtime as passed by the caller.
        nprocs: Number of processes in the job.

    Returns:
        bytes: The PNG image, base64-encoded.
    """
    total_time = float(runtime * nprocs)

    def percent(value):
        # A zero runtime (or zero processes) has no meaningful share; report
        # 0% instead of raising ZeroDivisionError.
        if not total_time:
            return 0.0
        return (float(value) / total_time) * 100.0

    labels = []
    r_time = []
    w_time = []
    m_time = []
    o_time = []

    # (bar label, counter-name prefix, records) for each supported module.
    modules = (
        ('POSIX', 'POSIX', posix_df),
        ('MPI-IO', 'MPIIO', mpiio_df),
        ('STDIO', 'STDIO', stdio_df),
    )
    for label, prefix, module_df in modules:
        if not module_df:
            continue
        s = module_df['fcounters'].sum(axis=0)
        read = s[prefix + '_F_READ_TIME']
        write = s[prefix + '_F_WRITE_TIME']
        meta = s[prefix + '_F_META_TIME']

        labels.append(label)
        r_time.append(percent(read))
        w_time.append(percent(write))
        m_time.append(percent(meta))
        o_time.append(percent(runtime * nprocs - read - write - meta))

    fig, ax = pyplot.subplots()
    try:
        # Each series is stacked on top of the sum of the previous ones.
        ax.bar(labels, r_time, label='Read', color='purple')
        ax.bar(labels, w_time, label='Write', color='green', bottom=r_time)
        ax.bar(labels, m_time, label='Metadata', color='blue',
               bottom=[a + b for a, b in zip(r_time, w_time)])
        ax.bar(labels, o_time, label='Other (including application compute)',
               color='orange',
               bottom=[a + b + c for a, b, c in zip(r_time, w_time, m_time)])

        ax.set_ylabel("Percentage of runtime")
        ax.set_title("Average I/O cost per process")
        ax.legend(loc="upper right")

        buf = io.BytesIO()
        # Save this figure explicitly rather than pyplot's "current" figure.
        fig.savefig(buf, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        pyplot.close(fig)

    return base64.b64encode(buf.getvalue())
+
def plot_op_count(posix_df, mpiio_df, stdio_df):
    """Render the grouped "I/O Operations Counts" bar chart.

    One group of bars is drawn per operation category (Read, Write, Open,
    Stat, Seek, Mmap, Fsync), with up to four bars per group: POSIX, MPI-IO
    independent, MPI-IO collective and STDIO.  Categories a module has no
    counter for are plotted as 0.

    Args:
        posix_df, mpiio_df, stdio_df: Per-module record tables, or a falsy
            value (e.g. ``None``) to leave that module out.  Each one is
            indexed with ``['counters']`` and the result is summed with
            ``.sum(axis=0)``; the sum must provide the counters read below.

    Returns:
        bytes: The PNG image, base64-encoded.
    """
    labels = ['Read', 'Write', 'Open', 'Stat', 'Seek', 'Mmap', 'Fsync']
    x = numpy.arange(len(labels))
    bwidth = 0.25

    fig, ax = pyplot.subplots()
    try:
        # The four series are centred around each tick at offsets of
        # -1.5, -0.5, +0.5 and +1.5 bar widths.
        if posix_df:
            s = posix_df['counters'].sum(axis=0)
            vals = [s['POSIX_READS'], s['POSIX_WRITES'], s['POSIX_OPENS'],
                    s['POSIX_STATS'], s['POSIX_SEEKS'], s['POSIX_MMAPS'],
                    s['POSIX_FSYNCS'] + s['POSIX_FDSYNCS']]
            ax.bar(x - 3 * bwidth / 2, vals, bwidth, label='POSIX')

        if mpiio_df:
            s = mpiio_df['counters'].sum(axis=0)
            # MPIIO_SYNCS is shown in both the independent and the
            # collective series.
            vals = [s['MPIIO_INDEP_READS'], s['MPIIO_INDEP_WRITES'],
                    s['MPIIO_INDEP_OPENS'], 0, 0, 0, s['MPIIO_SYNCS']]
            ax.bar(x - bwidth / 2, vals, bwidth, label='MPI-IO Indep')
            vals = [s['MPIIO_COLL_READS'], s['MPIIO_COLL_WRITES'],
                    s['MPIIO_COLL_OPENS'], 0, 0, 0, s['MPIIO_SYNCS']]
            ax.bar(x + bwidth / 2, vals, bwidth, label='MPI-IO Coll')

        if stdio_df:
            s = stdio_df['counters'].sum(axis=0)
            vals = [s['STDIO_READS'], s['STDIO_WRITES'], s['STDIO_OPENS'],
                    0, s['STDIO_SEEKS'], 0, s['STDIO_FLUSHES']]
            ax.bar(x + 3 * bwidth / 2, vals, bwidth, label='STDIO')

        ax.set_ylabel("Ops (Total, All Processes)")
        ax.set_title("I/O Operations Counts")
        ax.set_xticks(x)
        ax.set_xticklabels(labels)
        ax.legend(loc="upper right")

        buf = io.BytesIO()
        # Save this figure explicitly rather than pyplot's "current" figure.
        fig.savefig(buf, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        pyplot.close(fig)

    return base64.b64encode(buf.getvalue())
+
def data_transfer_filesystem(report, posix_df, stdio_df):
    """Aggregate POSIX and STDIO bytes read/written per mount point.

    Each record's file name (``report.name_records[row['id']]``) is compared
    against the entries of ``report.mounts`` in order; the first mount whose
    path (``m[0]``) is a prefix of the name receives the record's bytes.
    Records matching no mount count only towards the grand totals.

    Side effect: ``'mount'`` and ``'mtype'`` columns are added to (or reset
    on) each supplied ``['counters']`` table, holding the matched mount path
    and ``m[1]``, or ``'Unknown'`` when nothing matched.

    Args:
        report: Object exposing ``mounts`` (iterable of indexable entries,
            path at index 0 and type at index 1) and ``name_records``
            (mapping from record id to file name).
        posix_df, stdio_df: Per-module record tables, or a falsy value (e.g.
            ``None``) to skip the module.  ``['counters']`` must be a
            DataFrame with ``id``, ``<PREFIX>_BYTES_READ`` and
            ``<PREFIX>_BYTES_WRITTEN`` columns.

    Returns:
        collections.defaultdict: Maps mount path to a dict with ``'read'``
        and ``'write'`` (bytes) and ``'read_rt'`` and ``'write_rt'`` (share
        of the total bytes read/written across both modules; ``0.0`` when
        the corresponding total is zero).
    """
    import collections

    fs_data = collections.defaultdict(
        lambda: {'read': 0, 'write': 0, 'read_rt': 0., 'write_rt': 0.})
    total_rd_bytes = 0
    total_wr_bytes = 0

    for module_df, prefix in ((posix_df, 'POSIX'), (stdio_df, 'STDIO')):
        if not module_df:
            continue

        counters = module_df['counters']
        rd_col = prefix + '_BYTES_READ'
        wr_col = prefix + '_BYTES_WRITTEN'

        counters.loc[:, 'mount'] = 'Unknown'
        counters.loc[:, 'mtype'] = 'Unknown'
        for index, row in counters.iterrows():
            total_rd_bytes += row[rd_col]
            total_wr_bytes += row[wr_col]
            name = report.name_records[row['id']]
            for m in report.mounts:
                if name.startswith(m[0]):
                    counters.at[index, 'mount'] = m[0]
                    counters.at[index, 'mtype'] = m[1]
                    fs_data[m[0]]['read'] += row[rd_col]
                    fs_data[m[0]]['write'] += row[wr_col]
                    break  # first matching mount wins

    for totals in fs_data.values():
        # A read-only or write-only job has a zero total in one direction;
        # report a 0.0 share instead of dividing by zero.
        totals['read_rt'] = (
            totals['read'] / total_rd_bytes if total_rd_bytes else 0.0)
        totals['write_rt'] = (
            totals['write'] / total_wr_bytes if total_wr_bytes else 0.0)

    return fs_data
+
def main(args=None):
    """Generate the HTML job summary for one Darshan log.

    The log is loaded, header values and the two embedded plots are computed,
    and the ``summary.html`` Genshi template is rendered to ``args.output``.

    Args:
        args: Parsed arguments exposing ``input``, ``output`` and ``debug``
            attributes.  When ``None`` the process command line is parsed
            using the options registered by ``setup_parser``.

    Raises:
        ValueError: If no output file name was supplied.
    """
    if args is None:
        parser = argparse.ArgumentParser(description='')
        setup_parser(parser)
        args = parser.parse_args()

    if args.debug:
        print(args)

    # --output has no default; fail early with a clear message instead of a
    # late TypeError from open(None) after all the work has been done.
    if not args.output:
        raise ValueError('no output file name given (use --output)')

    variables = {}
    report = darshan.DarshanReport(args.input, read_all=True)

    #
    # Setup template header variables
    #
    job = report.metadata['job']
    variables['exe'] = report.metadata['exe']
    variables['date'] = datetime.datetime.fromtimestamp(job['start_time'], pytz.utc)
    variables['jid'] = job['jobid']
    variables['uid'] = job['uid']
    variables['nprocs'] = job['nprocs']
    etime = int(job['end_time'])
    stime = int(job['start_time'])
    # The runtime is inclusive of both endpoints, so a job that starts and
    # ends within the same second has a runtime of 1, not 0.  Only an end
    # time before the start time is treated as invalid.
    if etime >= stime:
        variables['runtime'] = etime - stime + 1
    else:
        variables['runtime'] = 0

    # Per-module record tables; None when the module is absent from the log.
    posix_df = report.records['POSIX'].to_df() if 'POSIX' in report.modules else None
    mpiio_df = report.records['MPI-IO'].to_df() if 'MPI-IO' in report.modules else None
    stdio_df = report.records['STDIO'].to_df() if 'STDIO' in report.modules else None

    #
    # Plot I/O cost
    #
    variables['plot_io_cost'] = plot_io_cost(
        posix_df, mpiio_df, stdio_df,
        int(variables['runtime']), int(variables['nprocs'])).decode('utf-8')

    #
    # Plot I/O counts
    #
    variables['plot_op_count'] = plot_op_count(posix_df, mpiio_df, stdio_df).decode('utf-8')

    variables['fs_data'] = data_transfer_filesystem(report, posix_df, stdio_df)

    # NOTE(review): an empty resource name is used to obtain the package
    # directory itself -- confirm this is supported on all targeted Python
    # versions.
    template_path = pkg_resources.path(darshan.templates, '')
    with template_path as path:
        loader = genshi.template.TemplateLoader(str(path))
        template = loader.load('summary.html')

    stream = template.generate(title='Darshan Job Summary', var=variables)
    # The template declares charset utf-8 and contains non-ASCII text, so the
    # encoding is fixed rather than left to the platform default.  The
    # ``with`` block closes the file.
    with open(args.output, 'w', encoding='utf-8') as f:
        f.write(stream.render('html'))


if __name__ == "__main__":
    main()
=====================================
darshan-util/pydarshan/darshan/templates/__init__.py
=====================================
=====================================
darshan-util/pydarshan/darshan/templates/summary.html
=====================================
@@ -0,0 +1,361 @@
+<!DOCTYPE html>
+<html lang="en"
+ xmlns="http://www.w3.org/1999/xhtml"
+ xmlns:py="http://genshi.edgewall.org/">
+<head>
+ <title>Darshan Report</title>
+ <meta charset="utf-8"/>
+ <meta name="viewport" content="width=device-width, initial-scale=1"/>
+ <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css"/>
+ <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
+ <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/js/bootstrap.min.js"></script>
+</head>
+<body>
+
+<div class="panel panel-primary">
+ <div class="panel-heading">Darshan Summary Report for <b>${var.exe.split()[0].split('/')[-1]}</b> on ${var.date.date()}</div>
+ <div class="panel-body">${var.exe}</div>
+</div>
+
+<div class="container">
+ <table class="table">
+ <thead>
+ <tr>
+ <th>Jobid</th>
+ <th>uid</th>
+ <th>processes</th>
+ <th>runtime (seconds)</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td>${var.jid}</td>
+ <td>${var.uid}</td>
+ <td>${var.nprocs}</td>
+ <td>${var.runtime}</td>
+ </tr>
+ </tbody>
+ </table>
+</div>
+
+<div class="panel panel-default">
+ <div class="panel-heading">Performance Estimate</div>
+ <div class="panel-body">I/O performance estimate (at the POSIX layer): transferred 107835432.9 MiB at 44357.76 MiB/s</div>
+</div>
+
+<div class="panel-group">
+ <div class="panel panel-primary">
+ <div class="panel-heading">
+ <h4 class="panel-title">
+ <a data-toggle="collapse" href="#collapse0">Documentation</a>
+ </h4>
+ </div>
+ <div id="collapse0" class="panel-collapse collapse">
+ <div class="panel-body">
+ <div class="container">
+ <div class="row">
+ <div class="col-lg-6">
+ <div class="list-group">
+ <a class="list-group-item" href="https://www.mcs.anl.gov/research/projects/darshan/">Darshan Home</a>
+ <a class="list-group-item" href="https://www.mcs.anl.gov/research/projects/darshan/documentation/">Darshan Documentation</a>
+ <a class="list-group-item" href="https://www.mcs.anl.gov/research/projects/darshan/docs/darshan-util.html#_guide_to_darshan_parser_output">Counter Documentation</a>
+ </div>
+ </div>
+ <div class="col-lg-6">
+ <img src="https://www.mcs.anl.gov/research/projects/darshan/wp-content/uploads/sites/54/2018/05/darshan-web-logo.png" class="img-rounded" alt="darshan logo"/>
+ </div>
+ </div>
+ </div>
+ </div>
+ </div>
+ </div>
+<div class="panel panel-primary">
+ <div class="panel-heading">
+ <h4 class="panel-title">
+ <a data-toggle="collapse" href="#collapse1">Overview</a>
+ </h4>
+ </div>
+ <div id="collapse1" class="panel-collapse collapse in">
+ <div class="panel-body">
+ <div class="container">
+ <div class="row">
+ <div class="col-lg-6">
+ <img class="img-rounded" alt="avg-io-per-proc" src="data:image/png;base64,${var.plot_io_cost}"/>
+ </div>
+ <div class="col-lg-6">
+ <img class="img-rounded" alt="io-counts" src="data:image/png;base64,${var.plot_op_count}"/>
+ </div>
+ </div>
+ <div class="row">
+ <div class="col-lg-6">
+ <h3>Data Transfer Per Filesystem (POSIX and STDIO)</h3>
+ <table class="table">
+ <thead>
+ <tr>
+ <th rowspan="2">File System</th>
+ <th colspan="2">Write</th>
+ <th colspan="2">Read</th>
+ </tr>
+ <tr>
+ <th>MiB</th>
+ <th>Ratio</th>
+ <th>MiB</th>
+ <th>Ratio</th>
+ </tr>
+ </thead>
+ <tbody>
+ <py:for each="key in var.fs_data">
+ <tr>
+ <td>${key}</td>
+ <td>${var.fs_data[key]['write'] / (1024*1024)}</td>
+ <td>${var.fs_data[key]['write_rt']}</td>
+ <td>${var.fs_data[key]['read'] / (1024*1024)}</td>
+ <td>${var.fs_data[key]['read_rt']}</td>
+ </tr>
+ </py:for>
+ </tbody>
+ </table>
+ </div>
+ <div class="col-lg-6">
+ <h3>File Count Summary<br/>(estimated by POSIX I/O access offsets)</h3>
+ <table class="table">
+ <thead>
+ <tr>
+ <th>Type</th>
+ <th>Number of Files</th>
+ <th>Avg. Size</th>
+ <th>Max Size</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td>Total Opened</td>
+ <td>2838</td>
+ <td>38G</td>
+ <td>113G</td>
+ </tr>
+ <tr>
+ <td>read-only files</td>
+ <td>129</td>
+ <td>66G</td>
+ <td>70G</td>
+ </tr>
+ <tr>
+ <td>write-only files</td>
+ <td>2451</td>
+ <td>34G</td>
+ <td>113G</td>
+ </tr>
+ <tr>
+ <td>read/write files</td>
+ <td>258</td>
+ <td>67G</td>
+ <td>72G</td>
+ </tr>
+ <tr>
+ <td>created files</td>
+ <td>2709</td>
+ <td>37G</td>
+ <td>113G</td>
+ </tr>
+ </tbody>
+ </table>
+ </div>
+ <!-- </div> -->
+ <!-- </div> -->
+ </div>
+ </div>
+ </div>
+<div class="panel panel-primary">
+ <div class="panel-heading">
+ <h4 class="panel-title">
+ <a data-toggle="collapse" href="#collapse2">I/O Access</a>
+ </h4>
+ </div>
+ <div id="collapse2" class="panel-collapse collapse in">
+ <div class="panel-body">
+ <div class="container">
+ <div class="row">
+ <div class="col-lg-6">
+ <img src="plot2.png" class="img-rounded" alt="io-access"/>
+ </div>
+ <div class="col-lg-6">
+ <figure>
+ <img src="plot4.png" class="img-rounded" alt="io-count"/>
+ <figcaption>sequential: An I/O op issued at an offset greater than where the previous I/O op ended.<br/>
+ consecutive: An I/O op issued at the offset immediately following the end of the previous I/O op.
+ </figcaption>
+ </figure>
+ </div>
+ </div>
+ <div class="row">
+ <div class="col-lg-4">
+ <h3>Most Common Access Size<br/>(POSIX or MPI-IO)</h3>
+ <table class="table">
+ <thead>
+ <tr>
+ <th></th>
+ <th>Access Size</th>
+ <th>Count</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td>POSIX</td>
+ <td>11632</td>
+ <td>1536</td>
+ </tr>
+ <tr>
+ <td></td>
+ <td>153548</td>
+ <td>912</td>
+ </tr>
+ <tr>
+ <td></td>
+ <td>135976</td>
+ <td>912</td>
+ </tr>
+ <tr>
+ <td></td>
+ <td>171392</td>
+ <td>912</td>
+ </tr>
+ </tbody>
+ </table>
+ </div>
+ <div class="col-lg-8">
+ <h3>Average I/O per process (POSIX or MPI-IO)</h3>
+ <table class="table">
+ <thead>
+ <tr>
+ <th></th>
+ <th>Cumulative time spent in I/O functions (seconds)</th>
+ <th>Amount of I/O (MB)</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td>Independent reads</td>
+ <td>1.8266612705078</td>
+ <td>266.787112236954</td>
+ </tr>
+ <tr>
+ <td>Independent writes</td>
+ <td>9.22240010139975</td>
+ <td>4121.04788513078</td>
+ </tr>
+ <tr>
+ <td>Independent metadata</td>
+ <td>1686.44261747184</td>
+ <td>N/A</td>
+ </tr>
+ <tr>
+ <td>Shared reads</td>
+ <td>0</td>
+ <td>0</td>
+ </tr>
+ <tr>
+ <td>Shared writes</td>
+ <td>0</td>
+ <td>0</td>
+ </tr>
+ <tr>
+ <td>Shared metadata</td>
+ <td>0</td>
+ <td>N/A</td>
+ </tr>
+ </tbody>
+ </table>
+ </div>
+ </div>
+ </div>
+ </div>
+ </div>
+</div>
+<div class="panel panel-primary">
+ <div class="panel-heading">
+ <h4 class="panel-title">
+ <a data-toggle="collapse" href="#collapse3">I/O Timeline</a>
+ </h4>
+ </div>
+ <div id="collapse3" class="panel-collapse collapse in">
+ <div class="panel-body">
+ <div class="container">
+ <div class="row">
+ <div class="col-lg-6">
+ <img src="plot5.png" class="img-rounded" alt="read-timeline"/>
+ </div>
+ </div>
+ <div class="row">
+ <div class="col-lg-6">
+ <img src="plot6.png" class="img-rounded" alt="write-timeline"/>
+ </div>
+ </div>
+ <div class="row">
+ <div class="col-lg-6">
+ <img src="plot7.png" class="img-rounded" alt="shared-timeline"/>
+ </div>
+ </div>
+ </div>
+ </div>
+ </div>
+</div>
+<div class="panel panel-primary">
+ <div class="panel-heading">
+ <h4 class="panel-title">
+ <a data-toggle="collapse" href="#collapse4">I/O Variance</a>
+ </h4>
+ </div>
+ <div id="collapse4" class="panel-collapse collapse in">
+ <div class="panel-body">
+ <div class="container">
+ <div class="row">
+ <div class="col-lg-12">
+ <h3>Variance in Shared File (POSIX and STDIO)</h3>
+ <table class="table">
+ <thead>
+ <tr>
+ <th rowspan="2">File Suffix</th>
+ <th rowspan="2">Processes</th>
+ <th colspan="3">Fastest</th>
+ <th colspan="3">Slowest</th>
+ <th colspan="2">σ</th>
+ </tr>
+ <tr>
+ <th>Rank</th>
+ <th>Time</th>
+ <th>Bytes</th>
+ <th>Rank</th>
+ <th>Time</th>
+ <th>Bytes</th>
+ <th>Time</th>
+ <th>Bytes</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td>blah.txt</td>
+ <td>100</td>
+ <td>1</td>
+ <td>2</td>
+ <td>3</td>
+ <td>0</td>
+ <td>4</td>
+ <td>5</td>
+ <td>6</td>
+ <td>7</td>
+ </tr>
+ </tbody>
+ </table>
+ </div>
+ </div>
+ </div>
+ </div>
+ </div>
+</div>
+</div>
+</div>
+</div>
+
+</body>
+</html>
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/-/commit/01e7d4ed21dda9d57ce70a2e1091cdfc120f4e57
--
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/-/commit/01e7d4ed21dda9d57ce70a2e1091cdfc120f4e57
You're receiving this email because of your account on xgitlab.cels.anl.gov.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20210405/152aabd8/attachment-0001.html>
More information about the Darshan-commits
mailing list