[Darshan-commits] [Git][darshan/darshan][python-job-summary] Initial WIP progress on python based darshan-job-summary. Uses an HTML...

Kevin Harms xgitlab at cels.anl.gov
Mon Apr 5 16:50:22 CDT 2021



Kevin Harms pushed to branch python-job-summary at darshan / darshan


Commits:
01e7d4ed by Kevin Harms at 2021-04-05T21:50:09+00:00
Initial WIP progress on python based darshan-job-summary. Uses an HTML template and Genshi to generate an HTML report with embedded graphs.

- - - - -


3 changed files:

- + darshan-util/pydarshan/darshan/cli/summary.py
- + darshan-util/pydarshan/darshan/templates/__init__.py
- + darshan-util/pydarshan/darshan/templates/summary.html


Changes:

=====================================
darshan-util/pydarshan/darshan/cli/summary.py
=====================================
@@ -0,0 +1,221 @@
+import argparse
+import base64
+import datetime
+import genshi.template
+import importlib.resources as pkg_resources
+import io
+import matplotlib
+import matplotlib.pyplot as pyplot
+import numpy
+import pandas
+import pytz
+import sys
+
+import darshan
+import darshan.templates
+
+def setup_parser(parser):
+    """Register the summary tool's command-line arguments on *parser*.
+
+    Adds an optional positional ``input`` and the ``--output``,
+    ``--verbose`` and ``--debug`` options. The parser is modified in
+    place; nothing is returned.
+    """
+    # (name, add_argument keyword arguments), in registration order
+    argument_specs = (
+        ('input', {'help': 'darshan log file', 'nargs': '?'}),
+        ('--output', {'action': 'store', 'help': 'output file name'}),
+        ('--verbose', {'help': '', 'action': 'store_true'}),
+        ('--debug', {'help': '', 'action': 'store_true'}),
+    )
+    for name, options in argument_specs:
+        parser.add_argument(name, **options)
+
+def plot_io_cost(posix_df, mpiio_df, stdio_df, runtime, nprocs):
+    """Render the "Average I/O cost per process" stacked bar chart.
+
+    For every module whose record table is given, the ``fcounters`` table
+    is summed column-wise and the read, write and metadata times are
+    expressed as a percentage of ``runtime * nprocs``. Whatever is left of
+    that budget is plotted as "Other".
+
+    Parameters:
+        posix_df, mpiio_df, stdio_df: per-module record tables, each
+            indexed with ``['fcounters']``; a falsy value (e.g. ``None``)
+            skips that module.
+        runtime: job runtime used for the time budget.
+        nprocs: number of processes used for the time budget.
+
+    Returns:
+        The PNG image of the chart, base64-encoded, as ``bytes``.
+    """
+    # Time budget that every bar segment is a share of.
+    total_time = float(runtime * nprocs)
+
+    def percent(seconds):
+        # A zero budget (runtime or nprocs of 0) would divide by zero;
+        # report 0 % instead of crashing the whole report.
+        if total_time <= 0:
+            return 0.0
+        return (float(seconds) / total_time) * 100.0
+
+    labels = []
+    r_time = []
+    w_time = []
+    m_time = []
+    o_time = []
+
+    # (bar label, counter-name prefix, record table) for each module
+    modules = (
+        ('POSIX', 'POSIX', posix_df),
+        ('MPI-IO', 'MPIIO', mpiio_df),
+        ('STDIO', 'STDIO', stdio_df),
+    )
+    for label, prefix, df in modules:
+        if not df:
+            continue
+        s = df['fcounters'].sum(axis=0)
+        read = s[prefix + '_F_READ_TIME']
+        write = s[prefix + '_F_WRITE_TIME']
+        meta = s[prefix + '_F_META_TIME']
+
+        labels.append(label)
+        r_time.append(percent(read))
+        w_time.append(percent(write))
+        m_time.append(percent(meta))
+        o_time.append(percent(total_time - read - write - meta))
+
+    buf = io.BytesIO()
+    fig, ax = pyplot.subplots()
+    try:
+        # Stack the segments: each one starts where the previous ones end.
+        ax.bar(labels, r_time, label='Read', color='purple')
+        ax.bar(labels, w_time, label='Write', color='green', bottom=r_time)
+        ax.bar(labels, m_time, label='Metadata', color='blue', bottom=[a+b for a,b in zip(r_time,w_time)])
+        ax.bar(labels, o_time, label='Other (including application compute)', color='orange', bottom=[a+b+c for a,b,c in zip (r_time,w_time,m_time)])
+
+        ax.set_ylabel("Percentage of runtime")
+        ax.set_title("Average I/O cost per process")
+        ax.legend(loc="upper right")
+        # Save this figure explicitly rather than pyplot's "current" one.
+        fig.savefig(buf, format='png')
+    finally:
+        # pyplot keeps every figure alive until it is closed explicitly.
+        pyplot.close(fig)
+
+    return base64.b64encode(buf.getvalue())
+
+def plot_op_count(posix_df, mpiio_df, stdio_df):
+    """Render the "I/O Operations Counts" grouped bar chart.
+
+    For every module whose record table is given, the ``counters`` table
+    is summed column-wise and one bar series is drawn over the operation
+    categories Read, Write, Open, Stat, Seek, Mmap and Fsync. MPI-IO
+    contributes two series (independent and collective). Categories for
+    which no counter is read are plotted as 0.
+
+    Parameters:
+        posix_df, mpiio_df, stdio_df: per-module record tables, each
+            indexed with ``['counters']``; a falsy value (e.g. ``None``)
+            skips that module.
+
+    Returns:
+        The PNG image of the chart, base64-encoded, as ``bytes``.
+    """
+    buf = io.BytesIO()
+    fig, ax = pyplot.subplots()
+    try:
+        labels = ['Read', 'Write', 'Open', 'Stat', 'Seek', 'Mmap', 'Fsync']
+        x = numpy.arange(len(labels))
+        # Four series of this width are placed side by side around each tick.
+        bwidth = 0.25
+
+        if posix_df:
+            s = posix_df['counters'].sum(axis=0)
+            vals = [s['POSIX_READS'], s['POSIX_WRITES'], s['POSIX_OPENS'], s['POSIX_STATS'], s['POSIX_SEEKS'], s['POSIX_MMAPS'], s['POSIX_FSYNCS']+s['POSIX_FDSYNCS']]
+            ax.bar(x - 3*bwidth/2, vals, bwidth, label='POSIX')
+
+        if mpiio_df:
+            s = mpiio_df['counters'].sum(axis=0)
+            # NOTE(review): MPIIO_SYNCS is shown in both the independent and
+            # the collective series - confirm this duplication is intended.
+            vals = [s['MPIIO_INDEP_READS'], s['MPIIO_INDEP_WRITES'], s['MPIIO_INDEP_OPENS'], 0, 0, 0, s['MPIIO_SYNCS']]
+            ax.bar(x - bwidth/2, vals, bwidth, label='MPI-IO Indep')
+            vals = [s['MPIIO_COLL_READS'], s['MPIIO_COLL_WRITES'], s['MPIIO_COLL_OPENS'], 0, 0, 0, s['MPIIO_SYNCS']]
+            ax.bar(x + bwidth/2, vals, bwidth, label='MPI-IO Coll')
+
+        if stdio_df:
+            s = stdio_df['counters'].sum(axis=0)
+            vals = [s['STDIO_READS'], s['STDIO_WRITES'], s['STDIO_OPENS'], 0, s['STDIO_SEEKS'], 0, s['STDIO_FLUSHES']]
+            ax.bar(x + 3*bwidth/2, vals, bwidth, label='STDIO')
+
+        ax.set_ylabel("Ops (Total, All Processes)")
+        ax.set_title("I/O Operations Counts")
+        ax.set_xticks(x)
+        ax.set_xticklabels(labels)
+        ax.legend(loc="upper right")
+        # Save this figure explicitly rather than pyplot's "current" one.
+        fig.savefig(buf, format='png')
+    finally:
+        # pyplot keeps every figure alive until it is closed explicitly.
+        pyplot.close(fig)
+
+    return base64.b64encode(buf.getvalue())
+
+def data_transfer_filesystem(report, posix_df, stdio_df):
+    """Aggregate POSIX and STDIO bytes read/written per mount point.
+
+    Each record's file name (``report.name_records[row['id']]``) is
+    matched against ``report.mounts`` and attributed to the first mount
+    whose path is a prefix of the name. Records matching no mount still
+    count towards the totals but get no per-filesystem entry.
+
+    Side effect: adds ``mount`` and ``mtype`` columns to the ``counters``
+    table of every module processed ('Unknown' where nothing matched).
+
+    Parameters:
+        report: object providing ``mounts`` (sequence of entries whose
+            item 0 is the mount path and item 1 the filesystem type) and
+            ``name_records`` (record id -> file name).
+        posix_df, stdio_df: per-module record tables indexed with
+            ``['counters']``; a falsy value (e.g. ``None``) skips the module.
+
+    Returns:
+        A mapping of mount path to a dict with ``read`` and ``write``
+        (bytes) and ``read_rt`` and ``write_rt`` (share of all bytes
+        read/written; 0.0 when the respective total is zero).
+    """
+    import collections
+    fs_data = collections.defaultdict(lambda: {'read':0,'write':0,'read_rt':0.,'write_rt':0.})
+    total_rd_bytes = 0
+    total_wr_bytes = 0
+
+    # NOTE(review): the first matching mount wins, so this relies on
+    # report.mounts listing more specific paths before their parents
+    # (e.g. '/home' before '/') - confirm the ordering guarantee.
+    for prefix, df in (('POSIX', posix_df), ('STDIO', stdio_df)):
+        if not df:
+            continue
+        counters = df['counters']
+        rd_col = prefix + '_BYTES_READ'
+        wr_col = prefix + '_BYTES_WRITTEN'
+        counters.loc[:,'mount'] = 'Unknown'
+        counters.loc[:,'mtype'] = 'Unknown'
+        for index, row in counters.iterrows():
+            total_rd_bytes += row[rd_col]
+            total_wr_bytes += row[wr_col]
+            name = report.name_records[row['id']]
+            for m in report.mounts:
+                if name.startswith(m[0]):
+                    counters.at[index, 'mount'] = m[0]
+                    counters.at[index, 'mtype'] = m[1]
+                    fs_data[m[0]]['read'] += row[rd_col]
+                    fs_data[m[0]]['write'] += row[wr_col]
+                    break
+
+    # A job that only reads (or only writes) has a zero total on the
+    # other side; report a 0.0 share instead of dividing by zero.
+    for entry in fs_data.values():
+        entry['read_rt'] = entry['read'] / total_rd_bytes if total_rd_bytes else 0.0
+        entry['write_rt'] = entry['write'] / total_wr_bytes if total_wr_bytes else 0.0
+
+    return fs_data
+    
+def main(args=None):
+    """Generate the HTML job summary for a darshan log.
+
+    Loads the log named by ``args.input``, fills the template variables
+    (job header, the two embedded plots, per-filesystem transfer table),
+    renders ``summary.html`` from ``darshan.templates`` with Genshi and
+    writes the result to ``args.output``.
+
+    Parameters:
+        args: parsed arguments providing ``input``, ``output`` and
+            ``debug``; when ``None`` they are parsed from the command line.
+
+    Raises:
+        ValueError: if no input log or no output file name was given.
+    """
+    if args is None:
+        parser = argparse.ArgumentParser(description='')
+        setup_parser(parser)
+        args = parser.parse_args()
+
+    if args.debug:
+        print(args)
+
+    # argparse accepts both being omitted; fail before doing any work
+    # instead of crashing at the very end in open(None).
+    if args.input is None:
+        raise ValueError('no darshan log file given')
+    if args.output is None:
+        raise ValueError('no output file name given (use --output)')
+
+    variables = {}
+    report = darshan.DarshanReport(args.input, read_all=True)
+
+    #
+    # Setup template header variables
+    #
+    variables['exe'] = report.metadata['exe']
+    variables['date'] = datetime.datetime.fromtimestamp(report.metadata['job']['start_time'], pytz.utc)
+    variables['jid'] = report.metadata['job']['jobid']
+    variables['uid'] = report.metadata['job']['uid']
+    variables['nprocs'] = report.metadata['job']['nprocs']
+    etime = int(report.metadata['job']['end_time'])
+    stime = int(report.metadata['job']['start_time'])
+    # The runtime is counted inclusively (end - start + 1), so a job that
+    # starts and ends in the same second lasted 1, not 0. Clamping to 1
+    # also keeps the I/O cost plot from dividing by a zero runtime.
+    variables['runtime'] = max(etime - stime + 1, 1)
+
+    if 'POSIX' in report.modules:
+        posix_df = report.records['POSIX'].to_df()
+    else:
+        posix_df = None
+
+    if 'MPI-IO' in report.modules:
+        mpiio_df = report.records['MPI-IO'].to_df()
+    else:
+        mpiio_df = None
+
+    if 'STDIO' in report.modules:
+        stdio_df = report.records['STDIO'].to_df()
+    else:
+        stdio_df = None
+
+    #
+    # Plot I/O cost
+    #
+    variables['plot_io_cost'] = plot_io_cost(posix_df, mpiio_df, stdio_df, int(variables['runtime']), int(variables['nprocs'])).decode('utf-8')
+
+    #
+    # Plot I/O counts
+    #
+    variables['plot_op_count'] = plot_op_count(posix_df, mpiio_df, stdio_df).decode('utf-8')
+
+    variables['fs_data'] = data_transfer_filesystem(report, posix_df, stdio_df)
+
+    # NOTE(review): importlib.resources.path() is documented for file
+    # resources; passing '' to obtain the package directory should be
+    # confirmed on all supported Python versions.
+    template_path = pkg_resources.path(darshan.templates, '')
+    with template_path as path:
+        loader = genshi.template.TemplateLoader(str(path))
+        template = loader.load('summary.html')
+
+        stream = template.generate(title='Darshan Job Summary', var=variables)
+        # The template declares charset utf-8 and contains non-ASCII text,
+        # so do not depend on the platform's default encoding.
+        with open(args.output, 'w', encoding='utf-8') as f:
+            f.write(stream.render('html'))
+
+# Run the summary generator when this module is executed as a script.
+if __name__ == "__main__":
+    main()


=====================================
darshan-util/pydarshan/darshan/templates/__init__.py
=====================================


=====================================
darshan-util/pydarshan/darshan/templates/summary.html
=====================================
@@ -0,0 +1,361 @@
+<!DOCTYPE html>
+<html lang="en"
+      xmlns="http://www.w3.org/1999/xhtml"
+      xmlns:py="http://genshi.edgewall.org/">
+<head>
+  <title>Darshan Report</title>
+  <meta charset="utf-8"/>
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css"/>
+  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
+  <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/js/bootstrap.min.js"></script>
+</head>
+<body>
+
+<div class="panel panel-primary">
+	<div class="panel-heading">Darshan Summary Report for <b>${var.exe.split()[0].split('/')[-1]}</b> on ${var.date.date()}</div>
+    <div class="panel-body">${var.exe}</div>
+</div>
+
+<div class="container">
+    <table class="table">
+        <thead>
+            <tr>
+                <th>Jobid</th>
+                <th>uid</th>
+                <th>processes</th>
+                <th>runtime (seconds)</th>
+            </tr>
+        </thead>
+        <tbody>
+            <tr>
+		<td>${var.jid}</td>
+		<td>${var.uid}</td>
+		<td>${var.nprocs}</td>
+		<td>${var.runtime}</td>
+            </tr>
+        </tbody>
+    </table>
+</div>
+
+<div class="panel panel-default">
+    <div class="panel-heading">Performance Estimate</div>
+    <div class="panel-body">I/O performance estimate (at the POSIX layer): transferred 107835432.9 MiB at 44357.76 MiB/s</div>
+</div>
+
+<div class="panel-group">
+    <div class="panel panel-primary">
+        <div class="panel-heading">
+            <h4 class="panel-title">
+                <a data-toggle="collapse" href="#collapse0">Documentation</a>
+            </h4>
+        </div>
+        <div id="collapse0" class="panel-collapse collapse">
+            <div class="panel-body">
+                <div class="container">
+                    <div class="row">
+                        <div class="col-lg-6">
+                            <div class="list-group">
+                                <a class="list-group-item" href="https://www.mcs.anl.gov/research/projects/darshan/">Darshan Home</a>
+                                <a class="list-group-item" href="https://www.mcs.anl.gov/research/projects/darshan/documentation/">Darshan Documentation</a>
+                                <a class="list-group-item" href="https://www.mcs.anl.gov/research/projects/darshan/docs/darshan-util.html#_guide_to_darshan_parser_output">Counter Documentation</a>
+                            </div>
+                        </div>
+                        <div class="col-lg-6">
+                            <img src="https://www.mcs.anl.gov/research/projects/darshan/wp-content/uploads/sites/54/2018/05/darshan-web-logo.png" class="img-rounded" alt="darshan logo"/>
+                        </div>
+                    </div>
+                </div>
+            </div>
+        </div>
+    </div>
+<div class="panel panel-primary">
+    <div class="panel-heading">
+        <h4 class="panel-title">
+            <a data-toggle="collapse" href="#collapse1">Overview</a>
+        </h4>
+    </div>
+    <div id="collapse1" class="panel-collapse collapse in">
+    <div class="panel-body">
+        <div class="container">
+        <div class="row">
+            <div class="col-lg-6">
+		    <img class="img-rounded" alt="avg-io-per-proc" src="data:image/png;base64,${var.plot_io_cost}"/>
+            </div>
+            <div class="col-lg-6">
+		    <img class="img-rounded" alt="io-counts" src="data:image/png;base64,${var.plot_op_count}"/>
+            </div>
+        </div>
+        <div class="row">
+            <div class="col-lg-6">
+                <h3>Data Transfer Per Filesystem (POSIX and STDIO)</h3>
+                <table class="table">
+                    <thead>
+                        <tr>
+                            <th rowspan="2">File System</th>
+                            <th colspan="2">Write</th>
+                            <th colspan="2">Read</th>
+                        </tr>
+                        <tr>
+                            <th>MiB</th>
+                            <th>Ratio</th>
+                            <th>MiB</th>
+                            <th>Ratio</th>
+                        </tr>
+                    </thead>
+                    <tbody>
+			<py:for each="key in var.fs_data">
+                            <tr>
+				<td>${key}</td>
+				<td>${var.fs_data[key]['write'] / (1024*1024)}</td>
+				<td>${var.fs_data[key]['write_rt']}</td>
+				<td>${var.fs_data[key]['read'] / (1024*1024)}</td>
+				<td>${var.fs_data[key]['read_rt']}</td>
+                            </tr>
+			</py:for>
+                    </tbody>
+                </table>
+            </div>
+            <div class="col-lg-6">
+                <h3>File Count Summary<br/>(estimated by POSIX I/O access offsets)</h3>
+                <table class="table">
+                <thead>
+                <tr>
+                    <th>Type</th>
+                    <th>Number of Files</th>
+                    <th>Avg. Size</th>
+                    <th>Max Size</th>
+                </tr>
+                </thead>
+                <tbody>
+                <tr>
+                    <td>Total Opened</td>
+                    <td>2838</td>
+                    <td>38G</td>
+                    <td>113G</td>
+                </tr>
+                <tr>
+                    <td>read-only files</td>
+                    <td>129</td>
+                    <td>66G</td>
+                    <td>70G</td>
+                </tr>
+                <tr>
+                    <td>write-only files</td>
+                    <td>2451</td>
+                    <td>34G</td>
+                    <td>113G</td>
+                </tr>
+                <tr>
+                    <td>read/write files</td>
+                    <td>258</td>
+                    <td>67G</td>
+                    <td>72G</td>
+                </tr>
+                <tr>
+                    <td>created files</td>
+                    <td>2709</td>
+                    <td>37G</td>
+                    <td>113G</td>
+                </tr>
+                </tbody>
+                </table>
+            </div>
+	    <!-- </div> -->
+	    <!-- </div> -->
+    </div>
+    </div>
+    </div>
+<div class="panel panel-primary">
+    <div class="panel-heading">
+        <h4 class="panel-title">
+            <a data-toggle="collapse" href="#collapse2">I/O Access</a>
+        </h4>
+    </div>
+    <div id="collapse2" class="panel-collapse collapse in">
+        <div class="panel-body">
+            <div class="container">
+                <div class="row">
+                    <div class="col-lg-6">
+                        <img src="plot2.png" class="img-rounded" alt="io-access"/>
+                    </div>
+                    <div class="col-lg-6">
+                        <figure>
+                            <img src="plot4.png" class="img-rounded" alt="io-count"/>
+                            <figcaption>sequential: An I/O op issued at an offset greater than where the previous I/O op ended.<br/>
+                                consecutive: An I/O op issued at the offset immediately following the end of the previous I/O op.
+                            </figcaption>
+                        </figure>
+                    </div>
+                </div>
+                <div class="row">
+                    <div class="col-lg-4">
+                        <h3>Most Common Access Size<br/>(POSIX or MPI-IO)</h3>
+                        <table class="table">
+                            <thead>
+                                <tr>
+                                    <th></th>
+                                    <th>Access Size</th>
+                                    <th>Count</th>
+                                </tr>
+                            </thead>
+                            <tbody>
+                                <tr>
+                                    <td>POSIX</td>
+                                    <td>11632</td>
+                                    <td>1536</td>
+                                </tr>
+                                <tr>
+                                    <td></td>
+                                    <td>153548</td>
+                                    <td>912</td>
+                                </tr>
+                                <tr>
+                                    <td></td>
+                                    <td>135976</td>
+                                    <td>912</td>
+                                </tr>
+                                <tr>
+                                    <td></td>
+                                    <td>171392</td>
+                                    <td>912</td>
+                                </tr>
+                            </tbody>
+                        </table>
+                    </div>
+                    <div class="col-lg-8">
+                        <h3>Average I/O per process (POSIX or MPI-IO)</h3>
+                        <table class="table">
+                            <thead>
+                                <tr>
+                                    <th></th>
+                                    <th>Cumulative time spent in I/O functions (seconds)</th>
+                                    <th>Amount of I/O (MB)</th>
+                                </tr>
+                            </thead>
+                            <tbody>
+                                <tr>
+                                    <td>Independent reads</td>
+                                    <td>1.8266612705078</td>
+                                    <td>266.787112236954</td>
+                                </tr>
+                                <tr>
+                                    <td>Independent writes</td>
+                                    <td>9.22240010139975</td>
+                                    <td>4121.04788513078</td>
+                                </tr>
+                                <tr>
+                                    <td>Independent metadata</td>
+                                    <td>1686.44261747184</td>
+                                    <td>N/A</td>
+                                </tr>
+                                <tr>
+                                    <td>Shared reads</td>
+                                    <td>0</td>
+                                    <td>0</td>
+                                </tr>
+                                <tr>
+                                    <td>Shared writes</td>
+                                    <td>0</td>
+                                    <td>0</td>
+                                </tr>
+                                <tr>
+                                    <td>Shared metadata</td>
+                                    <td>0</td>
+                                    <td>N/A</td>
+                                </tr>
+                            </tbody>
+                        </table>
+                    </div>
+                </div>
+            </div>
+        </div>
+    </div>
+</div>
+<div class="panel panel-primary">
+    <div class="panel-heading">
+        <h4 class="panel-title">
+            <a data-toggle="collapse" href="#collapse3">I/O Timeline</a>
+        </h4>
+    </div>
+    <div id="collapse3" class="panel-collapse collapse in">
+        <div class="panel-body">
+            <div class="container">
+                <div class="row">
+                    <div class="col-lg-6">
+                        <img src="plot5.png" class="img-rounded" alt="read-timeline"/>
+                    </div>
+                </div>
+                <div class="row">
+                    <div class="col-lg-6">
+                        <img src="plot6.png" class="img-rounded" alt="write-timeline"/>
+                    </div>
+                </div>
+                <div class="row">
+                    <div class="col-lg-6">
+                        <img src="plot7.png" class="img-rounded" alt="shared-timeline"/>
+                    </div>
+                </div>
+            </div>
+        </div>
+    </div>
+</div>
+<div class="panel panel-primary">
+    <div class="panel-heading">
+        <h4 class="panel-title">
+            <a data-toggle="collapse" href="#collapse4">I/O Variance</a>
+        </h4>
+    </div>
+    <div id="collapse4" class="panel-collapse collapse in">
+        <div class="panel-body">
+            <div class="container">
+                <div class="row">
+                    <div class="col-lg-12">
+                        <h3>Variance in Shared File (POSIX and STDIO)</h3>
+                        <table class="table">
+                            <thead>
+                                <tr>
+                                    <th rowspan="2">File Suffix</th>
+                                    <th rowspan="2">Processes</th>
+                                    <th colspan="3">Fastest</th>
+                                    <th colspan="3">Slowest</th>
+                                    <th colspan="2">σ</th>
+                                </tr>
+                                <tr>
+                                    <th>Rank</th>
+                                    <th>Time</th>
+                                    <th>Bytes</th>
+                                    <th>Rank</th>
+                                    <th>Time</th>
+                                    <th>Bytes</th>
+                                    <th>Time</th>
+                                    <th>Bytes</th>
+                                </tr>
+                            </thead>
+                            <tbody>
+                                <tr>
+                                    <td>blah.txt</td>
+                                    <td>100</td>
+                                    <td>1</td>
+                                    <td>2</td>
+                                    <td>3</td>
+                                    <td>0</td>
+                                    <td>4</td>
+                                    <td>5</td>
+                                    <td>6</td>
+                                    <td>7</td>
+                                </tr>
+                            </tbody>
+                        </table>
+                    </div>
+                </div>
+            </div>
+        </div>
+    </div>
+</div>
+</div>
+</div>
+</div>
+
+</body>
+</html>



View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/-/commit/01e7d4ed21dda9d57ce70a2e1091cdfc120f4e57

-- 
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/-/commit/01e7d4ed21dda9d57ce70a2e1091cdfc120f4e57
You're receiving this email because of your account on xgitlab.cels.anl.gov.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20210405/152aabd8/attachment-0001.html>


More information about the Darshan-commits mailing list