[Darshan-commits] [Git][darshan/darshan][python-job-summary] pydarshan/html: update the Python script that generates the job summary report in HTML

Sudheer Chunduri xgitlab at cels.anl.gov
Wed Apr 7 17:20:05 CDT 2021



Sudheer Chunduri pushed to branch python-job-summary at darshan / darshan


Commits:
95952927 by Sudheer Chunduri at 2021-04-07T22:19:53+00:00
pydarshan/html: update the Python script that generates the job summary report in HTML
add processing for APXC and APMPI records

- - - - -
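For reference, a minimal sketch of how the new record handling is meant to be exercised, built only from the pydarshan calls that appear in the diff below; the log file name and the command line are illustrative placeholders, not part of this commit:

    # hedged sketch (not part of the commit): load a Darshan log the way the
    # updated summary.py does and pull the new APMPI/APXC records, if present.
    # "example.darshan" is a placeholder file name.
    import darshan

    report = darshan.DarshanReport("example.darshan", read_all=True)
    report.info()

    apmpi_dict = report.records['APMPI'].to_dict() if 'APMPI' in report.modules else None
    apxc_dict = report.records['APXC'].to_dict() if 'APXC' in report.modules else None

    # the argparse change in this commit ('input' -> '--input') suggests an
    # invocation along the lines of:
    #   python summary.py --input example.darshan --output report.html
    # (the exact console entry point is not shown in this commit)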


2 changed files:

- darshan-util/pydarshan/darshan/cli/summary.py
- darshan-util/pydarshan/darshan/templates/summary.html


Changes:

=====================================
darshan-util/pydarshan/darshan/cli/summary.py
=====================================
@@ -10,13 +10,16 @@ import numpy
 import pandas
 import pytz
 import sys
+import pprint
+import pandas as pd
+import seaborn as sns
 
 import darshan
 import darshan.templates
 
 def setup_parser(parser):
     # setup arguments
-    parser.add_argument('input', help='darshan log file', nargs='?')
+    parser.add_argument('--input', help='darshan log file', nargs='?')
     parser.add_argument('--output', action='store', help='output file name')
     parser.add_argument('--verbose', help='', action='store_true')
     parser.add_argument('--debug', help='', action='store_true')
@@ -149,7 +152,128 @@ def data_transfer_filesystem(report, posix_df, stdio_df):
         fs_data[fs]['write_rt'] = fs_data[fs]['write'] / total_wr_bytes
 
     return fs_data
+
+def apmpi_process(apmpi_dict):
     
+    header_rec = apmpi_dict[0]
+    sync_flag = header_rec["sync_flag"]
+    print("sync_flag= ", sync_flag)
+    print(
+        "APMPI Variance in total mpi time: ", header_rec["variance_total_mpitime"], "\n"
+    )
+    if sync_flag:
+        print(
+            "APMPI Variance in total mpi sync time: ",
+            header_rec["variance_total_mpisynctime"],
+        )
+
+    df_apmpi = pd.DataFrame()
+    list_mpiop = []
+    list_rank = []
+    for rec in apmpi_dict[1:]:  # skip the first record which is header record
+        mpi_nonzero_callcount = []
+        for k, v in rec["all_counters"].items():
+            if k.endswith("_CALL_COUNT") and v > 0:
+                mpi_nonzero_callcount.append(k[: -(len("CALL_COUNT"))])
+
+        df_rank = pd.DataFrame()
+        for mpiop in mpi_nonzero_callcount:
+            ncall = mpiop
+            ncount = mpiop + "CALL_COUNT"
+            nsize = mpiop + "TOTAL_BYTES"
+            h0 = mpiop + "MSG_SIZE_AGG_0_256"
+            h1 = mpiop + "MSG_SIZE_AGG_256_1K"
+            h2 = mpiop + "MSG_SIZE_AGG_1K_8K"
+            h3 = mpiop + "MSG_SIZE_AGG_8K_256K"
+            h4 = mpiop + "MSG_SIZE_AGG_256K_1M"
+            h5 = mpiop + "MSG_SIZE_AGG_1M_PLUS"
+            ntime = mpiop + "TOTAL_TIME"
+            mintime = mpiop + "MIN_TIME"
+            maxtime = mpiop + "MAX_TIME"
+            if sync_flag:
+                totalsync = mpiop + "TOTAL_SYNC_TIME"
+
+            mpiopstat = {}
+            mpiopstat["Rank"] = rec["rank"]
+            mpiopstat["Node_ID"] = rec["node_name"]
+            mpiopstat["Call"] = ncall[:-1]
+            mpiopstat["Total_Time"] = rec["all_counters"][ntime]
+            mpiopstat["Count"] = rec["all_counters"][ncount]
+            mpiopstat["Total_Bytes"] = rec["all_counters"].get(nsize, None)
+            mpiopstat["[0-256B]"] = rec["all_counters"].get(h0, None)
+            mpiopstat["[1K-8KB]"] = rec["all_counters"].get(h2, None)
+            mpiopstat["[8K-256KB]"] = rec["all_counters"].get(h3, None)
+            mpiopstat["256K-1MB"] = rec["all_counters"].get(h4, None)
+            mpiopstat["[>1MB]"] = rec["all_counters"].get(h5, None)
+            mpiopstat["Min_Time"] = rec["all_counters"][mintime]
+            mpiopstat["Max_Time"] = rec["all_counters"][maxtime]
+            if sync_flag and (totalsync in rec["all_counters"]):
+                mpiopstat["Total_SYNC_Time"] = rec["all_counters"][totalsync]
+
+            list_mpiop.append(mpiopstat)
+
+        rankstat = {}
+        rankstat["Rank"] = rec["rank"]
+        rankstat["Node_ID"] = rec["node_name"]
+        rankstat["Call"] = "Total_MPI_time"
+        rankstat["Total_Time"] = rec["all_counters"]["MPI_TOTAL_COMM_TIME"]
+        list_rank.append(rankstat)
+    df_rank = pd.DataFrame(list_rank)
+    avg_total_time = df_rank["Total_Time"].mean()
+    max_total_time = df_rank["Total_Time"].max()
+    min_total_time = df_rank["Total_Time"].min()
+    max_rank = df_rank.loc[df_rank["Total_Time"].idxmax()]["Rank"]
+    min_rank = df_rank.loc[df_rank["Total_Time"].idxmin()]["Rank"]
+    # assumption: the row index and the rank id are the same in df_rank;
+    # this assumption still needs to be verified
+    mean_rank = (
+        (df_rank["Total_Time"] - df_rank["Total_Time"].mean()).abs().argsort()[:1][0]
+    )
+    pd.set_option("display.max_rows", None, "display.max_columns", None)
+
+    list_combined = list_mpiop + list_rank
+    df_apmpi = pd.DataFrame(list_combined)
+    df_apmpi = df_apmpi.sort_values(by=["Rank", "Total_Time"], ascending=[True, False])
+    df_call = df_apmpi[['Call', 'Total_Time']]
+    #print("MPI stats for rank with maximum MPI time")#, border_style="blue")
+    print("MPI stats for rank with maximum MPI time\n", df_apmpi.loc[df_apmpi["Rank"] == max_rank])
+    print("\n\n")
+    print("MPI stats for rank with minimum MPI time")# border_style="blue")
+    print(df_apmpi.loc[df_apmpi["Rank"] == min_rank])
+    print("\n\n")
+    print("MPI stats for rank with mean MPI time")#, border_style="blue")
+    print(df_apmpi.loc[df_apmpi["Rank"] == mean_rank])
+    # print(df_apmpi)
+    #df_apmpi.to_csv('apmpi.csv', index=False)
+    #df_rank.to_csv('apmpi_rank.csv', index=False)
+   
+    encoded = []
+    buf = io.BytesIO()
+    fig, ax = pyplot.subplots()
+
+    sns_violin = sns.violinplot(x="Call", y="Total_Time", ax=ax, data=df_call)
+    sns_violin.set_xticklabels(sns_violin.get_xticklabels(), rotation=60, size=6.5)
+    sns_violin.set_yticklabels(sns_violin.get_yticks(), rotation=0, size=6.5)
+    sns_violin.set_xlabel('')
+    sns_violin.set_ylabel('Time (seconds)', size=7)
+    #sns.despine();
+    pyplot.savefig(buf, format='png', bbox_inches='tight')
+    buf.seek(0)
+    encoded.append(base64.b64encode(buf.read()))
+
+    buf = io.BytesIO()
+    fig, ax = pyplot.subplots()
+    sns_plot = sns.scatterplot(x="Rank", y="Total_Time", ax=ax, data=df_apmpi, s=3)
+    sns_plot.set_xticklabels(sns_plot.get_xticklabels(), rotation=0, size=6.5)
+    sns_plot.set_yticklabels(sns_plot.get_yticks(), rotation=0, size=6.5)
+    sns_plot.set_xlabel('Rank', size=8)
+    sns_plot.set_ylabel('Time (seconds)', size=8)
+    #sns.despine();
+    pyplot.savefig(buf, format='png', bbox_inches='tight')
+    buf.seek(0)
+    encoded.append(base64.b64encode(buf.read()))
+    return encoded
+  
 def main(args=None):
 
     if args is None:
@@ -162,6 +286,7 @@ def main(args=None):
 
     variables = {}
     report = darshan.DarshanReport(args.input, read_all=True)
+    report.info()
 
     #
     # Setup template header variabels
@@ -192,6 +317,16 @@ def main(args=None):
         stdio_df = report.records['STDIO'].to_df()
     else:
         stdio_df = None
+    
+    if 'APXC' in report.modules:
+        apxc_dict = report.records['APXC'].to_dict()
+    else:
+        apxc_dict = None
+
+    if 'APMPI' in report.modules:
+        apmpi_dict = report.records['APMPI'].to_dict()
+    else:
+        apmpi_dict = None
 
     #
     # Plot I/O cost
@@ -204,6 +339,9 @@ def main(args=None):
     variables['plot_op_count'] = plot_op_count(posix_df, mpiio_df, stdio_df).decode('utf-8')
 
     variables['fs_data'] = data_transfer_filesystem(report, posix_df, stdio_df)
+    if apmpi_dict is not None:
+        apmpi_encoded = apmpi_process(apmpi_dict)
+        variables['apmpi_call_time'] = apmpi_encoded[0].decode('utf-8')
+        variables['apmpi_rank_totaltime'] = apmpi_encoded[1].decode('utf-8')
 
     template_path = pkg_resources.path(darshan.templates, '')
     with template_path as path:
@@ -214,7 +352,6 @@ def main(args=None):
        with open(args.output, 'w') as f:
            f.write(stream.render('html'))
            f.close()
-
     return
 
 if __name__ == "__main__":


=====================================
darshan-util/pydarshan/darshan/templates/summary.html
=====================================
@@ -357,5 +357,28 @@
 </div>
 </div>
 
+<div class="panel panel-primary">
+    <div class="panel-heading">
+        <h4 class="panel-title">
+            <a data-toggle="collapse" href="#collapse1">APMPI</a>
+        </h4>
+    </div>
+    <div id="collapse1" class="panel-collapse collapse in">
+    <div class="panel-body">
+        <div class="container">
+        <div class="row">
+            <div class="col-lg-6">
+                    <img class="img-rounded" alt="APMPI-call-times" src="data:image/png;base64,${var.apmpi_call_time}"/>
+            </div>
+            <div class="col-lg-6">
+                    <img class="img-rounded" alt="APMPI-rank-totaltimes" src="data:image/png;base64,${var.apmpi_rank_totaltime}"/>
+            </div>
+        </div>
+        </div>
+     </div>
+     </div>
+</div>
+
+
 </body>
 </html>
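The <img> tags added above consume the two figures that apmpi_process() returns as base64-encoded PNGs. A minimal, self-contained sketch of that embedding pattern, with toy data and a placeholder output file name standing in for the real APMPI plots:

    # hedged sketch of the base64 data-URI embedding used by this commit;
    # the plotted values and 'apmpi_plot.html' are placeholders.
    import base64
    import io

    from matplotlib import pyplot

    fig, ax = pyplot.subplots()
    ax.plot([0, 1, 2, 3], [0.4, 0.1, 0.7, 0.3])  # stand-in for per-rank MPI times

    buf = io.BytesIO()
    pyplot.savefig(buf, format='png', bbox_inches='tight')
    buf.seek(0)
    encoded = base64.b64encode(buf.read()).decode('utf-8')

    with open('apmpi_plot.html', 'w') as f:
        f.write('<img class="img-rounded" src="data:image/png;base64,%s"/>' % encoded)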



View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/-/commit/959529279372c75ba33563ead36c95513f57bac7
