[Darshan-commits] [Git][darshan/darshan][autoperf-mod-update] APMPI module analysis python script

Sudheer Chunduri xgitlab at cels.anl.gov
Wed Feb 17 15:50:26 CST 2021



Sudheer Chunduri pushed to branch autoperf-mod-update at darshan / darshan


Commits:
69496346 by Sudheer Chunduri at 2021-02-17T21:49:42+00:00
APMPI module analysis python script

- - - - -


2 changed files:

- darshan-util/pydarshan/examples/01_darshan-apmpi.ipynb
- + darshan-util/pydarshan/examples/01_darshan-apmpi.py


Changes:

=====================================
darshan-util/pydarshan/examples/01_darshan-apmpi.ipynb
=====================================
The diff for this file was not included because it is too large.

=====================================
darshan-util/pydarshan/examples/01_darshan-apmpi.py
=====================================
@@ -0,0 +1,155 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# # DarshanUtils for Python
+#
+# This notebook gives an overview of the features provided by the Python bindings for DarshanUtils.
+
+# By default all APMPI module records, metadata, and the name records are loaded when opening a Darshan log; this script opens the log with read_all=False and loads the APMPI records explicitly.
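+#
+# A minimal sketch of that explicit flow (the log name "example.darshan" is a
+# placeholder; the calls mirror those used in main() below):
+#
+#     import darshan
+#     report = darshan.DarshanReport("example.darshan", read_all=False)
+#     report.mod_read_all_apmpi_records("APMPI")   # load the AutoPerf MPI records
+#     report.update_name_records()
+#     header_rec = report.records["APMPI"][0]      # the first record is the header record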
+
+
+"""
+get_ipython().run_line_magic('env', 'LD_LIBRARY_PATH=/projects/Performance/chunduri/Software/Temp/build/darshan-util/install/lib/')
+
+get_ipython().run_line_magic('env', 'PE_PKGCONFIG_LIBS=darshan-runtime')
+get_ipython().run_line_magic('env', 'PKG_CONFIG_PATH=/projects/Performance/chunduri/Software/Temp/build/darshan-runtime/lib/pkgconfig')
+get_ipython().run_line_magic('env', 'PATH=/projects/Performance/chunduri/Software/Temp/build/darshan-utils/bin:/opt/anaconda3x/bin:/opt/anaconda3x/condabin:/projects/Performance/chunduri/Work_backup_June252017/software/install/autotools/bin:/bin:/sbin:/opt/anaconda3x/bin:/usr/bin/usr/sbin:/usr/local/sbin:/usr/sbin:/dbhome/db2cat/sqllib/bin:/dbhome/db2cat/sqllib/adm:/dbhome/db2cat/sqllib/misc:/dbhome/db2cat/sqllib/gskit/bin:/opt/ibutils/bin:/home/chunduri/bin:/projects/Performance/chunduri/Software/Temp/build/darshan-util/install/bin')
+
+get_ipython().run_line_magic('env', 'darshan_prefix=/projects/Performance/chunduri/Software/Temp/build/darshan-runtime/install')
+get_ipython().run_line_magic('env', 'darshan_share=/projects/Performance/chunduri/Software/Temp/build/darshan-runtime/install/share')
+get_ipython().run_line_magic('env', 'darshan_libdir= -L${darshan_prefix}/lib')
+
+get_ipython().system('echo $LD_LIBRARY_PATH')
+get_ipython().system('echo $PATH')
+get_ipython().system('echo $PKG_CONFIG_PATH')
+"""
+
+#get_ipython().system("pwd")
+#get_ipython().run_line_magic("cd", "..")
+#get_ipython().system("pwd")
+import argparse
+import logging
+import pprint
+
+import darshan
+import pandas as pd
+
+logger = logging.getLogger(__name__)
+
+pp = pprint.PrettyPrinter()
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--quiet",
+        dest="quiet",
+        action="store_true",
+        default=False,
+        help="Surpress zero count calls",
+    )
+    parser.add_argument(
+        "logname", metavar="logname", type=str, nargs=1, help="Logname to parse"
+    )
+    args = parser.parse_args()
+
+    report = darshan.DarshanReport(args.logname[0], read_all=False)
+    report.info()
+
+    if "APMPI" not in report.modules:
+        print("This log does not contain AutoPerf MPI data")
+        return
+
+    report.mod_read_all_apmpi_records("APMPI")
+    # report.data.keys()
+    report.update_name_records()
+    report.info()
+
+    header_rec = report.records["APMPI"][0]
+    print("# darshan log version: ", header_rec["version"])
+    sync_flag = header_rec["sync_flag"]
+    print("APMPI Variance in total mpi time: ", header_rec["variance_total_mpitime"], "\n")
+    if sync_flag:
+        print(
+            "APMPI Variance in total mpi sync time: ", header_rec["variance_total_mpisynctime"]
+        )
+
+    df_apmpi = pd.DataFrame()
+    for rec in report.records["APMPI"][1:]:  # skip the first record, which is the header record
+        mpi_nonzero_callcount = []
+        for k, v in rec["all_counters"].items():
+            if k.endswith("_CALL_COUNT") and v > 0:
+                # keep the "<MPIOP>_" prefix (trailing underscore included) for building counter names below
+                mpi_nonzero_callcount.append(k[: -len("CALL_COUNT")])
+
+        df_rank = pd.DataFrame()
+        for mpiop in mpi_nonzero_callcount:
+            ncall = mpiop
+            ncount = mpiop + "CALL_COUNT"
+            nsize = mpiop + "TOTAL_BYTES"
+            h0 = mpiop + "MSG_SIZE_AGG_0_256"
+            h1 = mpiop + "MSG_SIZE_AGG_256_1K"
+            h2 = mpiop + "MSG_SIZE_AGG_1K_8K"
+            h3 = mpiop + "MSG_SIZE_AGG_8K_256K"
+            h4 = mpiop + "MSG_SIZE_AGG_256K_1M"
+            h5 = mpiop + "MSG_SIZE_AGG_1M_PLUS"
+            ntime = mpiop + "TOTAL_TIME"
+            mintime = mpiop + "MIN_TIME"
+            maxtime = mpiop + "MAX_TIME"
+            if sync_flag:
+                totalsync = mpiop + "TOTAL_SYNC_TIME"
+
+            mpiopstat = {}
+            mpiopstat["Rank"] = rec["rank"]
+            mpiopstat["Node_ID"] = rec["node_name"]
+            mpiopstat["Call"] = ncall[:-1]
+            mpiopstat["Total_Time"] = rec["all_counters"][ntime]
+            mpiopstat["Count"] = rec["all_counters"][ncount]
+            mpiopstat["Total_Bytes"] = rec["all_counters"].get(nsize, None)
+            mpiopstat["[0-256B]"] = rec["all_counters"].get(h0, None)
+            mpiopstat["[256-1KB]"] = rec["all_counters"].get(h1, None)
+            mpiopstat["[1K-8KB]"] = rec["all_counters"].get(h2, None)
+            mpiopstat["[8K-256KB]"] = rec["all_counters"].get(h3, None)
+            mpiopstat["256K-1MB"] = rec["all_counters"].get(h4, None)
+            mpiopstat["[>1MB]"] = rec["all_counters"].get(h5, None)
+            mpiopstat["Min_Time"] = rec["all_counters"][mintime]
+            mpiopstat["Max_Time"] = rec["all_counters"][maxtime]
+            if sync_flag:
+                mpiopstat["Total_SYNC_Time"] = rec["all_counters"][totalsync]
+
+            df_mpiop = pd.DataFrame([mpiopstat], columns=mpiopstat.keys())
+            df_rank = pd.concat([df_rank, df_mpiop], axis=0).reset_index(drop=True)
+        df_rank = df_rank.sort_values(by=["Total_Time"], ascending=False)
+        df_apmpi = pd.concat([df_apmpi, df_rank], axis=0).reset_index(drop=True)
+    print(df_apmpi)
+
+    return
+
+if __name__ == '__main__':
+    main()
+# print(time.time(), time.clock())
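+
+# Example invocation (a sketch; "example.darshan" is a placeholder log path, and
+# --quiet is parsed above but not currently acted on in main()):
+#
+#     python 01_darshan-apmpi.py example.darshan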
+
+
+# A few of the internal data structures explained:
+
+# In[ ]:
+
+
+# report.metadata         # dictionary with raw metadata from darshan log
+# report.modules          # dictionary with raw module info from darshan log (need: technical, module idx)
+# report.name_records     # dictionary for resolving name records: id -> path/name
+# report.records          # per module "dataframes"/dictionaries holding loaded records
+
+
+# The darshan report holds a variety of namespaces for report-related data. All of them are also referenced in `report.data` at the moment, but relying on this internal organization of the report object will be discouraged once the API stabilizes. Currently, `report.data` references the namespaces listed above.
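+#
+# For example, once the APMPI records have been loaded (a minimal sketch; the
+# accesses mirror those used in main() above):
+#
+#     print(report.modules.keys())              # module names, e.g. "APMPI"
+#     print(len(report.name_records))           # id -> path/name mappings
+#     for rec in report.records["APMPI"][1:]:   # per-rank records follow the header record
+#         print(rec["rank"], rec["node_name"])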



View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/-/commit/694963461948af1cee346cf6911e82160cba7edd


