[Darshan-commits] [Git][darshan/darshan][autoperf-mod-update] APMPI module analysis python script
Sudheer Chunduri
xgitlab at cels.anl.gov
Wed Feb 17 15:50:26 CST 2021
Sudheer Chunduri pushed to branch autoperf-mod-update at darshan / darshan
Commits:
69496346 by Sudheer Chunduri at 2021-02-17T21:49:42+00:00
APMPI module analysis python script
- - - - -
2 changed files:
- darshan-util/pydarshan/examples/01_darshan-apmpi.ipynb
- + darshan-util/pydarshan/examples/01_darshan-apmpi.py
Changes:
=====================================
darshan-util/pydarshan/examples/01_darshan-apmpi.ipynb
=====================================
The diff for this file was not included because it is too large.
=====================================
darshan-util/pydarshan/examples/01_darshan-apmpi.py
=====================================
@@ -0,0 +1,155 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# # DarshanUtils for Python
+#
+# This notebook gives an overview of features provided by the Python bindings for DarshanUtils.
+
+# By default all APMPI module records, metadata, and the name records are loaded when opening a Darshan log:
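+
+# A minimal commented sketch of that default behavior (the log path
+# "example.darshan" is a placeholder for illustration only):
+#
+#     import darshan
+#     report = darshan.DarshanReport("example.darshan", read_all=True)
+#     print(report.modules.keys())   # modules recorded in the log
+#     print(report.name_records)     # id -> path/name mappings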
+
+
+"""
+get_ipython().run_line_magic('env', 'LD_LIBRARY_PATH=/projects/Performance/chunduri/Software/Temp/build/darshan-util/install/lib/')
+
+get_ipython().run_line_magic('env', 'PE_PKGCONFIG_LIBS=darshan-runtime')
+get_ipython().run_line_magic('env', 'PKG_CONFIG_PATH=/projects/Performance/chunduri/Software/Temp/build/darshan-runtime/lib/pkgconfig')
+get_ipython().run_line_magic('env', 'PATH=/projects/Performance/chunduri/Software/Temp/build/darshan-utils/bin:/opt/anaconda3x/bin:/opt/anaconda3x/condabin:/projects/Performance/chunduri/Work_backup_June252017/software/install/autotools/bin:/bin:/sbin:/opt/anaconda3x/bin:/usr/bin/usr/sbin:/usr/local/sbin:/usr/sbin:/dbhome/db2cat/sqllib/bin:/dbhome/db2cat/sqllib/adm:/dbhome/db2cat/sqllib/misc:/dbhome/db2cat/sqllib/gskit/bin:/opt/ibutils/bin:/home/chunduri/bin:/projects/Performance/chunduri/Software/Temp/build/darshan-util/install/bin')
+
+get_ipython().run_line_magic('env', 'darshan_prefix=/projects/Performance/chunduri/Software/Temp/build/darshan-runtime/install')
+get_ipython().run_line_magic('env', 'darshan_share=/projects/Performance/chunduri/Software/Temp/build/darshan-runtime/install/share')
+get_ipython().run_line_magic('env', 'darshan_libdir= -L${darshan_prefix}/lib')
+
+get_ipython().system('echo $LD_LIBRARY_PATH')
+get_ipython().system('echo $PATH')
+get_ipython().system('echo $PKG_CONFIG_PATH')
+"""
+
+#get_ipython().system("pwd")
+#get_ipython().run_line_magic("cd", "..")
+#get_ipython().system("pwd")
+import argparse
+import logging
+import pprint
+
+import pandas as pd
+
+import darshan
+
+logger = logging.getLogger(__name__)
+
+pp = pprint.PrettyPrinter()
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--quiet",
+ dest="quiet",
+ action="store_true",
+ default=False,
+        help="Suppress zero-count calls",
+ )
+ parser.add_argument(
+ "logname", metavar="logname", type=str, nargs=1, help="Logname to parse"
+ )
+ args = parser.parse_args()
+
+ report = darshan.DarshanReport(args.logname[0], read_all=False)
+ report.info()
+
+ if "APMPI" not in report.modules:
+ print("This log does not contain AutoPerf MPI data")
+ return
+
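+    # Populate report.records["APMPI"] with the APMPI header record followed by
+    # the per-rank records.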
+    report.mod_read_all_apmpi_records("APMPI")
+ # report.data.keys()
+ report.update_name_records()
+ report.info()
+
+ header_rec = report.records["APMPI"][0]
+ print("# darshan log version: ", header_rec["version"])
+ sync_flag = header_rec["sync_flag"]
+ print("APMPI Variance in total mpi time: ", header_rec["variance_total_mpitime"], "\n")
+ if sync_flag:
+ print(
+ "APMPI Variance in total mpi sync time: ", header_rec["variance_total_mpisynctime"]
+ )
+
+ df_apmpi = pd.DataFrame()
+    for rec in report.records["APMPI"][1:]:  # skip the first record, which is the header
+ mpi_nonzero_callcount = []
+ for k, v in rec["all_counters"].items():
+ if k.endswith("_CALL_COUNT") and v > 0:
+ mpi_nonzero_callcount.append(k[: -(len("CALL_COUNT"))])
+
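+        # Each entry in mpi_nonzero_callcount keeps its trailing underscore
+        # (e.g. "MPI_SEND_"), so the per-counter names below can be built by
+        # simple concatenation (e.g. "MPI_SEND_CALL_COUNT").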
+ df_rank = pd.DataFrame()
+ for mpiop in mpi_nonzero_callcount:
+ ncall = mpiop
+ ncount = mpiop + "CALL_COUNT"
+ nsize = mpiop + "TOTAL_BYTES"
+ h0 = mpiop + "MSG_SIZE_AGG_0_256"
+ h1 = mpiop + "MSG_SIZE_AGG_256_1K"
+ h2 = mpiop + "MSG_SIZE_AGG_1K_8K"
+ h3 = mpiop + "MSG_SIZE_AGG_8K_256K"
+ h4 = mpiop + "MSG_SIZE_AGG_256K_1M"
+ h5 = mpiop + "MSG_SIZE_AGG_1M_PLUS"
+ ntime = mpiop + "TOTAL_TIME"
+ mintime = mpiop + "MIN_TIME"
+ maxtime = mpiop + "MAX_TIME"
+ if sync_flag:
+ totalsync = mpiop + "TOTAL_SYNC_TIME"
+
+ mpiopstat = {}
+ mpiopstat["Rank"] = rec["rank"]
+ mpiopstat["Node_ID"] = rec["node_name"]
+ mpiopstat["Call"] = ncall[:-1]
+ mpiopstat["Total_Time"] = rec["all_counters"][ntime]
+ mpiopstat["Count"] = rec["all_counters"][ncount]
+ mpiopstat["Total_Bytes"] = rec["all_counters"].get(nsize, None)
+ mpiopstat["[0-256B]"] = rec["all_counters"].get(h0, None)
+ mpiopstat["[256-1KB]"] = rec["all_counters"].get(h1, None)
+ mpiopstat["[1K-8KB]"] = rec["all_counters"].get(h2, None)
+ mpiopstat["[8K-256KB]"] = rec["all_counters"].get(h3, None)
+            mpiopstat["[256K-1MB]"] = rec["all_counters"].get(h4, None)
+ mpiopstat["[>1MB]"] = rec["all_counters"].get(h5, None)
+ mpiopstat["Min_Time"] = rec["all_counters"][mintime]
+ mpiopstat["Max_Time"] = rec["all_counters"][maxtime]
+ if sync_flag:
+ mpiopstat["Total_SYNC_Time"] = rec["all_counters"][totalsync]
+
+ df_mpiop = pd.DataFrame([mpiopstat], columns=mpiopstat.keys())
+ df_rank = pd.concat([df_rank, df_mpiop], axis=0).reset_index(drop=True)
+ df_rank = df_rank.sort_values(by=["Total_Time"], ascending=False)
+ df_apmpi = pd.concat([df_apmpi, df_rank], axis=0).reset_index(drop=True)
+ print(df_apmpi)
+
+ return
+
+if __name__ == '__main__':
+ main()
+# print(time.time(), time.clock())
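+
+# Example invocation (the log file name below is a placeholder):
+#
+#     python 01_darshan-apmpi.py /path/to/logfile_with_APMPI.darshan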
+
+
+# A few of the internal data structures explained:
+
+# In[ ]:
+
+
+# report.metadata # dictionary with raw metadata from darshan log
+# report.modules # dictionary with raw module info from darshan log (need: technical, module idx)
+# report.name_records # dictionary for resolving name records: id -> path/name
+# report.records # per module "dataframes"/dictionaries holding loaded records
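+
+# For example (a commented sketch; assumes the `report` object built in main() above):
+#
+#     report.modules.keys()        # e.g. dict_keys(['APMPI', ...])
+#     report.records["APMPI"][0]   # APMPI header record (version, sync_flag, ...)
+#     report.records["APMPI"][1:]  # per-rank APMPI records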
+
+
+# The darshan report holds a variety of namespaces for report-related data. All of them are also referenced in `report.data` at the moment, but relying on this internal organization of the report object is discouraged once the API stabilizes. Currently, `report.data` references the following information:
--
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/-/commit/694963461948af1cee346cf6911e82160cba7edd
You're receiving this email because of your account on xgitlab.cels.anl.gov.