[Darshan-commits] [Git][darshan/darshan][dev-modular] update modules to print counter descriptions
Shane Snyder
xgitlab at cels.anl.gov
Fri Dec 11 16:20:33 CST 2015
Shane Snyder pushed to branch dev-modular at darshan / darshan
Commits:
9b021c1b by Shane Snyder at 2015-12-11T16:20:12Z
update modules to print counter descriptions
- - - - -
9 changed files:
- darshan-bgq-log-format.h
- darshan-util/darshan-bgq-logutils.c
- darshan-util/darshan-hdf5-logutils.c
- darshan-util/darshan-logutils.h
- darshan-util/darshan-mpiio-logutils.c
- darshan-util/darshan-null-logutils.c
- darshan-util/darshan-parser.c
- darshan-util/darshan-pnetcdf-logutils.c
- darshan-util/darshan-posix-logutils.c
Changes:
=====================================
darshan-bgq-log-format.h
=====================================
--- a/darshan-bgq-log-format.h
+++ b/darshan-bgq-log-format.h
@@ -11,24 +11,38 @@
#define DARSHAN_BGQ_VER 1
#define BGQ_COUNTERS \
- X(BGQ_CSJOBID, "control system jobid") \
- X(BGQ_NNODES, "number of BGQ compute nodes") \
- X(BGQ_RANKSPERNODE, "number of MPI ranks per node") \
- X(BGQ_DDRPERNODE, "size in MB of DDR3 per node") \
- X(BGQ_INODES, "number of i/o nodes") \
- X(BGQ_ANODES, "dimension of A torus") \
- X(BGQ_BNODES, "dimension of B torus") \
- X(BGQ_CNODES, "dimension of C torus") \
- X(BGQ_DNODES, "dimension of D torus") \
- X(BGQ_ENODES, "dimension of E torus") \
- X(BGQ_TORUSENABLED, "which dimensions are torus") \
- X(BGQ_NUM_INDICES, "end of counters")
+ /* control system jobid*/\
+ X(BGQ_CSJOBID) \
+ /* number of BGQ compute nodes */\
+ X(BGQ_NNODES) \
+ /* number of MPI ranks per node */\
+ X(BGQ_RANKSPERNODE) \
+ /* size in MB of DDR3 per node */\
+ X(BGQ_DDRPERNODE) \
+ /* number of i/o nodes */\
+ X(BGQ_INODES) \
+ /* dimension of A torus */\
+ X(BGQ_ANODES) \
+ /* dimension of B torus */\
+ X(BGQ_BNODES) \
+ /* dimension of C torus */\
+ X(BGQ_CNODES) \
+ /* dimension of D torus */\
+ X(BGQ_DNODES) \
+ /* dimension of E torus */\
+ X(BGQ_ENODES) \
+ /* which dimensions are torus */\
+ X(BGQ_TORUSENABLED) \
+ /* end of counters */\
+ X(BGQ_NUM_INDICES)
#define BGQ_F_COUNTERS \
- X(BGQ_F_TIMESTAMP, "timestamp when data was collected") \
- X(BGQ_F_NUM_INDICES, "end of counters")
+ /* timestamp when data was collected */\
+ X(BGQ_F_TIMESTAMP) \
+ /* end of counters */\
+ X(BGQ_F_NUM_INDICES)
-#define X(a, b) a,
+#define X(a) a,
/* integer counters for the "BGQ" example module */
enum darshan_bgq_indices
{
=====================================
darshan-util/darshan-bgq-logutils.c
=====================================
--- a/darshan-util/darshan-bgq-logutils.c
+++ b/darshan-util/darshan-bgq-logutils.c
@@ -20,7 +20,7 @@
#include "darshan-logutils.h"
/* counter name strings for the BGQ module */
-#define X(a, b) #a,
+#define X(a) #a,
char *bgq_counter_names[] = {
BGQ_COUNTERS
};
@@ -35,12 +35,14 @@ static int darshan_log_get_bgq_rec(darshan_fd fd, void* bgq_buf,
static int darshan_log_put_bgq_rec(darshan_fd fd, void* bgq_buf, int ver);
static void darshan_log_print_bgq_rec(void *file_rec,
char *file_name, char *mnt_pt, char *fs_type, int ver);
+static void darshan_log_print_bgq_description(void);
struct darshan_mod_logutil_funcs bgq_logutils =
{
.log_get_record = &darshan_log_get_bgq_rec,
.log_put_record = &darshan_log_put_bgq_rec,
.log_print_record = &darshan_log_print_bgq_rec,
+ .log_print_description = &darshan_log_print_bgq_description
};
static int darshan_log_get_bgq_rec(darshan_fd fd, void* bgq_buf,
@@ -112,6 +114,23 @@ static void darshan_log_print_bgq_rec(void *file_rec, char *file_name,
return;
}
+/* Print a description of each BGQ counter to stdout, then expand DARSHAN_PRINT_HEADER() (macro defined outside this diff). */
+static void darshan_log_print_bgq_description(void)
+{
+ printf("\n# description of BGQ counters:\n");
+ printf("# BGQ_CSJOBID: BGQ control system job ID.\n");
+ printf("# BGQ_NNODES: number of BGQ compute nodes for this job.\n");
+ printf("# BGQ_RANKSPERNODE: number of MPI ranks per compute node.\n");
+ printf("# BGQ_DDRPERNODE: size in MB of DDR3 per compute node.\n");
+ printf("# BGQ_INODES: number of BGQ I/O nodes for this job.\n");
+ printf("# BGQ_*NODES: dimension of A, B, C, D, & E dimensions of torus.\n");
+ printf("# BGQ_TORUSENABLED: which dimensions of the torus are enabled.\n");
+ printf("# BGQ_F_TIMESTAMP: timestamp when the BGQ data was collected.\n");
+
+ DARSHAN_PRINT_HEADER();
+ return;
+}
+
/*
* Local variables:
* c-indent-level: 4
=====================================
darshan-util/darshan-hdf5-logutils.c
=====================================
--- a/darshan-util/darshan-hdf5-logutils.c
+++ b/darshan-util/darshan-hdf5-logutils.c
@@ -35,12 +35,14 @@ static int darshan_log_get_hdf5_file(darshan_fd fd, void* hdf5_buf,
static int darshan_log_put_hdf5_file(darshan_fd fd, void* hdf5_buf, int ver);
static void darshan_log_print_hdf5_file(void *file_rec,
char *file_name, char *mnt_pt, char *fs_type, int ver);
+static void darshan_log_print_hdf5_description(void);
struct darshan_mod_logutil_funcs hdf5_logutils =
{
.log_get_record = &darshan_log_get_hdf5_file,
.log_put_record = &darshan_log_put_hdf5_file,
.log_print_record = &darshan_log_print_hdf5_file,
+ .log_print_description = &darshan_log_print_hdf5_description
};
static int darshan_log_get_hdf5_file(darshan_fd fd, void* hdf5_buf,
@@ -112,6 +114,18 @@ static void darshan_log_print_hdf5_file(void *file_rec, char *file_name,
return;
}
+/* Print a description of each HDF5 counter to stdout, then expand DARSHAN_PRINT_HEADER() (macro defined outside this diff). */
+static void darshan_log_print_hdf5_description(void)
+{
+ printf("\n# description of HDF5 counters:\n");
+ printf("# HDF5_OPENS: HDF5 file open operation counts.\n");
+ printf("# HDF5_F_OPEN_TIMESTAMP: timestamp of first HDF5 file open.\n");
+ printf("# HDF5_F_CLOSE_TIMESTAMP: timestamp of last HDF5 file close.\n");
+
+ DARSHAN_PRINT_HEADER();
+ return;
+}
+
/*
* Local variables:
* c-indent-level: 4
=====================================
darshan-util/darshan-logutils.h
=====================================
--- a/darshan-util/darshan-logutils.h
+++ b/darshan-util/darshan-logutils.h
@@ -89,6 +89,8 @@ struct darshan_mod_logutil_funcs
char *fs_type,
int ver
);
+ /* print module-specific description of I/O characterization data */
+ void (*log_print_description)(void);
};
extern struct darshan_mod_logutil_funcs *mod_logutils[];
=====================================
darshan-util/darshan-mpiio-logutils.c
=====================================
--- a/darshan-util/darshan-mpiio-logutils.c
+++ b/darshan-util/darshan-mpiio-logutils.c
@@ -35,12 +35,14 @@ static int darshan_log_get_mpiio_file(darshan_fd fd, void* mpiio_buf,
static int darshan_log_put_mpiio_file(darshan_fd fd, void* mpiio_buf, int ver);
static void darshan_log_print_mpiio_file(void *file_rec,
char *file_name, char *mnt_pt, char *fs_type, int ver);
+static void darshan_log_print_mpiio_description(void);
struct darshan_mod_logutil_funcs mpiio_logutils =
{
.log_get_record = &darshan_log_get_mpiio_file,
.log_put_record = &darshan_log_put_mpiio_file,
.log_print_record = &darshan_log_print_mpiio_file,
+ .log_print_description = &darshan_log_print_mpiio_description
};
static int darshan_log_get_mpiio_file(darshan_fd fd, void* mpiio_buf,
@@ -112,6 +114,40 @@ static void darshan_log_print_mpiio_file(void *file_rec, char *file_name,
return;
}
+/* Print a description of each MPIIO counter to stdout, then expand DARSHAN_PRINT_HEADER() (macro defined outside this diff). */
+static void darshan_log_print_mpiio_description(void)
+{
+ printf("\n# description of MPIIO counters:\n");
+ printf("# MPIIO_INDEP_*: MPI independent operation counts.\n");
+ printf("# MPIIO_COLL_*: MPI collective operation counts.\n");
+ printf("# MPIIO_SPLIT_*: MPI split collective operation counts.\n");
+ printf("# MPIIO_NB_*: MPI non blocking operation counts.\n");
+ printf("# READS,WRITES,and OPENS are types of operations.\n");
+ printf("# MPIIO_SYNCS: MPI file sync operation counts.\n");
+ printf("# MPIIO_HINTS: number of times MPI hints were used.\n");
+ printf("# MPIIO_VIEWS: number of times MPI file views were used.\n");
+ printf("# MPIIO_MODE: MPI-IO access mode that file was opened with.\n");
+ printf("# MPIIO_BYTES_*: total bytes read and written at MPI-IO layer.\n");
+ printf("# MPIIO_RW_SWITCHES: number of times access alternated between read and write.\n");
+ printf("# MPIIO_MAX_*_TIME_SIZE: size of the slowest read and write operations.\n");
+ printf("# MPIIO_SIZE_*_AGG_*: histogram of MPI datatype total sizes for read and write operations.\n");
+ printf("# MPIIO_ACCESS*_ACCESS: the four most common total access sizes.\n");
+ printf("# MPIIO_ACCESS*_COUNT: count of the four most common total access sizes.\n");
+ printf("# MPIIO_*_RANK: rank of the processes that were the fastest and slowest at I/O (for shared files).\n");
+ printf("# MPIIO_*_RANK_BYTES: total bytes transferred at MPI-IO layer by the fastest and slowest ranks (for shared files).\n");
+ printf("# MPIIO_F_OPEN_TIMESTAMP: timestamp of first open.\n");
+ printf("# MPIIO_F_*_START_TIMESTAMP: timestamp of first MPI-IO read/write.\n");
+ printf("# MPIIO_F_*_END_TIMESTAMP: timestamp of last MPI-IO read/write.\n");
+ printf("# MPIIO_F_CLOSE_TIMESTAMP: timestamp of last close.\n");
+ printf("# MPIIO_F_READ/WRITE/META_TIME: cumulative time spent in MPI-IO read, write, or metadata operations.\n");
+ printf("# MPIIO_F_MAX_*_TIME: duration of the slowest MPI-IO read and write operations.\n");
+ printf("# MPIIO_F_*_RANK_TIME: fastest and slowest I/O time for a single rank (for shared files).\n");
+ printf("# MPIIO_F_VARIANCE_RANK_*: variance of total I/O time and bytes moved for all ranks (for shared files).\n");
+
+ DARSHAN_PRINT_HEADER();
+ return;
+}
+
/*
* Local variables:
* c-indent-level: 4
=====================================
darshan-util/darshan-null-logutils.c
=====================================
--- a/darshan-util/darshan-null-logutils.c
+++ b/darshan-util/darshan-null-logutils.c
@@ -37,6 +37,7 @@ static int darshan_log_get_null_record(darshan_fd fd, void* null_buf,
static int darshan_log_put_null_record(darshan_fd fd, void* null_buf, int ver);
static void darshan_log_print_null_record(void *file_rec,
char *file_name, char *mnt_pt, char *fs_type, int ver);
+static void darshan_log_print_null_description(void);
/* structure storing each function needed for implementing the darshan
* logutil interface. these functions are used for reading, writing, and
@@ -47,6 +48,7 @@ struct darshan_mod_logutil_funcs null_logutils =
.log_get_record = &darshan_log_get_null_record,
.log_put_record = &darshan_log_put_null_record,
.log_print_record = &darshan_log_print_null_record,
+ .log_print_description = &darshan_log_print_null_description
};
/* retrieve a NULL record from log file descriptor 'fd', storing the
@@ -134,6 +136,18 @@ static void darshan_log_print_null_record(void *file_rec, char *file_name,
return;
}
+/* print a description of each NULL module counter to stdout */
+static void darshan_log_print_null_description(void)
+{
+ printf("\n# description of NULL counters:\n");
+ printf("# NULL_BARS: number of 'bar' function calls.\n");
+ printf("# NULL_BAR_DAT: value set by last call to function 'bar'.\n");
+ printf("# NULL_F_BAR_TIMESTAMP: timestamp of the first call to function 'bar'.\n");
+ printf("# NULL_F_BAR_DURATION: duration of the last call to function 'bar'.\n");
+ /* NOTE(review): the other modules in this commit invoke DARSHAN_PRINT_HEADER() here; confirm the omission is intentional */
+ return;
+}
+
/*
* Local variables:
* c-indent-level: 4
=====================================
darshan-util/darshan-parser.c
=====================================
--- a/darshan-util/darshan-parser.c
+++ b/darshan-util/darshan-parser.c
@@ -321,6 +321,21 @@ int main(int argc, char **argv)
printf("# mount entry:\t%s\t%s\n", mnt_pts[i], fs_types[i]);
}
+ if(mask & OPTION_BASE)
+ {
+ printf("\n# description of columns:\n");
+ printf("# <module>: module responsible for this I/O record.\n");
+ printf("# <rank>: MPI rank. -1 indicates that the file is shared\n");
+ printf("# across all processes and statistics are aggregated.\n");
+ printf("# <record id>: hash of the record's file path\n");
+ printf("# <counter name> and <counter value>: statistical counters.\n");
+ printf("# A value of -1 indicates that Darshan could not monitor\n");
+ printf("# that counter, and its value should be ignored.\n");
+ printf("# <file name>: full file path for the record.\n");
+ printf("# <mount pt>: mount point that the file resides on.\n");
+ printf("# <fs type>: type of file system that the file resides on.\n");
+ }
+
/* warn user if this log file is incomplete */
pdata.rank_cumul_io_time = malloc(sizeof(double)*job.nprocs);
pdata.rank_cumul_md_time = malloc(sizeof(double)*job.nprocs);
@@ -374,8 +389,9 @@ int main(int argc, char **argv)
if(mask & OPTION_BASE)
{
- /* TODO: does each module print header of what each counter means??? */
- DARSHAN_PRINT_HEADER();
+ /* print a header describing the module's I/O characterization data */
+ if(mod_logutils[i]->log_print_description)
+ mod_logutils[i]->log_print_description();
}
ret = mod_logutils[i]->log_get_record(fd, mod_buf, &rec_id);
=====================================
darshan-util/darshan-pnetcdf-logutils.c
=====================================
--- a/darshan-util/darshan-pnetcdf-logutils.c
+++ b/darshan-util/darshan-pnetcdf-logutils.c
@@ -35,12 +35,14 @@ static int darshan_log_get_pnetcdf_file(darshan_fd fd, void* pnetcdf_buf,
static int darshan_log_put_pnetcdf_file(darshan_fd fd, void* pnetcdf_buf, int ver);
static void darshan_log_print_pnetcdf_file(void *file_rec,
char *file_name, char *mnt_pt, char *fs_type, int ver);
+static void darshan_log_print_pnetcdf_description(void);
struct darshan_mod_logutil_funcs pnetcdf_logutils =
{
.log_get_record = &darshan_log_get_pnetcdf_file,
.log_put_record = &darshan_log_put_pnetcdf_file,
.log_print_record = &darshan_log_print_pnetcdf_file,
+ .log_print_description = &darshan_log_print_pnetcdf_description
};
static int darshan_log_get_pnetcdf_file(darshan_fd fd, void* pnetcdf_buf,
@@ -112,6 +114,19 @@ static void darshan_log_print_pnetcdf_file(void *file_rec, char *file_name,
return;
}
+/* Print a description of each PNETCDF counter to stdout, then expand DARSHAN_PRINT_HEADER() (macro defined outside this diff). */
+static void darshan_log_print_pnetcdf_description(void)
+{
+ printf("\n# description of PNETCDF counters:\n");
+ printf("# PNETCDF_INDEP_OPENS: PNETCDF independent file open operation counts.\n");
+ printf("# PNETCDF_COLL_OPENS: PNETCDF collective file open operation counts.\n");
+ printf("# PNETCDF_F_OPEN_TIMESTAMP: timestamp of first PNETCDF file open.\n");
+ printf("# PNETCDF_F_CLOSE_TIMESTAMP: timestamp of last PNETCDF file close.\n");
+
+ DARSHAN_PRINT_HEADER();
+ return;
+}
+
/*
* Local variables:
* c-indent-level: 4
=====================================
darshan-util/darshan-posix-logutils.c
=====================================
--- a/darshan-util/darshan-posix-logutils.c
+++ b/darshan-util/darshan-posix-logutils.c
@@ -35,12 +35,14 @@ static int darshan_log_get_posix_file(darshan_fd fd, void* posix_buf,
static int darshan_log_put_posix_file(darshan_fd fd, void* posix_buf, int ver);
static void darshan_log_print_posix_file(void *file_rec,
char *file_name, char *mnt_pt, char *fs_type, int ver);
+static void darshan_log_print_posix_description(void);
struct darshan_mod_logutil_funcs posix_logutils =
{
.log_get_record = &darshan_log_get_posix_file,
.log_put_record = &darshan_log_put_posix_file,
.log_print_record = &darshan_log_print_posix_file,
+ .log_print_description = &darshan_log_print_posix_description
};
static int darshan_log_get_posix_file(darshan_fd fd, void* posix_buf,
@@ -112,6 +114,41 @@ static void darshan_log_print_posix_file(void *file_rec, char *file_name,
return;
}
+/* Print a description of each POSIX counter to stdout, then expand DARSHAN_PRINT_HEADER() (macro defined outside this diff). */
+static void darshan_log_print_posix_description(void)
+{
+ printf("\n# description of POSIX counters:\n");
+ printf("# POSIX_*: posix operation counts.\n");
+ printf("# READS,WRITES,OPENS,SEEKS,STATS, and MMAPS are types of operations.\n");
+ printf("# POSIX_MODE: mode that file was opened in.\n");
+ printf("# POSIX_BYTES_*: total bytes read and written.\n");
+ printf("# POSIX_MAX_BYTE_*: highest offset byte read and written.\n");
+ printf("# POSIX_CONSEC_*: number of exactly adjacent reads and writes.\n");
+ printf("# POSIX_SEQ_*: number of reads and writes from increasing offsets.\n");
+ printf("# POSIX_RW_SWITCHES: number of times access alternated between read and write.\n");
+ printf("# POSIX_*_ALIGNMENT: memory and file alignment.\n");
+ printf("# POSIX_*_NOT_ALIGNED: number of reads and writes that were not aligned.\n");
+ printf("# POSIX_MAX_*_TIME_SIZE: size of the slowest read and write operations.\n");
+ printf("# POSIX_SIZE_*_*: histogram of read and write access sizes.\n");
+ printf("# POSIX_STRIDE*_STRIDE: the four most common strides detected.\n");
+ printf("# POSIX_STRIDE*_COUNT: count of the four most common strides.\n");
+ printf("# POSIX_ACCESS*_ACCESS: the four most common access sizes.\n");
+ printf("# POSIX_ACCESS*_COUNT: count of the four most common access sizes.\n");
+ printf("# POSIX_*_RANK: rank of the processes that were the fastest and slowest at I/O (for shared files).\n");
+ printf("# POSIX_*_RANK_BYTES: bytes transferred by the fastest and slowest ranks (for shared files).\n");
+ printf("# POSIX_F_OPEN_TIMESTAMP: timestamp of first open.\n");
+ printf("# POSIX_F_*_START_TIMESTAMP: timestamp of first read/write.\n");
+ printf("# POSIX_F_*_END_TIMESTAMP: timestamp of last read/write.\n");
+ printf("# POSIX_F_CLOSE_TIMESTAMP: timestamp of last close.\n");
+ printf("# POSIX_F_READ/WRITE/META_TIME: cumulative time spent in read, write, or metadata operations.\n");
+ printf("# POSIX_F_MAX_*_TIME: duration of the slowest read and write operations.\n");
+ printf("# POSIX_F_*_RANK_TIME: fastest and slowest I/O time for a single rank (for shared files).\n");
+ printf("# POSIX_F_VARIANCE_RANK_*: variance of total I/O time and bytes moved for all ranks (for shared files).\n");
+
+ DARSHAN_PRINT_HEADER();
+ return;
+}
+
/*
* Local variables:
* c-indent-level: 4
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/commit/9b021c1bba1f7a7f7889333fa1f07cba5df159a4
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20151211/6e49ece8/attachment-0001.html>
More information about the Darshan-commits
mailing list