[Darshan-commits] [Git][darshan/darshan][master] 2 commits: improve handling of incomplete logs
Shane Snyder
xgitlab at cels.anl.gov
Tue Mar 16 14:05:53 CDT 2021
Shane Snyder pushed to branch master at darshan / darshan
Commits:
1bd5b2db by Philip Carns at 2021-03-16T14:05:47-05:00
improve handling of incomplete logs
- exit with error by default in parsers
- show more verbose error message with tips to help
- add --show-incomplete option to enable previous behavior
- properly set incomplete flag for modules that don't store any records
- - - - -
7d4a80ab by Shane Snyder at 2021-03-16T14:05:47-05:00
Merge branch 'carns/issue-284-incomplete-warning' into 'master'
improve handling of incomplete logs
Closes #284
See merge request darshan/darshan!87
- - - - -
14 changed files:
- ChangeLog
- darshan-runtime/lib/darshan-bgq.c
- darshan-runtime/lib/darshan-hdf5.c
- darshan-runtime/lib/darshan-lustre.c
- darshan-runtime/lib/darshan-mdhim.c
- darshan-runtime/lib/darshan-mpiio.c
- darshan-runtime/lib/darshan-null.c
- darshan-runtime/lib/darshan-pnetcdf.c
- darshan-runtime/lib/darshan-posix.c
- darshan-runtime/lib/darshan-stdio.c
- darshan-util/darshan-dxt-parser.c
- darshan-util/darshan-job-summary/bin/darshan-job-summary.pl.in
- darshan-util/darshan-job-summary/share/summary.tex
- darshan-util/darshan-parser.c
Changes:
=====================================
ChangeLog
=====================================
@@ -2,10 +2,12 @@
Darshan Release Change Log
--------------------------
-Darshan-3.2.2
+Darshan-3.3
=============
* add wrappers for preadv, preadv2, pwritev, and pwritev2 (improves profiling
of ompio)
+* improve error handling in command line parsers to make incomplete logs more
+ obvious to users
Darshan-3.2.1
=============
=====================================
darshan-runtime/lib/darshan-bgq.c
=====================================
@@ -136,14 +136,6 @@ void bgq_runtime_initialize()
&my_rank,
NULL);
- /* not enough memory to fit bgq module record */
- if(bgq_buf_size < sizeof(struct darshan_bgq_record))
- {
- darshan_core_unregister_module(DARSHAN_BGQ_MOD);
- BGQ_UNLOCK();
- return;
- }
-
/* initialize module's global state */
bgq_runtime = malloc(sizeof(*bgq_runtime));
if(!bgq_runtime)
=====================================
darshan-runtime/lib/darshan-hdf5.c
=====================================
@@ -933,13 +933,6 @@ static void hdf5_file_runtime_initialize()
&my_rank,
NULL);
- /* return if darshan-core does not provide enough module memory */
- if(hdf5_buf_size < sizeof(struct darshan_hdf5_file))
- {
- darshan_core_unregister_module(DARSHAN_H5F_MOD);
- return;
- }
-
hdf5_file_runtime = malloc(sizeof(*hdf5_file_runtime));
if(!hdf5_file_runtime)
{
@@ -972,13 +965,6 @@ static void hdf5_dataset_runtime_initialize()
&my_rank,
NULL);
- /* return if darshan-core does not provide enough module memory */
- if(hdf5_buf_size < sizeof(struct darshan_hdf5_dataset))
- {
- darshan_core_unregister_module(DARSHAN_H5D_MOD);
- return;
- }
-
hdf5_dataset_runtime = malloc(sizeof(*hdf5_dataset_runtime));
if(!hdf5_dataset_runtime)
{
=====================================
darshan-runtime/lib/darshan-lustre.c
=====================================
@@ -203,15 +203,6 @@ static void lustre_runtime_initialize()
&my_rank,
NULL);
- if(lustre_buf_size < LUSTRE_RECORD_SIZE(1))
- {
- /* unregister module if we aren't allocated enough space for
- * the smallest possible record
- */
- darshan_core_unregister_module(DARSHAN_LUSTRE_MOD);
- return;
- }
-
lustre_runtime = malloc(sizeof(*lustre_runtime));
if(!lustre_runtime)
{
=====================================
darshan-runtime/lib/darshan-mdhim.c
=====================================
@@ -325,14 +325,6 @@ static void mdhim_runtime_initialize()
&my_rank,
NULL);
- /* return if darshan-core does not provide enough module memory for at
- * least one MDHIM record
- */
- if(mdhim_buf_size < sizeof(struct darshan_mdhim_record))
- {
- darshan_core_unregister_module(DARSHAN_MDHIM_MOD);
- return;
- }
/* initialize module's global state */
mdhim_runtime = calloc(1, sizeof(*mdhim_runtime));
=====================================
darshan-runtime/lib/darshan-mpiio.c
=====================================
@@ -1174,13 +1174,6 @@ static void mpiio_runtime_initialize()
&my_rank,
NULL);
- /* return if darshan-core does not provide enough module memory */
- if(mpiio_buf_size < sizeof(struct darshan_mpiio_file))
- {
- darshan_core_unregister_module(DARSHAN_MPIIO_MOD);
- return;
- }
-
mpiio_runtime = malloc(sizeof(*mpiio_runtime));
if(!mpiio_runtime)
{
=====================================
darshan-runtime/lib/darshan-null.c
=====================================
@@ -224,15 +224,6 @@ static void null_runtime_initialize()
&my_rank,
NULL);
- /* return if darshan-core does not provide enough module memory for at
- * least one NULL record
- */
- if(null_buf_size < sizeof(struct darshan_null_record))
- {
- darshan_core_unregister_module(DARSHAN_NULL_MOD);
- return;
- }
-
/* initialize module's global state */
null_runtime = malloc(sizeof(*null_runtime));
if(!null_runtime)
=====================================
darshan-runtime/lib/darshan-pnetcdf.c
=====================================
@@ -231,13 +231,6 @@ static void pnetcdf_runtime_initialize()
&my_rank,
NULL);
- /* return if darshan-core does not provide enough module memory */
- if(pnetcdf_buf_size < sizeof(struct darshan_pnetcdf_file))
- {
- darshan_core_unregister_module(DARSHAN_PNETCDF_MOD);
- return;
- }
-
pnetcdf_runtime = malloc(sizeof(*pnetcdf_runtime));
if(!pnetcdf_runtime)
{
=====================================
darshan-runtime/lib/darshan-posix.c
=====================================
@@ -1883,13 +1883,6 @@ static void posix_runtime_initialize()
&my_rank,
&darshan_mem_alignment);
- /* return if darshan-core does not provide enough module memory */
- if(psx_buf_size < sizeof(struct darshan_posix_file))
- {
- darshan_core_unregister_module(DARSHAN_POSIX_MOD);
- return;
- }
-
posix_runtime = malloc(sizeof(*posix_runtime));
if(!posix_runtime)
{
=====================================
darshan-runtime/lib/darshan-stdio.c
=====================================
@@ -1018,13 +1018,6 @@ static void stdio_runtime_initialize()
&my_rank,
&darshan_mem_alignment);
- /* return if darshan-core does not provide enough module memory */
- if(stdio_buf_size < sizeof(struct darshan_stdio_file))
- {
- darshan_core_unregister_module(DARSHAN_STDIO_MOD);
- return;
- }
-
stdio_runtime = malloc(sizeof(*stdio_runtime));
if(!stdio_runtime)
{
=====================================
darshan-util/darshan-dxt-parser.c
=====================================
@@ -20,15 +20,14 @@
#include "darshan-logutils.h"
-int usage (char *exename)
-{
- fprintf(stderr, "Usage: %s <filename>\n", exename);
+#define OPTION_SHOW_INCOMPLETE (1 << 7) /* show what we have, even if log is incomplete */
- exit(1);
-}
+static int usage (char *exename);
+static int parse_args (int argc, char **argv, char **filename);
int main(int argc, char **argv)
{
+ int mask;
int ret;
int i, j;
char *filename;
@@ -49,10 +48,7 @@ int main(int argc, char **argv)
struct lustre_record_ref *lustre_rec_hash = NULL;
char *mod_buf = NULL;
- if (argc != 2)
- usage(argv[0]);
-
- filename = argv[1];
+ mask = parse_args(argc, argv, &filename);
fd = darshan_log_open(filename);
if (!fd)
@@ -195,11 +191,44 @@ int main(int argc, char **argv)
continue;
/* print warning if this module only stored partial data */
- if(DARSHAN_MOD_FLAG_ISSET(fd->partial_flag, i))
- printf("\n# *WARNING*: The %s module contains incomplete data!\n"
- "# This happens when a module runs out of\n"
- "# memory to store new record data.\n",
- darshan_module_names[i]);
+ if(DARSHAN_MOD_FLAG_ISSET(fd->partial_flag, i)) {
+ if(mask & OPTION_SHOW_INCOMPLETE)
+ {
+ /* user requested that we show the data we have anyway */
+ printf("\n# *WARNING*: "
+ "The %s module contains incomplete data!\n"
+ "# This happens when a module runs out of\n"
+ "# memory to store new record data.\n",
+ darshan_module_names[i]);
+ printf(
+ "\n# To avoid this error, consult the darshan-runtime\n"
+ "# documentation and consider setting the\n"
+ "# DARSHAN_EXCLUDE_DIRS or DXT_TRIGGER_CONF_PATH\n"
+ "# environment variable to prevent Darshan from\n"
+ "# instrumenting unecessary files.\n");
+ }
+ else
+ {
+ /* hard error */
+ fprintf(stderr, "\n# *ERROR*: "
+ "The %s module contains incomplete data!\n"
+ "# This happens when a module runs out of\n"
+ "# memory to store new record data.\n",
+ darshan_module_names[i]);
+ fprintf(stderr,
+ "\n# To avoid this error, consult the darshan-runtime\n"
+ "# documentation and consider setting the\n"
+ "# DARSHAN_EXCLUDE_DIRS or DXT_TRIGGER_CONF_PATH\n"
+ "# environment variable to prevent Darshan from\n"
+ "# instrumenting unecessary files.\n");
+ fprintf(stderr,
+ "\n# You can display the (incomplete) data that is\n"
+ "# present in this log using the --show-incomplete\n"
+ "# option to darshan-dxt-parser.\n");
+ return(-1);
+ }
+
+ }
/* loop over each of this module's records and print them */
while(1)
@@ -311,6 +340,59 @@ cleanup:
return(ret);
}
+/*
+ * parse_args()
+ *
+ * Parse the darshan-dxt-parser command line.
+ *
+ * argc, argv: the arguments exactly as received by main()
+ * filename:   output; set to point at the log file path inside argv
+ *
+ * Returns a bit mask of the OPTION_* flags that were given (currently
+ * only OPTION_SHOW_INCOMPLETE).  On --help, on an unrecognized option,
+ * or when the command line does not carry exactly one file name, usage()
+ * is called; it prints a synopsis and exits, so this function does not
+ * return in those cases.
+ */
+static int parse_args (int argc, char **argv, char **filename)
+{
+    int mask = 0;
+    static struct option long_opts[] =
+    {
+        {"show-incomplete", 0, NULL, OPTION_SHOW_INCOMPLETE},
+        {"help", 0, NULL, 0},
+        {0, 0, 0, 0}
+    };
+
+    while(1)
+    {
+        /* the position of the matched entry in long_opts is never used,
+         * so no longindex pointer is supplied
+         */
+        int c = getopt_long(argc, argv, "", long_opts, NULL);
+
+        if (c == -1) break;
+
+        switch(c)
+        {
+            case OPTION_SHOW_INCOMPLETE:
+                /* the option's val is also its bit in the mask */
+                mask |= c;
+                break;
+            case 0:   /* --help (val 0, no flag pointer) */
+            case '?': /* unrecognized or malformed option */
+            default:
+                usage(argv[0]);
+                break;
+        }
+    }
+
+    /* exactly one positional argument (the log file) must remain; a
+     * missing file name and surplus arguments are both usage errors,
+     * rather than silently ignoring everything after the first name
+     */
+    if (optind + 1 != argc)
+    {
+        usage(argv[0]);
+    }
+
+    *filename = argv[optind];
+
+    return mask;
+}
+
+/*
+ * usage()
+ *
+ * Write the command line synopsis for this tool to stderr and terminate
+ * the process with exit status 1.  The return type is int to agree with
+ * the prototype, but control never returns to the caller.
+ *
+ * exename: program name to show in the synopsis (callers pass argv[0])
+ */
+static int usage (char *exename)
+{
+    fprintf(stderr,
+        "Usage: %s [options] <filename>\n"
+        " --show-incomplete : display results even if log is incomplete\n",
+        exename);
+
+    exit(1);
+}
+
+
/*
* Local variables:
* c-indent-level: 4
=====================================
darshan-util/darshan-job-summary/bin/darshan-job-summary.pl.in
=====================================
@@ -54,7 +54,10 @@ if ($verbose_flag)
print "verbose: $tmp_dir\n";
}
-open(PARSE_OUT, "$darshan_parser --base --perf $input_file |") || die("Can't execute \"$darshan_parser $input_file\": $!\n");
+# Note that we use the --show-incomplete option here because
+# darshan-job-summary will display a clear warning if this condition is
+# encountered anyway.
+open(PARSE_OUT, "$darshan_parser --show-incomplete --base --perf $input_file |") || die("Can't execute \"$darshan_parser $input_file\": $!\n");
open(FA_READ, ">$tmp_dir/file-access-read.dat") || die("error opening output file: $!\n");
open(FA_WRITE, ">$tmp_dir/file-access-write.dat") || die("error opening output file: $!\n");
=====================================
darshan-util/darshan-job-summary/share/summary.tex
=====================================
@@ -37,8 +37,9 @@
\twocolumn[
\vspace{3.5in}
\center
-{\bf \textcolor{red}{WARNING}}: This Darshan log contains incomplete data
-which may skew results in this document.
+{\bf \textcolor{red}{WARNING}}: This Darshan log contains incomplete data.
+This happens when a module runs out of memory to store new record data.
+Please run darshan-parser on the log file for more information.
\endcenter
]
\newpage
=====================================
darshan-util/darshan-parser.c
=====================================
@@ -29,13 +29,15 @@
#define OPTION_FILE (1 << 3) /* file count totals */
#define OPTION_FILE_LIST (1 << 4) /* per-file summaries */
#define OPTION_FILE_LIST_DETAILED (1 << 6) /* per-file summaries with extra detail */
+#define OPTION_SHOW_INCOMPLETE (1 << 7) /* show what we have, even if log is incomplete */
#define OPTION_ALL (\
OPTION_BASE|\
OPTION_TOTAL|\
OPTION_PERF|\
OPTION_FILE|\
OPTION_FILE_LIST|\
- OPTION_FILE_LIST_DETAILED)
+ OPTION_FILE_LIST_DETAILED|\
+ OPTION_SHOW_INCOMPLETE)
#define FILETYPE_SHARED (1 << 0)
#define FILETYPE_UNIQUE (1 << 1)
@@ -131,6 +133,7 @@ int usage (char *exename)
fprintf(stderr, " --file-list-detailed : per-file summaries with additional detail\n");
fprintf(stderr, " --perf : derived perf data\n");
fprintf(stderr, " --total : aggregated darshan field data\n");
+ fprintf(stderr, " --show-incomplete : display results even if log is incomplete\n");
exit(1);
}
@@ -148,6 +151,7 @@ int parse_args (int argc, char **argv, char **filename)
{"file-list-detailed", 0, NULL, OPTION_FILE_LIST_DETAILED},
{"perf", 0, NULL, OPTION_PERF},
{"total", 0, NULL, OPTION_TOTAL},
+ {"show-incomplete", 0, NULL, OPTION_SHOW_INCOMPLETE},
{"help", 0, NULL, 0},
{0, 0, 0, 0}
};
@@ -169,6 +173,7 @@ int parse_args (int argc, char **argv, char **filename)
case OPTION_FILE_LIST_DETAILED:
case OPTION_PERF:
case OPTION_TOTAL:
+ case OPTION_SHOW_INCOMPLETE:
mask |= c;
break;
case 0:
@@ -189,9 +194,9 @@ int parse_args (int argc, char **argv, char **filename)
}
/* default mask value if none specified */
- if (mask == 0)
+ if (mask == 0 || mask == OPTION_SHOW_INCOMPLETE)
{
- mask = OPTION_BASE;
+ mask |= OPTION_BASE;
}
return mask;
@@ -326,7 +331,7 @@ int main(int argc, char **argv)
printf("# record table: %zu bytes (compressed)\n", fd->name_map.len);
for(i=0; i<DARSHAN_MAX_MODS; i++)
{
- if(fd->mod_map[i].len)
+ if(fd->mod_map[i].len || DARSHAN_MOD_FLAG_ISSET(fd->partial_flag, i))
{
printf("# %s module: %zu bytes (compressed), ver=%d\n",
darshan_module_names[i], fd->mod_map[i].len, fd->mod_ver[i]);
@@ -357,7 +362,6 @@ int main(int argc, char **argv)
printf("# <fs type>: type of file system that the file resides on.\n");
}
- /* warn user if this log file is incomplete */
pdata.rank_cumul_io_time = malloc(sizeof(double)*job.nprocs);
pdata.rank_cumul_md_time = malloc(sizeof(double)*job.nprocs);
if (!pdata.rank_cumul_io_time || !pdata.rank_cumul_md_time)
@@ -386,7 +390,8 @@ int main(int argc, char **argv)
if(fd->mod_map[i].len == 0)
{
empty_mods++;
- continue;
+ if(!DARSHAN_MOD_FLAG_ISSET(fd->partial_flag, i))
+ continue;
}
/* skip modules with no logutil definitions */
else if(!mod_logutils[i])
@@ -413,11 +418,43 @@ int main(int argc, char **argv)
printf("# *******************************************************\n");
/* print warning if this module only stored partial data */
- if(DARSHAN_MOD_FLAG_ISSET(fd->partial_flag, i))
- printf("\n# *WARNING*: The %s module contains incomplete data!\n"
- "# This happens when a module runs out of\n"
- "# memory to store new record data.\n",
- darshan_module_names[i]);
+ if(DARSHAN_MOD_FLAG_ISSET(fd->partial_flag, i)) {
+ if(mask & OPTION_SHOW_INCOMPLETE)
+ {
+ /* user requested that we show the data we have anyway */
+ printf("\n# *WARNING*: "
+ "The %s module contains incomplete data!\n"
+ "# This happens when a module runs out of\n"
+ "# memory to store new record data.\n",
+ darshan_module_names[i]);
+ printf(
+ "\n# To avoid this error, consult the darshan-runtime\n"
+ "# documentation and consider setting the\n"
+ "# DARSHAN_EXCLUDE_DIRS environment variable to prevent\n"
+ "# Darshan from instrumenting unecessary files.\n");
+ if(fd->mod_map[i].len == 0)
+ continue; // no data to parse
+ }
+ else
+ {
+ /* hard error */
+ fprintf(stderr, "\n# *ERROR*: "
+ "The %s module contains incomplete data!\n"
+ "# This happens when a module runs out of\n"
+ "# memory to store new record data.\n",
+ darshan_module_names[i]);
+ fprintf(stderr,
+ "\n# To avoid this error, consult the darshan-runtime\n"
+ "# documentation and consider setting the\n"
+ "# DARSHAN_EXCLUDE_DIRS environment variable to prevent\n"
+ "# Darshan from instrumenting unecessary files.\n");
+ fprintf(stderr,
+ "\n# You can display the (incomplete) data that is\n"
+ "# present in this log using the --show-incomplete\n"
+ "# option to darshan-parser.\n");
+ return(-1);
+ }
+ }
if(mask & OPTION_BASE)
{
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/-/compare/80c88c161ecbf3447f06f28007323d070b88f64d...7d4a80abaa2f2bfcc767f4f988fbf9dafff95926
--
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/-/compare/80c88c161ecbf3447f06f28007323d070b88f64d...7d4a80abaa2f2bfcc767f4f988fbf9dafff95926
You're receiving this email because of your account on xgitlab.cels.anl.gov.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20210316/add9c5b4/attachment-0001.html>
More information about the Darshan-commits
mailing list