[Darshan-commits] [Git][darshan/darshan][carns/issue-284-incomplete-warning] improve handling of incomplete logs

Philip Carns xgitlab at cels.anl.gov
Mon Mar 15 16:07:31 CDT 2021



Philip Carns pushed to branch carns/issue-284-incomplete-warning at darshan / darshan


Commits:
ff9791e3 by Phil Carns at 2021-03-15T17:06:05-04:00
improve handling of incomplete logs

- exit with error by default in parsers
- show more verbose error message with tips to help
- add --show-incomplete option to enable previous behavior

- - - - -


5 changed files:

- ChangeLog
- darshan-util/darshan-dxt-parser.c
- darshan-util/darshan-job-summary/bin/darshan-job-summary.pl.in
- darshan-util/darshan-job-summary/share/summary.tex
- darshan-util/darshan-parser.c


Changes:

=====================================
ChangeLog
=====================================
@@ -2,10 +2,12 @@
 Darshan Release Change Log
 --------------------------
 
-Darshan-3.2.2
+Darshan-3.3
 =============
 * add wrappers for preadv, preadv2, pwritev, and pwritev2 (improves profiling
   of ompio)
+* improve error handling in command line parsers to make incomplete logs more
+  obvious to users
 
 Darshan-3.2.1
 =============


=====================================
darshan-util/darshan-dxt-parser.c
=====================================
@@ -20,15 +20,14 @@
 
 #include "darshan-logutils.h"
 
-int usage (char *exename)
-{
-    fprintf(stderr, "Usage: %s <filename>\n", exename);
+#define OPTION_SHOW_INCOMPLETE  (1 << 7)  /* show what we have, even if log is incomplete */
 
-    exit(1);
-}
+static int usage (char *exename);
+static int parse_args (int argc, char **argv, char **filename);
 
 int main(int argc, char **argv)
 {
+    int mask;
     int ret;
     int i, j;
     char *filename;
@@ -49,10 +48,7 @@ int main(int argc, char **argv)
     struct lustre_record_ref *lustre_rec_hash = NULL;
     char *mod_buf = NULL;
 
-    if (argc != 2)
-        usage(argv[0]);
-
-    filename = argv[1];
+    mask = parse_args(argc, argv, &filename);
 
     fd = darshan_log_open(filename);
     if (!fd)
@@ -195,11 +191,44 @@ int main(int argc, char **argv)
             continue;
 
         /* print warning if this module only stored partial data */
-        if(DARSHAN_MOD_FLAG_ISSET(fd->partial_flag, i))
-            printf("\n# *WARNING*: The %s module contains incomplete data!\n"
-                   "#            This happens when a module runs out of\n"
-                   "#            memory to store new record data.\n",
-                   darshan_module_names[i]);
+        if(DARSHAN_MOD_FLAG_ISSET(fd->partial_flag, i)) {
+            if(mask & OPTION_SHOW_INCOMPLETE)
+            {
+                /* user requested that we show the data we have anyway */
+                printf("\n# *WARNING*: "
+                       "The %s module contains incomplete data!\n"
+                       "#            This happens when a module runs out of\n"
+                       "#            memory to store new record data.\n",
+                       darshan_module_names[i]);
+                printf(
+                       "\n# To avoid this error, consult the darshan-runtime\n"
+                       "# documentation and consider setting the\n"
+                       "# DARSHAN_EXCLUDE_DIRS or DXT_TRIGGER_CONF_PATH\n"
+                       "# environment variable to prevent Darshan from\n"
+                       "# instrumenting unnecessary files.\n");
+            }
+            else
+            {
+                /* hard error */
+                fprintf(stderr, "\n# *ERROR*: "
+                       "The %s module contains incomplete data!\n"
+                       "#            This happens when a module runs out of\n"
+                       "#            memory to store new record data.\n",
+                       darshan_module_names[i]);
+                fprintf(stderr,
+                       "\n# To avoid this error, consult the darshan-runtime\n"
+                       "# documentation and consider setting the\n"
+                       "# DARSHAN_EXCLUDE_DIRS or DXT_TRIGGER_CONF_PATH\n"
+                       "# environment variable to prevent Darshan from\n"
+                       "# instrumenting unnecessary files.\n");
+                fprintf(stderr,
+                        "\n# You can display the (incomplete) data that is\n"
+                        "# present in this log using the --show-incomplete\n"
+                        "# option to darshan-dxt-parser.\n");
+                return(-1);
+            }
+
+        }
 
         /* loop over each of this module's records and print them */
         while(1)
@@ -311,6 +340,59 @@ cleanup:
     return(ret);
 }
 
+static int parse_args (int argc, char **argv, char **filename)
+{
+    int index;
+    int mask;
+    static struct option long_opts[] =
+    {
+        {"show-incomplete", 0, NULL, OPTION_SHOW_INCOMPLETE},
+        {"help",  0, NULL, 0},
+        {0, 0, 0, 0}
+    };
+
+    mask = 0;
+
+    while(1)
+    {
+        int c = getopt_long(argc, argv, "", long_opts, &index);
+
+        if (c == -1) break;
+
+        switch(c)
+        {
+            case OPTION_SHOW_INCOMPLETE:
+                mask |= c;
+                break;
+            case 0:
+            case '?':
+            default:
+                usage(argv[0]);
+                break;
+        }
+    }
+
+    if (optind < argc)
+    {
+        *filename = argv[optind];
+    }
+    else
+    {
+        usage(argv[0]);
+    }
+
+    return mask;
+}
+
+static int usage (char *exename)
+{
+    fprintf(stderr, "Usage: %s [options] <filename>\n", exename);
+    fprintf(stderr, "    --show-incomplete : display results even if log is incomplete\n");
+
+    exit(1);
+}
+
+
 /*
  * Local variables:
  *  c-indent-level: 4


=====================================
darshan-util/darshan-job-summary/bin/darshan-job-summary.pl.in
=====================================
@@ -54,7 +54,10 @@ if ($verbose_flag)
     print "verbose: $tmp_dir\n";
 }
 
-open(PARSE_OUT, "$darshan_parser --base --perf $input_file |") || die("Can't execute \"$darshan_parser $input_file\": $!\n");
+# Note that we use the --show-incomplete option here because
+# darshan-job-summary will display a clear warning if this condition is
+# encountered anyway.
+open(PARSE_OUT, "$darshan_parser --show-incomplete --base --perf $input_file |") || die("Can't execute \"$darshan_parser $input_file\": $!\n");
 
 open(FA_READ, ">$tmp_dir/file-access-read.dat") || die("error opening output file: $!\n");
 open(FA_WRITE, ">$tmp_dir/file-access-write.dat") || die("error opening output file: $!\n");


=====================================
darshan-util/darshan-job-summary/share/summary.tex
=====================================
@@ -37,8 +37,9 @@
 \twocolumn[
 \vspace{3.5in}
 \center
-{\bf \textcolor{red}{WARNING}}: This Darshan log contains incomplete data
-which may skew results in this document.
+{\bf \textcolor{red}{WARNING}}: This Darshan log contains incomplete data.
+This happens when a module runs out of memory to store new record data.
+Please run darshan-parser on the log file for more information.
 \endcenter
 ]
 \newpage


=====================================
darshan-util/darshan-parser.c
=====================================
@@ -29,13 +29,15 @@
 #define OPTION_FILE  (1 << 3)  /* file count totals */
 #define OPTION_FILE_LIST  (1 << 4)  /* per-file summaries */
 #define OPTION_FILE_LIST_DETAILED  (1 << 6)  /* per-file summaries with extra detail */
+#define OPTION_SHOW_INCOMPLETE  (1 << 7)  /* show what we have, even if log is incomplete */
 #define OPTION_ALL (\
   OPTION_BASE|\
   OPTION_TOTAL|\
   OPTION_PERF|\
   OPTION_FILE|\
   OPTION_FILE_LIST|\
-  OPTION_FILE_LIST_DETAILED)
+  OPTION_FILE_LIST_DETAILED|\
+  OPTION_SHOW_INCOMPLETE)
 
 #define FILETYPE_SHARED (1 << 0)
 #define FILETYPE_UNIQUE (1 << 1)
@@ -131,6 +133,7 @@ int usage (char *exename)
     fprintf(stderr, "    --file-list-detailed  : per-file summaries with additional detail\n");
     fprintf(stderr, "    --perf  : derived perf data\n");
     fprintf(stderr, "    --total : aggregated darshan field data\n");
+    fprintf(stderr, "    --show-incomplete : display results even if log is incomplete\n");
 
     exit(1);
 }
@@ -148,6 +151,7 @@ int parse_args (int argc, char **argv, char **filename)
         {"file-list-detailed",  0, NULL, OPTION_FILE_LIST_DETAILED},
         {"perf",  0, NULL, OPTION_PERF},
         {"total", 0, NULL, OPTION_TOTAL},
+        {"show-incomplete", 0, NULL, OPTION_SHOW_INCOMPLETE},
         {"help",  0, NULL, 0},
         {0, 0, 0, 0}
     };
@@ -169,6 +173,7 @@ int parse_args (int argc, char **argv, char **filename)
             case OPTION_FILE_LIST_DETAILED:
             case OPTION_PERF:
             case OPTION_TOTAL:
+            case OPTION_SHOW_INCOMPLETE:
                 mask |= c;
                 break;
             case 0:
@@ -189,9 +194,9 @@ int parse_args (int argc, char **argv, char **filename)
     }
 
     /* default mask value if none specified */
-    if (mask == 0)
+    if (mask == 0 || mask == OPTION_SHOW_INCOMPLETE)
     {
-        mask = OPTION_BASE;
+        mask |= OPTION_BASE;
     }
 
     return mask;
@@ -357,7 +362,6 @@ int main(int argc, char **argv)
         printf("#   <fs type>: type of file system that the file resides on.\n");
     }
 
-    /* warn user if this log file is incomplete */
     pdata.rank_cumul_io_time = malloc(sizeof(double)*job.nprocs);
     pdata.rank_cumul_md_time = malloc(sizeof(double)*job.nprocs);
     if (!pdata.rank_cumul_io_time || !pdata.rank_cumul_md_time)
@@ -413,11 +417,41 @@ int main(int argc, char **argv)
         printf("# *******************************************************\n");
 
         /* print warning if this module only stored partial data */
-        if(DARSHAN_MOD_FLAG_ISSET(fd->partial_flag, i))
-            printf("\n# *WARNING*: The %s module contains incomplete data!\n"
-                   "#            This happens when a module runs out of\n"
-                   "#            memory to store new record data.\n",
-                   darshan_module_names[i]);
+        if(DARSHAN_MOD_FLAG_ISSET(fd->partial_flag, i)) {
+            if(mask & OPTION_SHOW_INCOMPLETE)
+            {
+                /* user requested that we show the data we have anyway */
+                printf("\n# *WARNING*: "
+                       "The %s module contains incomplete data!\n"
+                       "#            This happens when a module runs out of\n"
+                       "#            memory to store new record data.\n",
+                       darshan_module_names[i]);
+                printf(
+                       "\n# To avoid this error, consult the darshan-runtime\n"
+                       "# documentation and consider setting the\n"
+                       "# DARSHAN_EXCLUDE_DIRS environment variable to prevent\n"
+                       "# Darshan from instrumenting unnecessary files.\n");
+            }
+            else
+            {
+                /* hard error */
+                fprintf(stderr, "\n# *ERROR*: "
+                       "The %s module contains incomplete data!\n"
+                       "#            This happens when a module runs out of\n"
+                       "#            memory to store new record data.\n",
+                       darshan_module_names[i]);
+                fprintf(stderr,
+                       "\n# To avoid this error, consult the darshan-runtime\n"
+                       "# documentation and consider setting the\n"
+                       "# DARSHAN_EXCLUDE_DIRS environment variable to prevent\n"
+                       "# Darshan from instrumenting unnecessary files.\n");
+                fprintf(stderr,
+                        "\n# You can display the (incomplete) data that is\n"
+                        "# present in this log using the --show-incomplete\n"
+                        "# option to darshan-parser.\n");
+                return(-1);
+            }
+        }
 
         if(mask & OPTION_BASE)
         {



View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/-/commit/ff9791e3e7adfc95896b237155d9a43da4d63808

-- 
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/-/commit/ff9791e3e7adfc95896b237155d9a43da4d63808
You're receiving this email because of your account on xgitlab.cels.anl.gov.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20210315/4b3b669a/attachment-0001.html>


More information about the Darshan-commits mailing list