[Darshan-commits] [Git][darshan/darshan][master] add close_start and open_end timers

Shane Snyder xgitlab at cels.anl.gov
Wed Jan 16 14:33:58 CST 2019


Shane Snyder pushed to branch master at darshan / darshan


Commits:
598a2e85 by Shane Snyder at 2019-01-16T20:30:22Z
add close_start and open_end timers

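Adds CLOSE_START and OPEN_END timestamp counters to the MPIIO, PNETCDF, and HDF5 modules. The existing OPEN and CLOSE timestamp counters are renamed OPEN_START and CLOSE_END, each module's log format version is incremented, and the darshan-util readers upconvert records from older log versions, setting the two new counters to -1.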

- - - - -


10 changed files:

- darshan-hdf5-log-format.h
- darshan-mpiio-log-format.h
- darshan-pnetcdf-log-format.h
- darshan-runtime/lib/darshan-hdf5.c
- darshan-runtime/lib/darshan-mpiio.c
- darshan-runtime/lib/darshan-pnetcdf.c
- darshan-util/darshan-hdf5-logutils.c
- darshan-util/darshan-mpiio-logutils.c
- darshan-util/darshan-parser.c
- darshan-util/darshan-pnetcdf-logutils.c


Changes:

=====================================
darshan-hdf5-log-format.h
=====================================
@@ -8,7 +8,7 @@
 #define __DARSHAN_HDF5_LOG_FORMAT_H
 
 /* current HDF5 log format version */
-#define DARSHAN_HDF5_VER 1
+#define DARSHAN_HDF5_VER 2
 
 #define HDF5_COUNTERS \
     /* count of HDF5 opens */\
@@ -18,9 +18,13 @@
 
 #define HDF5_F_COUNTERS \
     /* timestamp of first open */\
-    X(HDF5_F_OPEN_TIMESTAMP) \
+    X(HDF5_F_OPEN_START_TIMESTAMP) \
+    /* timestamp of first close */\
+    X(HDF5_F_CLOSE_START_TIMESTAMP) \
+    /* timestamp of last open */\
+    X(HDF5_F_OPEN_END_TIMESTAMP) \
     /* timestamp of last close */\
-    X(HDF5_F_CLOSE_TIMESTAMP) \
+    X(HDF5_F_CLOSE_END_TIMESTAMP) \
     /* end of counters*/\
     X(HDF5_F_NUM_INDICES)
 


=====================================
darshan-mpiio-log-format.h
=====================================
@@ -8,7 +8,7 @@
 #define __DARSHAN_MPIIO_LOG_FORMAT_H
 
 /* current MPI-IO log format version */
-#define DARSHAN_MPIIO_VER 2
+#define DARSHAN_MPIIO_VER 3
 
 /* TODO: maybe use a counter to track cases in which a derived datatype is used? */
 
@@ -92,17 +92,21 @@
 
 #define MPIIO_F_COUNTERS \
     /* timestamp of first open */\
-    X(MPIIO_F_OPEN_TIMESTAMP) \
+    X(MPIIO_F_OPEN_START_TIMESTAMP) \
     /* timestamp of first read */\
     X(MPIIO_F_READ_START_TIMESTAMP) \
     /* timestamp of first write */\
     X(MPIIO_F_WRITE_START_TIMESTAMP) \
+    /* timestamp of first close */\
+    X(MPIIO_F_CLOSE_START_TIMESTAMP) \
+    /* timestamp of last open */\
+    X(MPIIO_F_OPEN_END_TIMESTAMP) \
     /* timestamp of last read */\
     X(MPIIO_F_READ_END_TIMESTAMP) \
     /* timestamp of last write */\
     X(MPIIO_F_WRITE_END_TIMESTAMP) \
     /* timestamp of last close */\
-    X(MPIIO_F_CLOSE_TIMESTAMP) \
+    X(MPIIO_F_CLOSE_END_TIMESTAMP) \
     /* cumulative MPI-IO read time */\
     X(MPIIO_F_READ_TIME) \
     /* cumulative MPI-IO write time */\


=====================================
darshan-pnetcdf-log-format.h
=====================================
@@ -8,7 +8,7 @@
 #define __DARSHAN_PNETCDF_LOG_FORMAT_H
 
 /* current PNETCDF log format version */
-#define DARSHAN_PNETCDF_VER 1
+#define DARSHAN_PNETCDF_VER 2
 
 #define PNETCDF_COUNTERS \
     /* count of PNETCDF independent opens */\
@@ -20,9 +20,13 @@
 
 #define PNETCDF_F_COUNTERS \
     /* timestamp of first open */\
-    X(PNETCDF_F_OPEN_TIMESTAMP) \
+    X(PNETCDF_F_OPEN_START_TIMESTAMP) \
+    /* timestamp of first close */\
+    X(PNETCDF_F_CLOSE_START_TIMESTAMP) \
+    /* timestamp of last open */\
+    X(PNETCDF_F_OPEN_END_TIMESTAMP) \
     /* timestamp of last close */\
-    X(PNETCDF_F_CLOSE_TIMESTAMP) \
+    X(PNETCDF_F_CLOSE_END_TIMESTAMP) \
     /* end of counters*/\
     X(PNETCDF_F_NUM_INDICES)
 


=====================================
darshan-runtime/lib/darshan-hdf5.c
=====================================
@@ -90,7 +90,7 @@ static int my_rank = -1;
     HDF5_UNLOCK(); \
 } while(0)
 
-#define HDF5_RECORD_OPEN(__ret, __path, __tm1) do { \
+#define HDF5_RECORD_OPEN(__ret, __path, __tm1, __tm2) do { \
     darshan_record_id rec_id; \
     struct hdf5_file_record_ref *rec_ref; \
     char *newpath; \
@@ -107,8 +107,10 @@ static int my_rank = -1;
         if(newpath != __path) free(newpath); \
         break; \
     } \
-    if(rec_ref->file_rec->fcounters[HDF5_F_OPEN_TIMESTAMP] == 0) \
-        rec_ref->file_rec->fcounters[HDF5_F_OPEN_TIMESTAMP] = __tm1; \
+    if(rec_ref->file_rec->fcounters[HDF5_F_OPEN_START_TIMESTAMP] == 0 || \
+     rec_ref->file_rec->fcounters[HDF5_F_OPEN_START_TIMESTAMP] > __tm1) \
+        rec_ref->file_rec->fcounters[HDF5_F_OPEN_START_TIMESTAMP] = __tm1; \
+    rec_ref->file_rec->fcounters[HDF5_F_OPEN_END_TIMESTAMP] = __tm2; \
     rec_ref->file_rec->counters[HDF5_OPENS] += 1; \
     darshan_add_record_ref(&(hdf5_runtime->hid_hash), &__ret, sizeof(hid_t), rec_ref); \
     if(newpath != __path) free(newpath); \
@@ -123,7 +125,7 @@ hid_t DARSHAN_DECL(H5Fcreate)(const char *filename, unsigned flags,
 {
     hid_t ret;
     char* tmp;
-    double tm1;
+    double tm1, tm2;
     unsigned majnum, minnum, relnum;
 
     H5get_libversion(&majnum, &minnum, &relnum);
@@ -146,6 +148,7 @@ hid_t DARSHAN_DECL(H5Fcreate)(const char *filename, unsigned flags,
 
     tm1 = darshan_core_wtime();
     ret = __real_H5Fcreate(filename, flags, create_plist, access_plist);
+    tm2 = darshan_core_wtime();
     if(ret >= 0)
     {
         /* use ROMIO approach to strip prefix if present */
@@ -159,7 +162,7 @@ hid_t DARSHAN_DECL(H5Fcreate)(const char *filename, unsigned flags,
         }
 
         HDF5_PRE_RECORD();
-        HDF5_RECORD_OPEN(ret, filename, tm1);
+        HDF5_RECORD_OPEN(ret, filename, tm1, tm2);
         HDF5_POST_RECORD();
     }
 
@@ -171,7 +174,7 @@ hid_t DARSHAN_DECL(H5Fopen)(const char *filename, unsigned flags,
 {
     hid_t ret;
     char* tmp;
-    double tm1;
+    double tm1, tm2;
     unsigned majnum, minnum, relnum;
 
     H5get_libversion(&majnum, &minnum, &relnum);
@@ -194,6 +197,7 @@ hid_t DARSHAN_DECL(H5Fopen)(const char *filename, unsigned flags,
 
     tm1 = darshan_core_wtime();
     ret = __real_H5Fopen(filename, flags, access_plist);
+    tm2 = darshan_core_wtime();
     if(ret >= 0)
     {
         /* use ROMIO approach to strip prefix if present */
@@ -207,7 +211,7 @@ hid_t DARSHAN_DECL(H5Fopen)(const char *filename, unsigned flags,
         }
 
         HDF5_PRE_RECORD();
-        HDF5_RECORD_OPEN(ret, filename, tm1);
+        HDF5_RECORD_OPEN(ret, filename, tm1, tm2);
         HDF5_POST_RECORD();
     }
 
@@ -218,19 +222,24 @@ hid_t DARSHAN_DECL(H5Fopen)(const char *filename, unsigned flags,
 herr_t DARSHAN_DECL(H5Fclose)(hid_t file_id)
 {
     struct hdf5_file_record_ref *rec_ref;
+    double tm1, tm2;
     herr_t ret;
 
     MAP_OR_FAIL(H5Fclose);
 
+    tm1 = darshan_core_wtime();
     ret = __real_H5Fclose(file_id);
+    tm2 = darshan_core_wtime();
 
     HDF5_PRE_RECORD();
     rec_ref = darshan_lookup_record_ref(hdf5_runtime->hid_hash,
         &file_id, sizeof(hid_t));
     if(rec_ref)
     {
-        rec_ref->file_rec->fcounters[HDF5_F_CLOSE_TIMESTAMP] =
-            darshan_core_wtime();
+        if(rec_ref->file_rec->fcounters[HDF5_F_CLOSE_START_TIMESTAMP] == 0 ||
+         rec_ref->file_rec->fcounters[HDF5_F_CLOSE_START_TIMESTAMP] > tm1)
+           rec_ref->file_rec->fcounters[HDF5_F_CLOSE_START_TIMESTAMP] = tm1;
+        rec_ref->file_rec->fcounters[HDF5_F_CLOSE_END_TIMESTAMP] = tm2;
         darshan_delete_record_ref(&(hdf5_runtime->hid_hash),
             &file_id, sizeof(hid_t));
     }
@@ -349,7 +358,7 @@ static void hdf5_record_reduction_op(void* infile_v, void* inoutfile_v,
         }
 
         /* min non-zero (if available) value */
-        for(j=HDF5_F_OPEN_TIMESTAMP; j<=HDF5_F_OPEN_TIMESTAMP; j++)
+        for(j=HDF5_F_OPEN_START_TIMESTAMP; j<=HDF5_F_CLOSE_START_TIMESTAMP; j++)
         {
             if((infile->fcounters[j] < inoutfile->fcounters[j] &&
                infile->fcounters[j] > 0) || inoutfile->fcounters[j] == 0) 
@@ -359,7 +368,7 @@ static void hdf5_record_reduction_op(void* infile_v, void* inoutfile_v,
         }
 
         /* max */
-        for(j=HDF5_F_CLOSE_TIMESTAMP; j<=HDF5_F_CLOSE_TIMESTAMP; j++)
+        for(j=HDF5_F_OPEN_END_TIMESTAMP; j<=HDF5_F_CLOSE_END_TIMESTAMP; j++)
         {
             if(infile->fcounters[j] > inoutfile->fcounters[j])
                 tmp_file.fcounters[j] = infile->fcounters[j];


=====================================
darshan-runtime/lib/darshan-mpiio.c
=====================================
@@ -229,9 +229,10 @@ static int enable_dxt_io_trace = 0;
         rec_ref->file_rec->counters[MPIIO_COLL_OPENS] += 1; \
     if(__info != MPI_INFO_NULL) \
         rec_ref->file_rec->counters[MPIIO_HINTS] += 1; \
-    if(rec_ref->file_rec->fcounters[MPIIO_F_OPEN_TIMESTAMP] == 0 || \
-     rec_ref->file_rec->fcounters[MPIIO_F_OPEN_TIMESTAMP] > __tm1) \
-        rec_ref->file_rec->fcounters[MPIIO_F_OPEN_TIMESTAMP] = __tm1; \
+    if(rec_ref->file_rec->fcounters[MPIIO_F_OPEN_START_TIMESTAMP] == 0 || \
+     rec_ref->file_rec->fcounters[MPIIO_F_OPEN_START_TIMESTAMP] > __tm1) \
+        rec_ref->file_rec->fcounters[MPIIO_F_OPEN_START_TIMESTAMP] = __tm1; \
+    rec_ref->file_rec->fcounters[MPIIO_F_OPEN_END_TIMESTAMP] = __tm2; \
     DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[MPIIO_F_META_TIME], \
         __tm1, __tm2, rec_ref->last_meta_end); \
     darshan_add_record_ref(&(mpiio_runtime->fh_hash), &__fh, sizeof(MPI_File), rec_ref); \
@@ -1090,8 +1091,10 @@ int DARSHAN_DECL(MPI_File_close)(MPI_File *fh)
         &tmp_fh, sizeof(MPI_File));
     if(rec_ref)
     {
-        rec_ref->file_rec->fcounters[MPIIO_F_CLOSE_TIMESTAMP] =
-            darshan_core_wtime();
+        if(rec_ref->file_rec->fcounters[MPIIO_F_CLOSE_START_TIMESTAMP] == 0 ||
+         rec_ref->file_rec->fcounters[MPIIO_F_CLOSE_START_TIMESTAMP] > tm1)
+           rec_ref->file_rec->fcounters[MPIIO_F_CLOSE_START_TIMESTAMP] = tm1;
+        rec_ref->file_rec->fcounters[MPIIO_F_CLOSE_END_TIMESTAMP] = tm2;
         DARSHAN_TIMER_INC_NO_OVERLAP(
             rec_ref->file_rec->fcounters[MPIIO_F_META_TIME],
             tm1, tm2, rec_ref->last_meta_end);
@@ -1273,7 +1276,7 @@ static void mpiio_record_reduction_op(
         }
 
         /* min non-zero (if available) value */
-        for(j=MPIIO_F_OPEN_TIMESTAMP; j<=MPIIO_F_WRITE_START_TIMESTAMP; j++)
+        for(j=MPIIO_F_OPEN_START_TIMESTAMP; j<=MPIIO_F_CLOSE_START_TIMESTAMP; j++)
         {
             if((infile->fcounters[j] < inoutfile->fcounters[j] &&
                infile->fcounters[j] > 0) || inoutfile->fcounters[j] == 0)
@@ -1283,7 +1286,7 @@ static void mpiio_record_reduction_op(
         }
 
         /* max */
-        for(j=MPIIO_F_READ_END_TIMESTAMP; j<= MPIIO_F_CLOSE_TIMESTAMP; j++)
+        for(j=MPIIO_F_OPEN_END_TIMESTAMP; j<= MPIIO_F_CLOSE_END_TIMESTAMP; j++)
         {
             if(infile->fcounters[j] > inoutfile->fcounters[j])
                 tmp_file.fcounters[j] = infile->fcounters[j];


=====================================
darshan-runtime/lib/darshan-pnetcdf.c
=====================================
@@ -76,7 +76,7 @@ static int my_rank = -1;
     PNETCDF_UNLOCK(); \
 } while(0)
 
-#define PNETCDF_RECORD_OPEN(__ncidp, __path, __comm, __tm1) do { \
+#define PNETCDF_RECORD_OPEN(__ncidp, __path, __comm, __tm1, __tm2) do { \
     darshan_record_id rec_id; \
     struct pnetcdf_file_record_ref *rec_ref; \
     char *newpath; \
@@ -95,8 +95,10 @@ static int my_rank = -1;
         break; \
     } \
     PMPI_Comm_size(__comm, &comm_size); \
-    if(rec_ref->file_rec->fcounters[PNETCDF_F_OPEN_TIMESTAMP] == 0) \
-        rec_ref->file_rec->fcounters[PNETCDF_F_OPEN_TIMESTAMP] = __tm1; \
+    if(rec_ref->file_rec->fcounters[PNETCDF_F_OPEN_START_TIMESTAMP] == 0 || \
+     rec_ref->file_rec->fcounters[PNETCDF_F_OPEN_START_TIMESTAMP] > __tm1) \
+        rec_ref->file_rec->fcounters[PNETCDF_F_OPEN_START_TIMESTAMP] = __tm1; \
+    rec_ref->file_rec->fcounters[PNETCDF_F_OPEN_END_TIMESTAMP] = __tm2; \
     if(comm_size == 1) rec_ref->file_rec->counters[PNETCDF_INDEP_OPENS] += 1; \
     else rec_ref->file_rec->counters[PNETCDF_COLL_OPENS] += 1; \
     darshan_add_record_ref(&(pnetcdf_runtime->ncid_hash), __ncidp, sizeof(int), rec_ref); \
@@ -112,12 +114,13 @@ int DARSHAN_DECL(ncmpi_create)(MPI_Comm comm, const char *path,
 {
     int ret;
     char* tmp;
-    double tm1;
+    double tm1, tm2;
 
     MAP_OR_FAIL(ncmpi_create);
 
     tm1 = darshan_core_wtime();
     ret = __real_ncmpi_create(comm, path, cmode, info, ncidp);
+    tm2 = darshan_core_wtime();
     if(ret == 0)
     {
         /* use ROMIO approach to strip prefix if present */
@@ -131,7 +134,7 @@ int DARSHAN_DECL(ncmpi_create)(MPI_Comm comm, const char *path,
         }
 
         PNETCDF_PRE_RECORD();
-        PNETCDF_RECORD_OPEN(ncidp, path, comm, tm1);
+        PNETCDF_RECORD_OPEN(ncidp, path, comm, tm1, tm2);
         PNETCDF_POST_RECORD();
     }
 
@@ -143,12 +146,13 @@ int DARSHAN_DECL(ncmpi_open)(MPI_Comm comm, const char *path,
 {
     int ret;
     char* tmp;
-    double tm1;
+    double tm1, tm2;
 
     MAP_OR_FAIL(ncmpi_open);
 
     tm1 = darshan_core_wtime();
     ret = __real_ncmpi_open(comm, path, omode, info, ncidp);
+    tm2 = darshan_core_wtime();
     if(ret == 0)
     {
         /* use ROMIO approach to strip prefix if present */
@@ -162,7 +166,7 @@ int DARSHAN_DECL(ncmpi_open)(MPI_Comm comm, const char *path,
         }
 
         PNETCDF_PRE_RECORD();
-        PNETCDF_RECORD_OPEN(ncidp, path, comm, tm1);
+        PNETCDF_RECORD_OPEN(ncidp, path, comm, tm1, tm2);
         PNETCDF_POST_RECORD();
     }
 
@@ -173,18 +177,23 @@ int DARSHAN_DECL(ncmpi_close)(int ncid)
 {
     struct pnetcdf_file_record_ref *rec_ref;
     int ret;
+    double tm1, tm2;
 
     MAP_OR_FAIL(ncmpi_close);
 
+    tm1 = darshan_core_wtime();
     ret = __real_ncmpi_close(ncid);
+    tm2 = darshan_core_wtime();
 
     PNETCDF_PRE_RECORD();
     rec_ref = darshan_lookup_record_ref(pnetcdf_runtime->ncid_hash,
         &ncid, sizeof(int));
     if(rec_ref)
     {
-        rec_ref->file_rec->fcounters[PNETCDF_F_CLOSE_TIMESTAMP] =
-            darshan_core_wtime();
+        if(rec_ref->file_rec->fcounters[PNETCDF_F_CLOSE_START_TIMESTAMP] == 0 ||
+         rec_ref->file_rec->fcounters[PNETCDF_F_CLOSE_START_TIMESTAMP] > tm1)
+           rec_ref->file_rec->fcounters[PNETCDF_F_CLOSE_START_TIMESTAMP] = tm1;
+        rec_ref->file_rec->fcounters[PNETCDF_F_CLOSE_END_TIMESTAMP] = tm2;
         darshan_delete_record_ref(&(pnetcdf_runtime->ncid_hash),
             &ncid, sizeof(int));
     }
@@ -302,7 +311,7 @@ static void pnetcdf_record_reduction_op(void* infile_v, void* inoutfile_v,
         }
 
         /* min non-zero (if available) value */
-        for(j=PNETCDF_F_OPEN_TIMESTAMP; j<=PNETCDF_F_OPEN_TIMESTAMP; j++)
+        for(j=PNETCDF_F_OPEN_START_TIMESTAMP; j<=PNETCDF_F_CLOSE_START_TIMESTAMP; j++)
         {
             if((infile->fcounters[j] < inoutfile->fcounters[j] &&
                infile->fcounters[j] > 0) || inoutfile->fcounters[j] == 0) 
@@ -312,7 +321,7 @@ static void pnetcdf_record_reduction_op(void* infile_v, void* inoutfile_v,
         }
 
         /* max */
-        for(j=PNETCDF_F_CLOSE_TIMESTAMP; j<=PNETCDF_F_CLOSE_TIMESTAMP; j++)
+        for(j=PNETCDF_F_OPEN_END_TIMESTAMP; j<=PNETCDF_F_CLOSE_END_TIMESTAMP; j++)
         {
             if(infile->fcounters[j] > inoutfile->fcounters[j])
                 tmp_file.fcounters[j] = infile->fcounters[j];


=====================================
darshan-util/darshan-hdf5-logutils.c
=====================================
@@ -30,6 +30,8 @@ char *hdf5_f_counter_names[] = {
 };
 #undef X
 
+#define DARSHAN_HDF5_FILE_SIZE_1 40
+
 static int darshan_log_get_hdf5_file(darshan_fd fd, void** hdf5_buf_p);
 static int darshan_log_put_hdf5_file(darshan_fd fd, void* hdf5_buf);
 static void darshan_log_print_hdf5_file(void *file_rec,
@@ -52,6 +54,7 @@ struct darshan_mod_logutil_funcs hdf5_logutils =
 static int darshan_log_get_hdf5_file(darshan_fd fd, void** hdf5_buf_p)
 {
     struct darshan_hdf5_file *file = *((struct darshan_hdf5_file **)hdf5_buf_p);
+    int rec_len;
     int i;
     int ret;
 
@@ -65,12 +68,42 @@ static int darshan_log_get_hdf5_file(darshan_fd fd, void** hdf5_buf_p)
             return(-1);
     }
 
-    ret = darshan_log_get_mod(fd, DARSHAN_HDF5_MOD, file,
-        sizeof(struct darshan_hdf5_file));
+    if(fd->mod_ver[DARSHAN_HDF5_MOD] == DARSHAN_HDF5_VER)
+    {
+        /* log format is in current version, so we don't need to do any
+         * translation of counters while reading
+         */
+        rec_len = sizeof(struct darshan_hdf5_file);
+        ret = darshan_log_get_mod(fd, DARSHAN_HDF5_MOD, file, rec_len);
+    }
+    else
+    {
+        char scratch[1024] = {0};
+        char *src_p, *dest_p;
+        int len;
+
+        rec_len = DARSHAN_HDF5_FILE_SIZE_1;
+        ret = darshan_log_get_mod(fd, DARSHAN_HDF5_MOD, scratch, rec_len);
+        if(ret != rec_len)
+            goto exit;
+
+        /* upconvert version 1 to version 2 in-place */
+        dest_p = scratch + (sizeof(struct darshan_base_record) +
+            (1 * sizeof(int64_t)) + (3 * sizeof(double)));
+        src_p = dest_p - (2 * sizeof(double));
+        len = sizeof(double);
+        memmove(dest_p, src_p, len);
+        /* set F_CLOSE_START and F_OPEN_END to -1 */
+        *((double *)src_p) = -1;
+        *((double *)(src_p + sizeof(double))) = -1;
+
+        memcpy(file, scratch, sizeof(struct darshan_hdf5_file));
+    }
 
+exit:
     if(*hdf5_buf_p == NULL)
     {
-        if(ret == sizeof(struct darshan_hdf5_file))
+        if(ret == rec_len)
             *hdf5_buf_p = file;
         else
             free(file);
@@ -78,7 +111,7 @@ static int darshan_log_get_hdf5_file(darshan_fd fd, void** hdf5_buf_p)
 
     if(ret < 0)
         return(-1);
-    else if(ret < sizeof(struct darshan_hdf5_file))
+    else if(ret < rec_len)
         return(0);
     else
     {
@@ -90,7 +123,16 @@ static int darshan_log_get_hdf5_file(darshan_fd fd, void** hdf5_buf_p)
             for(i=0; i<HDF5_NUM_INDICES; i++)
                 DARSHAN_BSWAP64(&file->counters[i]);
             for(i=0; i<HDF5_F_NUM_INDICES; i++)
+            {
+                /* skip counters we explicitly set to -1 since they don't
+                 * need to be byte swapped
+                 */
+                if((fd->mod_ver[DARSHAN_HDF5_MOD] == 1) &&
+                    ((i == HDF5_F_CLOSE_START_TIMESTAMP) ||
+                     (i == HDF5_F_OPEN_END_TIMESTAMP)))
+                    continue;
                 DARSHAN_BSWAP64(&file->fcounters[i]);
+            }
         }
 
         return(1);
@@ -140,8 +182,15 @@ static void darshan_log_print_hdf5_description(int ver)
 {
     printf("\n# description of HDF5 counters:\n");
     printf("#   HDF5_OPENS: HDF5 file open operation counts.\n");
-    printf("#   HDF5_F_OPEN_TIMESTAMP: timestamp of first HDF5 file open.\n");
-    printf("#   HDF5_F_CLOSE_TIMESTAMP: timestamp of last HDF5 file close.\n");
+    printf("#   HDF5_F_*_START_TIMESTAMP: timestamp of first HDF5 file open/close.\n");
+    printf("#   HDF5_F_*_END_TIMESTAMP: timestamp of last HDF5 file open/close.\n");
+
+    if(ver == 1)
+    {
+        printf("\n# WARNING: HDF5 module log format version 1 does not support the following counters:\n");
+        printf("# - HDF5_F_CLOSE_START_TIMESTAMP\n");
+        printf("# - HDF5_F_OPEN_END_TIMESTAMP\n");
+    }
 
     return;
 }
@@ -242,7 +291,8 @@ static void darshan_log_agg_hdf5_files(void *rec, void *agg_rec, int init_flag)
     {
         switch(i)
         {
-            case HDF5_F_OPEN_TIMESTAMP:
+            case HDF5_F_OPEN_START_TIMESTAMP:
+            case HDF5_F_CLOSE_START_TIMESTAMP:
                 /* minimum non-zero */
                 if((hdf5_rec->fcounters[i] > 0)  &&
                     ((agg_hdf5_rec->fcounters[i] == 0) ||
@@ -251,7 +301,8 @@ static void darshan_log_agg_hdf5_files(void *rec, void *agg_rec, int init_flag)
                     agg_hdf5_rec->fcounters[i] = hdf5_rec->fcounters[i];
                 }
                 break;
-            case HDF5_F_CLOSE_TIMESTAMP:
+            case HDF5_F_OPEN_END_TIMESTAMP:
+            case HDF5_F_CLOSE_END_TIMESTAMP:
                 /* maximum */
                 if(hdf5_rec->fcounters[i] > agg_hdf5_rec->fcounters[i])
                 {


=====================================
darshan-util/darshan-mpiio-logutils.c
=====================================
@@ -30,6 +30,8 @@ char *mpiio_f_counter_names[] = {
 };
 #undef X
 
+#define DARSHAN_MPIIO_FILE_SIZE_1 544
+
 static int darshan_log_get_mpiio_file(darshan_fd fd, void** mpiio_buf_p);
 static int darshan_log_put_mpiio_file(darshan_fd fd, void* mpiio_buf);
 static void darshan_log_print_mpiio_file(void *file_rec,
@@ -52,6 +54,7 @@ struct darshan_mod_logutil_funcs mpiio_logutils =
 static int darshan_log_get_mpiio_file(darshan_fd fd, void** mpiio_buf_p)
 {
     struct darshan_mpiio_file *file = *((struct darshan_mpiio_file **)mpiio_buf_p);
+    int rec_len;
     int i;
     int ret;
 
@@ -64,13 +67,43 @@ static int darshan_log_get_mpiio_file(darshan_fd fd, void** mpiio_buf_p)
         if(!file)
             return(-1);
     }
-    
-    ret = darshan_log_get_mod(fd, DARSHAN_MPIIO_MOD, file,
-        sizeof(struct darshan_mpiio_file));
 
+    if(fd->mod_ver[DARSHAN_MPIIO_MOD] == DARSHAN_MPIIO_VER)
+    {
+        /* log format is in current version, so we don't need to do any
+         * translation of counters while reading
+         */
+        rec_len = sizeof(struct darshan_mpiio_file);
+        ret = darshan_log_get_mod(fd, DARSHAN_MPIIO_MOD, file, rec_len);
+    }
+    else
+    {
+        char scratch[1024] = {0};
+        char *src_p, *dest_p;
+        int len;
+
+        rec_len = DARSHAN_MPIIO_FILE_SIZE_1;
+        ret = darshan_log_get_mod(fd, DARSHAN_MPIIO_MOD, scratch, rec_len);
+        if(ret != rec_len)
+            goto exit;
+
+        /* upconvert versions 1/2 to version 3 in-place */
+        dest_p = scratch + (sizeof(struct darshan_base_record) +
+            (51 * sizeof(int64_t)) + (5 * sizeof(double)));
+        src_p = dest_p - (2 * sizeof(double));
+        len = (12 * sizeof(double));
+        memmove(dest_p, src_p, len);
+        /* set F_CLOSE_START and F_OPEN_END to -1 */
+        *((double *)src_p) = -1;
+        *((double *)(src_p + sizeof(double))) = -1;
+
+        memcpy(file, scratch, sizeof(struct darshan_mpiio_file));
+    }
+   
+exit:
     if(*mpiio_buf_p == NULL)
     {
-        if(ret == sizeof(struct darshan_mpiio_file))
+        if(ret == rec_len)
             *mpiio_buf_p = file;
         else
             free(file);
@@ -78,7 +111,7 @@ static int darshan_log_get_mpiio_file(darshan_fd fd, void** mpiio_buf_p)
 
     if(ret < 0)
         return(-1);
-    else if(ret < sizeof(struct darshan_mpiio_file))
+    else if(ret < rec_len)
         return(0);
     else
     {
@@ -90,7 +123,16 @@ static int darshan_log_get_mpiio_file(darshan_fd fd, void** mpiio_buf_p)
             for(i=0; i<MPIIO_NUM_INDICES; i++)
                 DARSHAN_BSWAP64(&file->counters[i]);
             for(i=0; i<MPIIO_F_NUM_INDICES; i++)
+            {
+                /* skip counters we explicitly set to -1 since they don't
+                 * need to be byte swapped
+                 */
+                if((fd->mod_ver[DARSHAN_MPIIO_MOD] < 3) &&
+                    ((i == MPIIO_F_CLOSE_START_TIMESTAMP) ||
+                     (i == MPIIO_F_OPEN_END_TIMESTAMP)))
+                    continue;
                 DARSHAN_BSWAP64(&file->fcounters[i]);
+            }
         }
 
         return(1);
@@ -156,20 +198,24 @@ static void darshan_log_print_mpiio_description(int ver)
     printf("#   MPIIO_ACCESS*_COUNT: count of the four most common total access sizes.\n");
     printf("#   MPIIO_*_RANK: rank of the processes that were the fastest and slowest at I/O (for shared files).\n");
     printf("#   MPIIO_*_RANK_BYTES: total bytes transferred at MPI-IO layer by the fastest and slowest ranks (for shared files).\n");
-    printf("#   MPIIO_F_OPEN_TIMESTAMP: timestamp of first open.\n");
-    printf("#   MPIIO_F_*_START_TIMESTAMP: timestamp of first MPI-IO read/write.\n");
-    printf("#   MPIIO_F_*_END_TIMESTAMP: timestamp of last MPI-IO read/write.\n");
-    printf("#   MPIIO_F_CLOSE_TIMESTAMP: timestamp of last close.\n");
+    printf("#   MPIIO_F_*_START_TIMESTAMP: timestamp of first MPI-IO open/read/write/close.\n");
+    printf("#   MPIIO_F_*_END_TIMESTAMP: timestamp of last MPI-IO open/read/write/close.\n");
     printf("#   MPIIO_F_READ/WRITE/META_TIME: cumulative time spent in MPI-IO read, write, or metadata operations.\n");
     printf("#   MPIIO_F_MAX_*_TIME: duration of the slowest MPI-IO read and write operations.\n");
     printf("#   MPIIO_F_*_RANK_TIME: fastest and slowest I/O time for a single rank (for shared files).\n");
     printf("#   MPIIO_F_VARIANCE_RANK_*: variance of total I/O time and bytes moved for all ranks (for shared files).\n");
 
-    if(ver < 2)
+    if(ver == 1)
     {
         printf("\n# WARNING: MPIIO module log format version 1 has the following limitations:\n");
         printf("# - MPIIO_F_WRITE_START_TIMESTAMP may not be accurate.\n");
     }
+    if(ver <= 2)
+    {
+        printf("\n# WARNING: MPIIO module log format version <=2 does not support the following counters:\n");
+        printf("# - MPIIO_F_CLOSE_START_TIMESTAMP\n");
+        printf("# - MPIIO_F_OPEN_END_TIMESTAMP\n");
+    }
 
     return;
 }
@@ -422,9 +468,10 @@ static void darshan_log_agg_mpiio_files(void *rec, void *agg_rec, int init_flag)
                 /* sum */
                 agg_mpi_rec->fcounters[i] += mpi_rec->fcounters[i];
                 break;
-            case MPIIO_F_OPEN_TIMESTAMP:
+            case MPIIO_F_OPEN_START_TIMESTAMP:
             case MPIIO_F_READ_START_TIMESTAMP:
             case MPIIO_F_WRITE_START_TIMESTAMP:
+            case MPIIO_F_CLOSE_START_TIMESTAMP:
                 /* minimum non-zero */
                 if((mpi_rec->fcounters[i] > 0)  &&
                     ((agg_mpi_rec->fcounters[i] == 0) ||
@@ -433,9 +480,10 @@ static void darshan_log_agg_mpiio_files(void *rec, void *agg_rec, int init_flag)
                     agg_mpi_rec->fcounters[i] = mpi_rec->fcounters[i];
                 }
                 break;
+            case MPIIO_F_OPEN_END_TIMESTAMP:
             case MPIIO_F_READ_END_TIMESTAMP:
             case MPIIO_F_WRITE_END_TIMESTAMP:
-            case MPIIO_F_CLOSE_TIMESTAMP:
+            case MPIIO_F_CLOSE_END_TIMESTAMP:
                 /* maximum */
                 if(mpi_rec->fcounters[i] > agg_mpi_rec->fcounters[i])
                 {


=====================================
darshan-util/darshan-parser.c
=====================================
@@ -1149,18 +1149,20 @@ void mpiio_accum_file(struct darshan_mpiio_file *mfile,
     {
         switch(i)
         {
-            case MPIIO_F_OPEN_TIMESTAMP:
+            case MPIIO_F_OPEN_START_TIMESTAMP:
             case MPIIO_F_READ_START_TIMESTAMP:
             case MPIIO_F_WRITE_START_TIMESTAMP:
+            case MPIIO_F_CLOSE_START_TIMESTAMP:
                 if(tmp->fcounters[i] == 0 || 
                     tmp->fcounters[i] > mfile->fcounters[i])
                 {
                     tmp->fcounters[i] = mfile->fcounters[i];
                 }
                 break;
+            case MPIIO_F_OPEN_END_TIMESTAMP:
             case MPIIO_F_READ_END_TIMESTAMP:
             case MPIIO_F_WRITE_END_TIMESTAMP:
-            case MPIIO_F_CLOSE_TIMESTAMP:
+            case MPIIO_F_CLOSE_END_TIMESTAMP:
                 if(tmp->fcounters[i] == 0 || 
                     tmp->fcounters[i] < mfile->fcounters[i])
                 {
@@ -1386,12 +1388,12 @@ void mpiio_accum_perf(struct darshan_mpiio_file *mfile,
     if(mfile->base_rec.rank == -1)
     {
         /* by_open */
-        if(mfile->fcounters[MPIIO_F_CLOSE_TIMESTAMP] >
-            mfile->fcounters[MPIIO_F_OPEN_TIMESTAMP])
+        if(mfile->fcounters[MPIIO_F_CLOSE_END_TIMESTAMP] >
+            mfile->fcounters[MPIIO_F_OPEN_START_TIMESTAMP])
         {
             pdata->shared_time_by_open +=
-                mfile->fcounters[MPIIO_F_CLOSE_TIMESTAMP] -
-                mfile->fcounters[MPIIO_F_OPEN_TIMESTAMP];
+                mfile->fcounters[MPIIO_F_CLOSE_END_TIMESTAMP] -
+                mfile->fcounters[MPIIO_F_OPEN_START_TIMESTAMP];
         }
 
         /* by_open_lastio */
@@ -1399,21 +1401,21 @@ void mpiio_accum_perf(struct darshan_mpiio_file *mfile,
             mfile->fcounters[MPIIO_F_WRITE_END_TIMESTAMP])
         {
             /* be careful: file may have been opened but not read or written */
-            if(mfile->fcounters[MPIIO_F_READ_END_TIMESTAMP] > mfile->fcounters[MPIIO_F_OPEN_TIMESTAMP])
+            if(mfile->fcounters[MPIIO_F_READ_END_TIMESTAMP] > mfile->fcounters[MPIIO_F_OPEN_START_TIMESTAMP])
             {
                 pdata->shared_time_by_open_lastio += 
                     mfile->fcounters[MPIIO_F_READ_END_TIMESTAMP] - 
-                    mfile->fcounters[MPIIO_F_OPEN_TIMESTAMP];
+                    mfile->fcounters[MPIIO_F_OPEN_START_TIMESTAMP];
             }
         }
         else
         {
             /* be careful: file may have been opened but not read or written */
-            if(mfile->fcounters[MPIIO_F_WRITE_END_TIMESTAMP] > mfile->fcounters[MPIIO_F_OPEN_TIMESTAMP])
+            if(mfile->fcounters[MPIIO_F_WRITE_END_TIMESTAMP] > mfile->fcounters[MPIIO_F_OPEN_START_TIMESTAMP])
             {
                 pdata->shared_time_by_open_lastio += 
                     mfile->fcounters[MPIIO_F_WRITE_END_TIMESTAMP] - 
-                    mfile->fcounters[MPIIO_F_OPEN_TIMESTAMP];
+                    mfile->fcounters[MPIIO_F_OPEN_START_TIMESTAMP];
             }
         }
 
@@ -2011,7 +2013,7 @@ void mpiio_file_list(hash_entry_t *file_hash,
 
         if(detail_flag)
         {
-            for(i=MPIIO_F_OPEN_TIMESTAMP; i<=MPIIO_F_CLOSE_TIMESTAMP; i++)
+            for(i=MPIIO_F_OPEN_START_TIMESTAMP; i<=MPIIO_F_CLOSE_END_TIMESTAMP; i++)
             {
                 printf("\t%f", file_rec->fcounters[i]);
             }


=====================================
darshan-util/darshan-pnetcdf-logutils.c
=====================================
@@ -30,6 +30,8 @@ char *pnetcdf_f_counter_names[] = {
 };
 #undef X
 
+#define DARSHAN_PNETCDF_FILE_SIZE_1 48
+
 static int darshan_log_get_pnetcdf_file(darshan_fd fd, void** pnetcdf_buf_p);
 static int darshan_log_put_pnetcdf_file(darshan_fd fd, void* pnetcdf_buf);
 static void darshan_log_print_pnetcdf_file(void *file_rec,
@@ -52,6 +54,7 @@ struct darshan_mod_logutil_funcs pnetcdf_logutils =
 static int darshan_log_get_pnetcdf_file(darshan_fd fd, void** pnetcdf_buf_p)
 {
     struct darshan_pnetcdf_file *file = *((struct darshan_pnetcdf_file **)pnetcdf_buf_p);
+    int rec_len;
     int i;
     int ret;
 
@@ -65,12 +68,42 @@ static int darshan_log_get_pnetcdf_file(darshan_fd fd, void** pnetcdf_buf_p)
             return(-1);
     }
 
-    ret = darshan_log_get_mod(fd, DARSHAN_PNETCDF_MOD, file,
-        sizeof(struct darshan_pnetcdf_file));
+    if(fd->mod_ver[DARSHAN_PNETCDF_MOD] == DARSHAN_PNETCDF_VER)
+    {
+        /* log format is in current version, so we don't need to do any
+         * translation of counters while reading
+         */
+        rec_len = sizeof(struct darshan_pnetcdf_file);
+        ret = darshan_log_get_mod(fd, DARSHAN_PNETCDF_MOD, file, rec_len);
+    }
+    else
+    {
+        char scratch[1024] = {0};
+        char *src_p, *dest_p;
+        int len;
+
+        rec_len = DARSHAN_PNETCDF_FILE_SIZE_1;
+        ret = darshan_log_get_mod(fd, DARSHAN_PNETCDF_MOD, scratch, rec_len);
+        if(ret != rec_len)
+            goto exit;
+
+        /* upconvert version 1 to version 2 in-place */
+        dest_p = scratch + (sizeof(struct darshan_base_record) +
+            (2 * sizeof(int64_t)) + (3 * sizeof(double)));
+        src_p = dest_p - (2 * sizeof(double));
+        len = sizeof(double);
+        memmove(dest_p, src_p, len);
+        /* set F_CLOSE_START and F_OPEN_END to -1 */
+        *((double *)src_p) = -1;
+        *((double *)(src_p + sizeof(double))) = -1;
+
+        memcpy(file, scratch, sizeof(struct darshan_pnetcdf_file));
+    }
 
+exit:
     if(*pnetcdf_buf_p == NULL)
     {
-        if(ret == sizeof(struct darshan_pnetcdf_file))
+        if(ret == rec_len)
             *pnetcdf_buf_p = file;
         else
             free(file);
@@ -78,7 +111,7 @@ static int darshan_log_get_pnetcdf_file(darshan_fd fd, void** pnetcdf_buf_p)
 
     if(ret < 0)
         return(-1);
-    else if(ret < sizeof(struct darshan_pnetcdf_file))
+    else if(ret < rec_len)
         return(0);
     else
     {
@@ -90,7 +123,16 @@ static int darshan_log_get_pnetcdf_file(darshan_fd fd, void** pnetcdf_buf_p)
             for(i=0; i<PNETCDF_NUM_INDICES; i++)
                 DARSHAN_BSWAP64(&file->counters[i]);
             for(i=0; i<PNETCDF_F_NUM_INDICES; i++)
+            {
+                /* skip counters we explicitly set to -1 since they don't
+                 * need to be byte swapped
+                 */
+                if((fd->mod_ver[DARSHAN_PNETCDF_MOD] == 1) &&
+                    ((i == PNETCDF_F_CLOSE_START_TIMESTAMP) ||
+                     (i == PNETCDF_F_OPEN_END_TIMESTAMP)))
+                    continue;
                 DARSHAN_BSWAP64(&file->fcounters[i]);
+            }
         }
 
         return(1);
@@ -141,8 +183,15 @@ static void darshan_log_print_pnetcdf_description(int ver)
     printf("\n# description of PNETCDF counters:\n");
     printf("#   PNETCDF_INDEP_OPENS: PNETCDF independent file open operation counts.\n");
     printf("#   PNETCDF_COLL_OPENS: PNETCDF collective file open operation counts.\n");
-    printf("#   PNETCDF_F_OPEN_TIMESTAMP: timestamp of first PNETCDF file open.\n");
-    printf("#   PNETCDF_F_CLOSE_TIMESTAMP: timestamp of last PNETCDF file close.\n");
+    printf("#   PNETCDF_F_*_START_TIMESTAMP: timestamp of first PNETCDF file open/close.\n");
+    printf("#   PNETCDF_F_*_END_TIMESTAMP: timestamp of last PNETCDF file open/close.\n");
+
+    if(ver == 1)
+    {
+        printf("\n# WARNING: PNETCDF module log format version 1 does not support the following counters:\n");
+        printf("# - PNETCDF_F_CLOSE_START_TIMESTAMP\n");
+        printf("# - PNETCDF_F_OPEN_END_TIMESTAMP\n");
+    }
 
     return;
 }
@@ -244,7 +293,8 @@ static void darshan_log_agg_pnetcdf_files(void *rec, void *agg_rec, int init_fla
     {
         switch(i)
         {
-            case PNETCDF_F_OPEN_TIMESTAMP:
+            case PNETCDF_F_OPEN_START_TIMESTAMP:
+            case PNETCDF_F_CLOSE_START_TIMESTAMP:
                 /* minimum non-zero */
                 if((pnc_rec->fcounters[i] > 0)  &&
                     ((agg_pnc_rec->fcounters[i] == 0) ||
@@ -253,7 +303,8 @@ static void darshan_log_agg_pnetcdf_files(void *rec, void *agg_rec, int init_fla
                     agg_pnc_rec->fcounters[i] = pnc_rec->fcounters[i];
                 }
                 break;
-            case PNETCDF_F_CLOSE_TIMESTAMP:
+            case PNETCDF_F_OPEN_END_TIMESTAMP:
+            case PNETCDF_F_CLOSE_END_TIMESTAMP:
                 /* maximum */
                 if(pnc_rec->fcounters[i] > agg_pnc_rec->fcounters[i])
                 {



View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/commit/598a2e85ab3350ba8336a130930758597e986a4b

-- 
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/commit/598a2e85ab3350ba8336a130930758597e986a4b
You're receiving this email because of your account on xgitlab.cels.anl.gov.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20190116/6a76791a/attachment-0001.html>


More information about the Darshan-commits mailing list