[Darshan-commits] [Git][darshan/darshan][master] add close_start and open_end timers
Shane Snyder
xgitlab at cels.anl.gov
Wed Jan 16 14:33:58 CST 2019
Shane Snyder pushed to branch master at darshan / darshan
Commits:
598a2e85 by Shane Snyder at 2019-01-16T20:30:22Z
add close_start and open_end timers
for MPIIO, PNETCDF, and HDF5 modules.
- - - - -
10 changed files:
- darshan-hdf5-log-format.h
- darshan-mpiio-log-format.h
- darshan-pnetcdf-log-format.h
- darshan-runtime/lib/darshan-hdf5.c
- darshan-runtime/lib/darshan-mpiio.c
- darshan-runtime/lib/darshan-pnetcdf.c
- darshan-util/darshan-hdf5-logutils.c
- darshan-util/darshan-mpiio-logutils.c
- darshan-util/darshan-parser.c
- darshan-util/darshan-pnetcdf-logutils.c
Changes:
=====================================
darshan-hdf5-log-format.h
=====================================
@@ -8,7 +8,7 @@
#define __DARSHAN_HDF5_LOG_FORMAT_H
/* current HDF5 log format version */
-#define DARSHAN_HDF5_VER 1
+#define DARSHAN_HDF5_VER 2
#define HDF5_COUNTERS \
/* count of HDF5 opens */\
@@ -18,9 +18,13 @@
#define HDF5_F_COUNTERS \
/* timestamp of first open */\
- X(HDF5_F_OPEN_TIMESTAMP) \
+ X(HDF5_F_OPEN_START_TIMESTAMP) \
+ /* timestamp of first close */\
+ X(HDF5_F_CLOSE_START_TIMESTAMP) \
+ /* timestamp of last open */\
+ X(HDF5_F_OPEN_END_TIMESTAMP) \
/* timestamp of last close */\
- X(HDF5_F_CLOSE_TIMESTAMP) \
+ X(HDF5_F_CLOSE_END_TIMESTAMP) \
/* end of counters*/\
X(HDF5_F_NUM_INDICES)
=====================================
darshan-mpiio-log-format.h
=====================================
@@ -8,7 +8,7 @@
#define __DARSHAN_MPIIO_LOG_FORMAT_H
/* current MPI-IO log format version */
-#define DARSHAN_MPIIO_VER 2
+#define DARSHAN_MPIIO_VER 3
/* TODO: maybe use a counter to track cases in which a derived datatype is used? */
@@ -92,17 +92,21 @@
#define MPIIO_F_COUNTERS \
/* timestamp of first open */\
- X(MPIIO_F_OPEN_TIMESTAMP) \
+ X(MPIIO_F_OPEN_START_TIMESTAMP) \
/* timestamp of first read */\
X(MPIIO_F_READ_START_TIMESTAMP) \
/* timestamp of first write */\
X(MPIIO_F_WRITE_START_TIMESTAMP) \
+ /* timestamp of first close */\
+ X(MPIIO_F_CLOSE_START_TIMESTAMP) \
+ /* timestamp of last open */\
+ X(MPIIO_F_OPEN_END_TIMESTAMP) \
/* timestamp of last read */\
X(MPIIO_F_READ_END_TIMESTAMP) \
/* timestamp of last write */\
X(MPIIO_F_WRITE_END_TIMESTAMP) \
/* timestamp of last close */\
- X(MPIIO_F_CLOSE_TIMESTAMP) \
+ X(MPIIO_F_CLOSE_END_TIMESTAMP) \
/* cumulative MPI-IO read time */\
X(MPIIO_F_READ_TIME) \
/* cumulative MPI-IO write time */\
=====================================
darshan-pnetcdf-log-format.h
=====================================
@@ -8,7 +8,7 @@
#define __DARSHAN_PNETCDF_LOG_FORMAT_H
/* current PNETCDF log format version */
-#define DARSHAN_PNETCDF_VER 1
+#define DARSHAN_PNETCDF_VER 2
#define PNETCDF_COUNTERS \
/* count of PNETCDF independent opens */\
@@ -20,9 +20,13 @@
#define PNETCDF_F_COUNTERS \
/* timestamp of first open */\
- X(PNETCDF_F_OPEN_TIMESTAMP) \
+ X(PNETCDF_F_OPEN_START_TIMESTAMP) \
+ /* timestamp of first close */\
+ X(PNETCDF_F_CLOSE_START_TIMESTAMP) \
+ /* timestamp of last open */\
+ X(PNETCDF_F_OPEN_END_TIMESTAMP) \
/* timestamp of last close */\
- X(PNETCDF_F_CLOSE_TIMESTAMP) \
+ X(PNETCDF_F_CLOSE_END_TIMESTAMP) \
/* end of counters*/\
X(PNETCDF_F_NUM_INDICES)
=====================================
darshan-runtime/lib/darshan-hdf5.c
=====================================
@@ -90,7 +90,7 @@ static int my_rank = -1;
HDF5_UNLOCK(); \
} while(0)
-#define HDF5_RECORD_OPEN(__ret, __path, __tm1) do { \
+#define HDF5_RECORD_OPEN(__ret, __path, __tm1, __tm2) do { \
darshan_record_id rec_id; \
struct hdf5_file_record_ref *rec_ref; \
char *newpath; \
@@ -107,8 +107,10 @@ static int my_rank = -1;
if(newpath != __path) free(newpath); \
break; \
} \
- if(rec_ref->file_rec->fcounters[HDF5_F_OPEN_TIMESTAMP] == 0) \
- rec_ref->file_rec->fcounters[HDF5_F_OPEN_TIMESTAMP] = __tm1; \
+ if(rec_ref->file_rec->fcounters[HDF5_F_OPEN_START_TIMESTAMP] == 0 || \
+ rec_ref->file_rec->fcounters[HDF5_F_OPEN_START_TIMESTAMP] > __tm1) \
+ rec_ref->file_rec->fcounters[HDF5_F_OPEN_START_TIMESTAMP] = __tm1; \
+ rec_ref->file_rec->fcounters[HDF5_F_OPEN_END_TIMESTAMP] = __tm2; \
rec_ref->file_rec->counters[HDF5_OPENS] += 1; \
darshan_add_record_ref(&(hdf5_runtime->hid_hash), &__ret, sizeof(hid_t), rec_ref); \
if(newpath != __path) free(newpath); \
@@ -123,7 +125,7 @@ hid_t DARSHAN_DECL(H5Fcreate)(const char *filename, unsigned flags,
{
hid_t ret;
char* tmp;
- double tm1;
+ double tm1, tm2;
unsigned majnum, minnum, relnum;
H5get_libversion(&majnum, &minnum, &relnum);
@@ -146,6 +148,7 @@ hid_t DARSHAN_DECL(H5Fcreate)(const char *filename, unsigned flags,
tm1 = darshan_core_wtime();
ret = __real_H5Fcreate(filename, flags, create_plist, access_plist);
+ tm2 = darshan_core_wtime();
if(ret >= 0)
{
/* use ROMIO approach to strip prefix if present */
@@ -159,7 +162,7 @@ hid_t DARSHAN_DECL(H5Fcreate)(const char *filename, unsigned flags,
}
HDF5_PRE_RECORD();
- HDF5_RECORD_OPEN(ret, filename, tm1);
+ HDF5_RECORD_OPEN(ret, filename, tm1, tm2);
HDF5_POST_RECORD();
}
@@ -171,7 +174,7 @@ hid_t DARSHAN_DECL(H5Fopen)(const char *filename, unsigned flags,
{
hid_t ret;
char* tmp;
- double tm1;
+ double tm1, tm2;
unsigned majnum, minnum, relnum;
H5get_libversion(&majnum, &minnum, &relnum);
@@ -194,6 +197,7 @@ hid_t DARSHAN_DECL(H5Fopen)(const char *filename, unsigned flags,
tm1 = darshan_core_wtime();
ret = __real_H5Fopen(filename, flags, access_plist);
+ tm2 = darshan_core_wtime();
if(ret >= 0)
{
/* use ROMIO approach to strip prefix if present */
@@ -207,7 +211,7 @@ hid_t DARSHAN_DECL(H5Fopen)(const char *filename, unsigned flags,
}
HDF5_PRE_RECORD();
- HDF5_RECORD_OPEN(ret, filename, tm1);
+ HDF5_RECORD_OPEN(ret, filename, tm1, tm2);
HDF5_POST_RECORD();
}
@@ -218,19 +222,24 @@ hid_t DARSHAN_DECL(H5Fopen)(const char *filename, unsigned flags,
herr_t DARSHAN_DECL(H5Fclose)(hid_t file_id)
{
struct hdf5_file_record_ref *rec_ref;
+ double tm1, tm2;
herr_t ret;
MAP_OR_FAIL(H5Fclose);
+ tm1 = darshan_core_wtime();
ret = __real_H5Fclose(file_id);
+ tm2 = darshan_core_wtime();
HDF5_PRE_RECORD();
rec_ref = darshan_lookup_record_ref(hdf5_runtime->hid_hash,
&file_id, sizeof(hid_t));
if(rec_ref)
{
- rec_ref->file_rec->fcounters[HDF5_F_CLOSE_TIMESTAMP] =
- darshan_core_wtime();
+ if(rec_ref->file_rec->fcounters[HDF5_F_CLOSE_START_TIMESTAMP] == 0 ||
+ rec_ref->file_rec->fcounters[HDF5_F_CLOSE_START_TIMESTAMP] > tm1)
+ rec_ref->file_rec->fcounters[HDF5_F_CLOSE_START_TIMESTAMP] = tm1;
+ rec_ref->file_rec->fcounters[HDF5_F_CLOSE_END_TIMESTAMP] = tm2;
darshan_delete_record_ref(&(hdf5_runtime->hid_hash),
&file_id, sizeof(hid_t));
}
@@ -349,7 +358,7 @@ static void hdf5_record_reduction_op(void* infile_v, void* inoutfile_v,
}
/* min non-zero (if available) value */
- for(j=HDF5_F_OPEN_TIMESTAMP; j<=HDF5_F_OPEN_TIMESTAMP; j++)
+ for(j=HDF5_F_OPEN_START_TIMESTAMP; j<=HDF5_F_CLOSE_START_TIMESTAMP; j++)
{
if((infile->fcounters[j] < inoutfile->fcounters[j] &&
infile->fcounters[j] > 0) || inoutfile->fcounters[j] == 0)
@@ -359,7 +368,7 @@ static void hdf5_record_reduction_op(void* infile_v, void* inoutfile_v,
}
/* max */
- for(j=HDF5_F_CLOSE_TIMESTAMP; j<=HDF5_F_CLOSE_TIMESTAMP; j++)
+ for(j=HDF5_F_OPEN_END_TIMESTAMP; j<=HDF5_F_CLOSE_END_TIMESTAMP; j++)
{
if(infile->fcounters[j] > inoutfile->fcounters[j])
tmp_file.fcounters[j] = infile->fcounters[j];
=====================================
darshan-runtime/lib/darshan-mpiio.c
=====================================
@@ -229,9 +229,10 @@ static int enable_dxt_io_trace = 0;
rec_ref->file_rec->counters[MPIIO_COLL_OPENS] += 1; \
if(__info != MPI_INFO_NULL) \
rec_ref->file_rec->counters[MPIIO_HINTS] += 1; \
- if(rec_ref->file_rec->fcounters[MPIIO_F_OPEN_TIMESTAMP] == 0 || \
- rec_ref->file_rec->fcounters[MPIIO_F_OPEN_TIMESTAMP] > __tm1) \
- rec_ref->file_rec->fcounters[MPIIO_F_OPEN_TIMESTAMP] = __tm1; \
+ if(rec_ref->file_rec->fcounters[MPIIO_F_OPEN_START_TIMESTAMP] == 0 || \
+ rec_ref->file_rec->fcounters[MPIIO_F_OPEN_START_TIMESTAMP] > __tm1) \
+ rec_ref->file_rec->fcounters[MPIIO_F_OPEN_START_TIMESTAMP] = __tm1; \
+ rec_ref->file_rec->fcounters[MPIIO_F_OPEN_END_TIMESTAMP] = __tm2; \
DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[MPIIO_F_META_TIME], \
__tm1, __tm2, rec_ref->last_meta_end); \
darshan_add_record_ref(&(mpiio_runtime->fh_hash), &__fh, sizeof(MPI_File), rec_ref); \
@@ -1090,8 +1091,10 @@ int DARSHAN_DECL(MPI_File_close)(MPI_File *fh)
&tmp_fh, sizeof(MPI_File));
if(rec_ref)
{
- rec_ref->file_rec->fcounters[MPIIO_F_CLOSE_TIMESTAMP] =
- darshan_core_wtime();
+ if(rec_ref->file_rec->fcounters[MPIIO_F_CLOSE_START_TIMESTAMP] == 0 ||
+ rec_ref->file_rec->fcounters[MPIIO_F_CLOSE_START_TIMESTAMP] > tm1)
+ rec_ref->file_rec->fcounters[MPIIO_F_CLOSE_START_TIMESTAMP] = tm1;
+ rec_ref->file_rec->fcounters[MPIIO_F_CLOSE_END_TIMESTAMP] = tm2;
DARSHAN_TIMER_INC_NO_OVERLAP(
rec_ref->file_rec->fcounters[MPIIO_F_META_TIME],
tm1, tm2, rec_ref->last_meta_end);
@@ -1273,7 +1276,7 @@ static void mpiio_record_reduction_op(
}
/* min non-zero (if available) value */
- for(j=MPIIO_F_OPEN_TIMESTAMP; j<=MPIIO_F_WRITE_START_TIMESTAMP; j++)
+ for(j=MPIIO_F_OPEN_START_TIMESTAMP; j<=MPIIO_F_CLOSE_START_TIMESTAMP; j++)
{
if((infile->fcounters[j] < inoutfile->fcounters[j] &&
infile->fcounters[j] > 0) || inoutfile->fcounters[j] == 0)
@@ -1283,7 +1286,7 @@ static void mpiio_record_reduction_op(
}
/* max */
- for(j=MPIIO_F_READ_END_TIMESTAMP; j<= MPIIO_F_CLOSE_TIMESTAMP; j++)
+ for(j=MPIIO_F_OPEN_END_TIMESTAMP; j<= MPIIO_F_CLOSE_END_TIMESTAMP; j++)
{
if(infile->fcounters[j] > inoutfile->fcounters[j])
tmp_file.fcounters[j] = infile->fcounters[j];
=====================================
darshan-runtime/lib/darshan-pnetcdf.c
=====================================
@@ -76,7 +76,7 @@ static int my_rank = -1;
PNETCDF_UNLOCK(); \
} while(0)
-#define PNETCDF_RECORD_OPEN(__ncidp, __path, __comm, __tm1) do { \
+#define PNETCDF_RECORD_OPEN(__ncidp, __path, __comm, __tm1, __tm2) do { \
darshan_record_id rec_id; \
struct pnetcdf_file_record_ref *rec_ref; \
char *newpath; \
@@ -95,8 +95,10 @@ static int my_rank = -1;
break; \
} \
PMPI_Comm_size(__comm, &comm_size); \
- if(rec_ref->file_rec->fcounters[PNETCDF_F_OPEN_TIMESTAMP] == 0) \
- rec_ref->file_rec->fcounters[PNETCDF_F_OPEN_TIMESTAMP] = __tm1; \
+ if(rec_ref->file_rec->fcounters[PNETCDF_F_OPEN_START_TIMESTAMP] == 0 || \
+ rec_ref->file_rec->fcounters[PNETCDF_F_OPEN_START_TIMESTAMP] > __tm1) \
+ rec_ref->file_rec->fcounters[PNETCDF_F_OPEN_START_TIMESTAMP] = __tm1; \
+ rec_ref->file_rec->fcounters[PNETCDF_F_OPEN_END_TIMESTAMP] = __tm2; \
if(comm_size == 1) rec_ref->file_rec->counters[PNETCDF_INDEP_OPENS] += 1; \
else rec_ref->file_rec->counters[PNETCDF_COLL_OPENS] += 1; \
darshan_add_record_ref(&(pnetcdf_runtime->ncid_hash), __ncidp, sizeof(int), rec_ref); \
@@ -112,12 +114,13 @@ int DARSHAN_DECL(ncmpi_create)(MPI_Comm comm, const char *path,
{
int ret;
char* tmp;
- double tm1;
+ double tm1, tm2;
MAP_OR_FAIL(ncmpi_create);
tm1 = darshan_core_wtime();
ret = __real_ncmpi_create(comm, path, cmode, info, ncidp);
+ tm2 = darshan_core_wtime();
if(ret == 0)
{
/* use ROMIO approach to strip prefix if present */
@@ -131,7 +134,7 @@ int DARSHAN_DECL(ncmpi_create)(MPI_Comm comm, const char *path,
}
PNETCDF_PRE_RECORD();
- PNETCDF_RECORD_OPEN(ncidp, path, comm, tm1);
+ PNETCDF_RECORD_OPEN(ncidp, path, comm, tm1, tm2);
PNETCDF_POST_RECORD();
}
@@ -143,12 +146,13 @@ int DARSHAN_DECL(ncmpi_open)(MPI_Comm comm, const char *path,
{
int ret;
char* tmp;
- double tm1;
+ double tm1, tm2;
MAP_OR_FAIL(ncmpi_open);
tm1 = darshan_core_wtime();
ret = __real_ncmpi_open(comm, path, omode, info, ncidp);
+ tm2 = darshan_core_wtime();
if(ret == 0)
{
/* use ROMIO approach to strip prefix if present */
@@ -162,7 +166,7 @@ int DARSHAN_DECL(ncmpi_open)(MPI_Comm comm, const char *path,
}
PNETCDF_PRE_RECORD();
- PNETCDF_RECORD_OPEN(ncidp, path, comm, tm1);
+ PNETCDF_RECORD_OPEN(ncidp, path, comm, tm1, tm2);
PNETCDF_POST_RECORD();
}
@@ -173,18 +177,23 @@ int DARSHAN_DECL(ncmpi_close)(int ncid)
{
struct pnetcdf_file_record_ref *rec_ref;
int ret;
+ double tm1, tm2;
MAP_OR_FAIL(ncmpi_close);
+ tm1 = darshan_core_wtime();
ret = __real_ncmpi_close(ncid);
+ tm2 = darshan_core_wtime();
PNETCDF_PRE_RECORD();
rec_ref = darshan_lookup_record_ref(pnetcdf_runtime->ncid_hash,
&ncid, sizeof(int));
if(rec_ref)
{
- rec_ref->file_rec->fcounters[PNETCDF_F_CLOSE_TIMESTAMP] =
- darshan_core_wtime();
+ if(rec_ref->file_rec->fcounters[PNETCDF_F_CLOSE_START_TIMESTAMP] == 0 ||
+ rec_ref->file_rec->fcounters[PNETCDF_F_CLOSE_START_TIMESTAMP] > tm1)
+ rec_ref->file_rec->fcounters[PNETCDF_F_CLOSE_START_TIMESTAMP] = tm1;
+ rec_ref->file_rec->fcounters[PNETCDF_F_CLOSE_END_TIMESTAMP] = tm2;
darshan_delete_record_ref(&(pnetcdf_runtime->ncid_hash),
&ncid, sizeof(int));
}
@@ -302,7 +311,7 @@ static void pnetcdf_record_reduction_op(void* infile_v, void* inoutfile_v,
}
/* min non-zero (if available) value */
- for(j=PNETCDF_F_OPEN_TIMESTAMP; j<=PNETCDF_F_OPEN_TIMESTAMP; j++)
+ for(j=PNETCDF_F_OPEN_START_TIMESTAMP; j<=PNETCDF_F_CLOSE_START_TIMESTAMP; j++)
{
if((infile->fcounters[j] < inoutfile->fcounters[j] &&
infile->fcounters[j] > 0) || inoutfile->fcounters[j] == 0)
@@ -312,7 +321,7 @@ static void pnetcdf_record_reduction_op(void* infile_v, void* inoutfile_v,
}
/* max */
- for(j=PNETCDF_F_CLOSE_TIMESTAMP; j<=PNETCDF_F_CLOSE_TIMESTAMP; j++)
+ for(j=PNETCDF_F_OPEN_END_TIMESTAMP; j<=PNETCDF_F_CLOSE_END_TIMESTAMP; j++)
{
if(infile->fcounters[j] > inoutfile->fcounters[j])
tmp_file.fcounters[j] = infile->fcounters[j];
=====================================
darshan-util/darshan-hdf5-logutils.c
=====================================
@@ -30,6 +30,8 @@ char *hdf5_f_counter_names[] = {
};
#undef X
+#define DARSHAN_HDF5_FILE_SIZE_1 40
+
static int darshan_log_get_hdf5_file(darshan_fd fd, void** hdf5_buf_p);
static int darshan_log_put_hdf5_file(darshan_fd fd, void* hdf5_buf);
static void darshan_log_print_hdf5_file(void *file_rec,
@@ -52,6 +54,7 @@ struct darshan_mod_logutil_funcs hdf5_logutils =
static int darshan_log_get_hdf5_file(darshan_fd fd, void** hdf5_buf_p)
{
struct darshan_hdf5_file *file = *((struct darshan_hdf5_file **)hdf5_buf_p);
+ int rec_len;
int i;
int ret;
@@ -65,12 +68,42 @@ static int darshan_log_get_hdf5_file(darshan_fd fd, void** hdf5_buf_p)
return(-1);
}
- ret = darshan_log_get_mod(fd, DARSHAN_HDF5_MOD, file,
- sizeof(struct darshan_hdf5_file));
+ if(fd->mod_ver[DARSHAN_HDF5_MOD] == DARSHAN_HDF5_VER)
+ {
+ /* log format is in current version, so we don't need to do any
+ * translation of counters while reading
+ */
+ rec_len = sizeof(struct darshan_hdf5_file);
+ ret = darshan_log_get_mod(fd, DARSHAN_HDF5_MOD, file, rec_len);
+ }
+ else
+ {
+ char scratch[1024] = {0};
+ char *src_p, *dest_p;
+ int len;
+
+ rec_len = DARSHAN_HDF5_FILE_SIZE_1;
+ ret = darshan_log_get_mod(fd, DARSHAN_HDF5_MOD, scratch, rec_len);
+ if(ret != rec_len)
+ goto exit;
+
+ /* upconvert version 1 to version 2 in-place */
+ dest_p = scratch + (sizeof(struct darshan_base_record) +
+ (1 * sizeof(int64_t)) + (3 * sizeof(double)));
+ src_p = dest_p - (2 * sizeof(double));
+ len = sizeof(double);
+ memmove(dest_p, src_p, len);
+ /* set F_CLOSE_START and F_OPEN_END to -1 */
+ *((double *)src_p) = -1;
+ *((double *)(src_p + sizeof(double))) = -1;
+
+ memcpy(file, scratch, sizeof(struct darshan_hdf5_file));
+ }
+exit:
if(*hdf5_buf_p == NULL)
{
- if(ret == sizeof(struct darshan_hdf5_file))
+ if(ret == rec_len)
*hdf5_buf_p = file;
else
free(file);
@@ -78,7 +111,7 @@ static int darshan_log_get_hdf5_file(darshan_fd fd, void** hdf5_buf_p)
if(ret < 0)
return(-1);
- else if(ret < sizeof(struct darshan_hdf5_file))
+ else if(ret < rec_len)
return(0);
else
{
@@ -90,7 +123,16 @@ static int darshan_log_get_hdf5_file(darshan_fd fd, void** hdf5_buf_p)
for(i=0; i<HDF5_NUM_INDICES; i++)
DARSHAN_BSWAP64(&file->counters[i]);
for(i=0; i<HDF5_F_NUM_INDICES; i++)
+ {
+ /* skip counters we explicitly set to -1 since they don't
+ * need to be byte swapped
+ */
+ if((fd->mod_ver[DARSHAN_HDF5_MOD] == 1) &&
+ ((i == HDF5_F_CLOSE_START_TIMESTAMP) ||
+ (i == HDF5_F_OPEN_END_TIMESTAMP)))
+ continue;
DARSHAN_BSWAP64(&file->fcounters[i]);
+ }
}
return(1);
@@ -140,8 +182,15 @@ static void darshan_log_print_hdf5_description(int ver)
{
printf("\n# description of HDF5 counters:\n");
printf("# HDF5_OPENS: HDF5 file open operation counts.\n");
- printf("# HDF5_F_OPEN_TIMESTAMP: timestamp of first HDF5 file open.\n");
- printf("# HDF5_F_CLOSE_TIMESTAMP: timestamp of last HDF5 file close.\n");
+ printf("# HDF5_F_*_START_TIMESTAMP: timestamp of first HDF5 file open/close.\n");
+ printf("# HDF5_F_*_END_TIMESTAMP: timestamp of last HDF5 file open/close.\n");
+
+ if(ver == 1)
+ {
+ printf("\n# WARNING: HDF5 module log format version 1 does not support the following counters:\n");
+ printf("# - HDF5_F_CLOSE_START_TIMESTAMP\n");
+ printf("# - HDF5_F_OPEN_END_TIMESTAMP\n");
+ }
return;
}
@@ -242,7 +291,8 @@ static void darshan_log_agg_hdf5_files(void *rec, void *agg_rec, int init_flag)
{
switch(i)
{
- case HDF5_F_OPEN_TIMESTAMP:
+ case HDF5_F_OPEN_START_TIMESTAMP:
+ case HDF5_F_CLOSE_START_TIMESTAMP:
/* minimum non-zero */
if((hdf5_rec->fcounters[i] > 0) &&
((agg_hdf5_rec->fcounters[i] == 0) ||
@@ -251,7 +301,8 @@ static void darshan_log_agg_hdf5_files(void *rec, void *agg_rec, int init_flag)
agg_hdf5_rec->fcounters[i] = hdf5_rec->fcounters[i];
}
break;
- case HDF5_F_CLOSE_TIMESTAMP:
+ case HDF5_F_OPEN_END_TIMESTAMP:
+ case HDF5_F_CLOSE_END_TIMESTAMP:
/* maximum */
if(hdf5_rec->fcounters[i] > agg_hdf5_rec->fcounters[i])
{
=====================================
darshan-util/darshan-mpiio-logutils.c
=====================================
@@ -30,6 +30,8 @@ char *mpiio_f_counter_names[] = {
};
#undef X
+#define DARSHAN_MPIIO_FILE_SIZE_1 544
+
static int darshan_log_get_mpiio_file(darshan_fd fd, void** mpiio_buf_p);
static int darshan_log_put_mpiio_file(darshan_fd fd, void* mpiio_buf);
static void darshan_log_print_mpiio_file(void *file_rec,
@@ -52,6 +54,7 @@ struct darshan_mod_logutil_funcs mpiio_logutils =
static int darshan_log_get_mpiio_file(darshan_fd fd, void** mpiio_buf_p)
{
struct darshan_mpiio_file *file = *((struct darshan_mpiio_file **)mpiio_buf_p);
+ int rec_len;
int i;
int ret;
@@ -64,13 +67,43 @@ static int darshan_log_get_mpiio_file(darshan_fd fd, void** mpiio_buf_p)
if(!file)
return(-1);
}
-
- ret = darshan_log_get_mod(fd, DARSHAN_MPIIO_MOD, file,
- sizeof(struct darshan_mpiio_file));
+ if(fd->mod_ver[DARSHAN_MPIIO_MOD] == DARSHAN_MPIIO_VER)
+ {
+ /* log format is in current version, so we don't need to do any
+ * translation of counters while reading
+ */
+ rec_len = sizeof(struct darshan_mpiio_file);
+ ret = darshan_log_get_mod(fd, DARSHAN_MPIIO_MOD, file, rec_len);
+ }
+ else
+ {
+ char scratch[1024] = {0};
+ char *src_p, *dest_p;
+ int len;
+
+ rec_len = DARSHAN_MPIIO_FILE_SIZE_1;
+ ret = darshan_log_get_mod(fd, DARSHAN_MPIIO_MOD, scratch, rec_len);
+ if(ret != rec_len)
+ goto exit;
+
+ /* upconvert versions 1/2 to version 3 in-place */
+ dest_p = scratch + (sizeof(struct darshan_base_record) +
+ (51 * sizeof(int64_t)) + (5 * sizeof(double)));
+ src_p = dest_p - (2 * sizeof(double));
+ len = (12 * sizeof(double));
+ memmove(dest_p, src_p, len);
+ /* set F_CLOSE_START and F_OPEN_END to -1 */
+ *((double *)src_p) = -1;
+ *((double *)(src_p + sizeof(double))) = -1;
+
+ memcpy(file, scratch, sizeof(struct darshan_mpiio_file));
+ }
+
+exit:
if(*mpiio_buf_p == NULL)
{
- if(ret == sizeof(struct darshan_mpiio_file))
+ if(ret == rec_len)
*mpiio_buf_p = file;
else
free(file);
@@ -78,7 +111,7 @@ static int darshan_log_get_mpiio_file(darshan_fd fd, void** mpiio_buf_p)
if(ret < 0)
return(-1);
- else if(ret < sizeof(struct darshan_mpiio_file))
+ else if(ret < rec_len)
return(0);
else
{
@@ -90,7 +123,16 @@ static int darshan_log_get_mpiio_file(darshan_fd fd, void** mpiio_buf_p)
for(i=0; i<MPIIO_NUM_INDICES; i++)
DARSHAN_BSWAP64(&file->counters[i]);
for(i=0; i<MPIIO_F_NUM_INDICES; i++)
+ {
+ /* skip counters we explicitly set to -1 since they don't
+ * need to be byte swapped
+ */
+ if((fd->mod_ver[DARSHAN_MPIIO_MOD] < 3) &&
+ ((i == MPIIO_F_CLOSE_START_TIMESTAMP) ||
+ (i == MPIIO_F_OPEN_END_TIMESTAMP)))
+ continue;
DARSHAN_BSWAP64(&file->fcounters[i]);
+ }
}
return(1);
@@ -156,20 +198,24 @@ static void darshan_log_print_mpiio_description(int ver)
printf("# MPIIO_ACCESS*_COUNT: count of the four most common total access sizes.\n");
printf("# MPIIO_*_RANK: rank of the processes that were the fastest and slowest at I/O (for shared files).\n");
printf("# MPIIO_*_RANK_BYTES: total bytes transferred at MPI-IO layer by the fastest and slowest ranks (for shared files).\n");
- printf("# MPIIO_F_OPEN_TIMESTAMP: timestamp of first open.\n");
- printf("# MPIIO_F_*_START_TIMESTAMP: timestamp of first MPI-IO read/write.\n");
- printf("# MPIIO_F_*_END_TIMESTAMP: timestamp of last MPI-IO read/write.\n");
- printf("# MPIIO_F_CLOSE_TIMESTAMP: timestamp of last close.\n");
+ printf("# MPIIO_F_*_START_TIMESTAMP: timestamp of first MPI-IO open/read/write/close.\n");
+ printf("# MPIIO_F_*_END_TIMESTAMP: timestamp of last MPI-IO open/read/write/close.\n");
printf("# MPIIO_F_READ/WRITE/META_TIME: cumulative time spent in MPI-IO read, write, or metadata operations.\n");
printf("# MPIIO_F_MAX_*_TIME: duration of the slowest MPI-IO read and write operations.\n");
printf("# MPIIO_F_*_RANK_TIME: fastest and slowest I/O time for a single rank (for shared files).\n");
printf("# MPIIO_F_VARIANCE_RANK_*: variance of total I/O time and bytes moved for all ranks (for shared files).\n");
- if(ver < 2)
+ if(ver == 1)
{
printf("\n# WARNING: MPIIO module log format version 1 has the following limitations:\n");
printf("# - MPIIO_F_WRITE_START_TIMESTAMP may not be accurate.\n");
}
+ if(ver <= 2)
+ {
+ printf("\n# WARNING: MPIIO module log format version <=2 does not support the following counters:\n");
+ printf("# - MPIIO_F_CLOSE_START_TIMESTAMP\n");
+ printf("# - MPIIO_F_OPEN_END_TIMESTAMP\n");
+ }
return;
}
@@ -422,9 +468,10 @@ static void darshan_log_agg_mpiio_files(void *rec, void *agg_rec, int init_flag)
/* sum */
agg_mpi_rec->fcounters[i] += mpi_rec->fcounters[i];
break;
- case MPIIO_F_OPEN_TIMESTAMP:
+ case MPIIO_F_OPEN_START_TIMESTAMP:
case MPIIO_F_READ_START_TIMESTAMP:
case MPIIO_F_WRITE_START_TIMESTAMP:
+ case MPIIO_F_CLOSE_START_TIMESTAMP:
/* minimum non-zero */
if((mpi_rec->fcounters[i] > 0) &&
((agg_mpi_rec->fcounters[i] == 0) ||
@@ -433,9 +480,10 @@ static void darshan_log_agg_mpiio_files(void *rec, void *agg_rec, int init_flag)
agg_mpi_rec->fcounters[i] = mpi_rec->fcounters[i];
}
break;
+ case MPIIO_F_OPEN_END_TIMESTAMP:
case MPIIO_F_READ_END_TIMESTAMP:
case MPIIO_F_WRITE_END_TIMESTAMP:
- case MPIIO_F_CLOSE_TIMESTAMP:
+ case MPIIO_F_CLOSE_END_TIMESTAMP:
/* maximum */
if(mpi_rec->fcounters[i] > agg_mpi_rec->fcounters[i])
{
=====================================
darshan-util/darshan-parser.c
=====================================
@@ -1149,18 +1149,20 @@ void mpiio_accum_file(struct darshan_mpiio_file *mfile,
{
switch(i)
{
- case MPIIO_F_OPEN_TIMESTAMP:
+ case MPIIO_F_OPEN_START_TIMESTAMP:
case MPIIO_F_READ_START_TIMESTAMP:
case MPIIO_F_WRITE_START_TIMESTAMP:
+ case MPIIO_F_CLOSE_START_TIMESTAMP:
if(tmp->fcounters[i] == 0 ||
tmp->fcounters[i] > mfile->fcounters[i])
{
tmp->fcounters[i] = mfile->fcounters[i];
}
break;
+ case MPIIO_F_OPEN_END_TIMESTAMP:
case MPIIO_F_READ_END_TIMESTAMP:
case MPIIO_F_WRITE_END_TIMESTAMP:
- case MPIIO_F_CLOSE_TIMESTAMP:
+ case MPIIO_F_CLOSE_END_TIMESTAMP:
if(tmp->fcounters[i] == 0 ||
tmp->fcounters[i] < mfile->fcounters[i])
{
@@ -1386,12 +1388,12 @@ void mpiio_accum_perf(struct darshan_mpiio_file *mfile,
if(mfile->base_rec.rank == -1)
{
/* by_open */
- if(mfile->fcounters[MPIIO_F_CLOSE_TIMESTAMP] >
- mfile->fcounters[MPIIO_F_OPEN_TIMESTAMP])
+ if(mfile->fcounters[MPIIO_F_CLOSE_END_TIMESTAMP] >
+ mfile->fcounters[MPIIO_F_OPEN_START_TIMESTAMP])
{
pdata->shared_time_by_open +=
- mfile->fcounters[MPIIO_F_CLOSE_TIMESTAMP] -
- mfile->fcounters[MPIIO_F_OPEN_TIMESTAMP];
+ mfile->fcounters[MPIIO_F_CLOSE_END_TIMESTAMP] -
+ mfile->fcounters[MPIIO_F_OPEN_START_TIMESTAMP];
}
/* by_open_lastio */
@@ -1399,21 +1401,21 @@ void mpiio_accum_perf(struct darshan_mpiio_file *mfile,
mfile->fcounters[MPIIO_F_WRITE_END_TIMESTAMP])
{
/* be careful: file may have been opened but not read or written */
- if(mfile->fcounters[MPIIO_F_READ_END_TIMESTAMP] > mfile->fcounters[MPIIO_F_OPEN_TIMESTAMP])
+ if(mfile->fcounters[MPIIO_F_READ_END_TIMESTAMP] > mfile->fcounters[MPIIO_F_OPEN_START_TIMESTAMP])
{
pdata->shared_time_by_open_lastio +=
mfile->fcounters[MPIIO_F_READ_END_TIMESTAMP] -
- mfile->fcounters[MPIIO_F_OPEN_TIMESTAMP];
+ mfile->fcounters[MPIIO_F_OPEN_START_TIMESTAMP];
}
}
else
{
/* be careful: file may have been opened but not read or written */
- if(mfile->fcounters[MPIIO_F_WRITE_END_TIMESTAMP] > mfile->fcounters[MPIIO_F_OPEN_TIMESTAMP])
+ if(mfile->fcounters[MPIIO_F_WRITE_END_TIMESTAMP] > mfile->fcounters[MPIIO_F_OPEN_START_TIMESTAMP])
{
pdata->shared_time_by_open_lastio +=
mfile->fcounters[MPIIO_F_WRITE_END_TIMESTAMP] -
- mfile->fcounters[MPIIO_F_OPEN_TIMESTAMP];
+ mfile->fcounters[MPIIO_F_OPEN_START_TIMESTAMP];
}
}
@@ -2011,7 +2013,7 @@ void mpiio_file_list(hash_entry_t *file_hash,
if(detail_flag)
{
- for(i=MPIIO_F_OPEN_TIMESTAMP; i<=MPIIO_F_CLOSE_TIMESTAMP; i++)
+ for(i=MPIIO_F_OPEN_START_TIMESTAMP; i<=MPIIO_F_CLOSE_END_TIMESTAMP; i++)
{
printf("\t%f", file_rec->fcounters[i]);
}
=====================================
darshan-util/darshan-pnetcdf-logutils.c
=====================================
@@ -30,6 +30,8 @@ char *pnetcdf_f_counter_names[] = {
};
#undef X
+#define DARSHAN_PNETCDF_FILE_SIZE_1 48
+
static int darshan_log_get_pnetcdf_file(darshan_fd fd, void** pnetcdf_buf_p);
static int darshan_log_put_pnetcdf_file(darshan_fd fd, void* pnetcdf_buf);
static void darshan_log_print_pnetcdf_file(void *file_rec,
@@ -52,6 +54,7 @@ struct darshan_mod_logutil_funcs pnetcdf_logutils =
static int darshan_log_get_pnetcdf_file(darshan_fd fd, void** pnetcdf_buf_p)
{
struct darshan_pnetcdf_file *file = *((struct darshan_pnetcdf_file **)pnetcdf_buf_p);
+ int rec_len;
int i;
int ret;
@@ -65,12 +68,42 @@ static int darshan_log_get_pnetcdf_file(darshan_fd fd, void** pnetcdf_buf_p)
return(-1);
}
- ret = darshan_log_get_mod(fd, DARSHAN_PNETCDF_MOD, file,
- sizeof(struct darshan_pnetcdf_file));
+ if(fd->mod_ver[DARSHAN_PNETCDF_MOD] == DARSHAN_PNETCDF_VER)
+ {
+ /* log format is in current version, so we don't need to do any
+ * translation of counters while reading
+ */
+ rec_len = sizeof(struct darshan_pnetcdf_file);
+ ret = darshan_log_get_mod(fd, DARSHAN_PNETCDF_MOD, file, rec_len);
+ }
+ else
+ {
+ char scratch[1024] = {0};
+ char *src_p, *dest_p;
+ int len;
+
+ rec_len = DARSHAN_PNETCDF_FILE_SIZE_1;
+ ret = darshan_log_get_mod(fd, DARSHAN_PNETCDF_MOD, scratch, rec_len);
+ if(ret != rec_len)
+ goto exit;
+
+ /* upconvert version 1 to version 2 in-place */
+ dest_p = scratch + (sizeof(struct darshan_base_record) +
+ (2 * sizeof(int64_t)) + (3 * sizeof(double)));
+ src_p = dest_p - (2 * sizeof(double));
+ len = sizeof(double);
+ memmove(dest_p, src_p, len);
+ /* set F_CLOSE_START and F_OPEN_END to -1 */
+ *((double *)src_p) = -1;
+ *((double *)(src_p + sizeof(double))) = -1;
+
+ memcpy(file, scratch, sizeof(struct darshan_pnetcdf_file));
+ }
+exit:
if(*pnetcdf_buf_p == NULL)
{
- if(ret == sizeof(struct darshan_pnetcdf_file))
+ if(ret == rec_len)
*pnetcdf_buf_p = file;
else
free(file);
@@ -78,7 +111,7 @@ static int darshan_log_get_pnetcdf_file(darshan_fd fd, void** pnetcdf_buf_p)
if(ret < 0)
return(-1);
- else if(ret < sizeof(struct darshan_pnetcdf_file))
+ else if(ret < rec_len)
return(0);
else
{
@@ -90,7 +123,16 @@ static int darshan_log_get_pnetcdf_file(darshan_fd fd, void** pnetcdf_buf_p)
for(i=0; i<PNETCDF_NUM_INDICES; i++)
DARSHAN_BSWAP64(&file->counters[i]);
for(i=0; i<PNETCDF_F_NUM_INDICES; i++)
+ {
+ /* skip counters we explicitly set to -1 since they don't
+ * need to be byte swapped
+ */
+ if((fd->mod_ver[DARSHAN_PNETCDF_MOD] == 1) &&
+ ((i == PNETCDF_F_CLOSE_START_TIMESTAMP) ||
+ (i == PNETCDF_F_OPEN_END_TIMESTAMP)))
+ continue;
DARSHAN_BSWAP64(&file->fcounters[i]);
+ }
}
return(1);
@@ -141,8 +183,15 @@ static void darshan_log_print_pnetcdf_description(int ver)
printf("\n# description of PNETCDF counters:\n");
printf("# PNETCDF_INDEP_OPENS: PNETCDF independent file open operation counts.\n");
printf("# PNETCDF_COLL_OPENS: PNETCDF collective file open operation counts.\n");
- printf("# PNETCDF_F_OPEN_TIMESTAMP: timestamp of first PNETCDF file open.\n");
- printf("# PNETCDF_F_CLOSE_TIMESTAMP: timestamp of last PNETCDF file close.\n");
+ printf("# PNETCDF_F_*_START_TIMESTAMP: timestamp of first PNETCDF file open/close.\n");
+ printf("# PNETCDF_F_*_END_TIMESTAMP: timestamp of last PNETCDF file open/close.\n");
+
+ if(ver == 1)
+ {
+ printf("\n# WARNING: PNETCDF module log format version 1 does not support the following counters:\n");
+ printf("# - PNETCDF_F_CLOSE_START_TIMESTAMP\n");
+ printf("# - PNETCDF_F_OPEN_END_TIMESTAMP\n");
+ }
return;
}
@@ -244,7 +293,8 @@ static void darshan_log_agg_pnetcdf_files(void *rec, void *agg_rec, int init_fla
{
switch(i)
{
- case PNETCDF_F_OPEN_TIMESTAMP:
+ case PNETCDF_F_OPEN_START_TIMESTAMP:
+ case PNETCDF_F_CLOSE_START_TIMESTAMP:
/* minimum non-zero */
if((pnc_rec->fcounters[i] > 0) &&
((agg_pnc_rec->fcounters[i] == 0) ||
@@ -253,7 +303,8 @@ static void darshan_log_agg_pnetcdf_files(void *rec, void *agg_rec, int init_fla
agg_pnc_rec->fcounters[i] = pnc_rec->fcounters[i];
}
break;
- case PNETCDF_F_CLOSE_TIMESTAMP:
+ case PNETCDF_F_OPEN_END_TIMESTAMP:
+ case PNETCDF_F_CLOSE_END_TIMESTAMP:
/* maximum */
if(pnc_rec->fcounters[i] > agg_pnc_rec->fcounters[i])
{
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/commit/598a2e85ab3350ba8336a130930758597e986a4b
--
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/commit/598a2e85ab3350ba8336a130930758597e986a4b
You're receiving this email because of your account on xgitlab.cels.anl.gov.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20190116/6a76791a/attachment-0001.html>
More information about the Darshan-commits
mailing list