[Darshan-commits] [Git][darshan/darshan][dev-stdio-utils] 2 commits: implement slowest and variance counters in stdio
Philip Carns
xgitlab at cels.anl.gov
Thu Jul 7 19:26:42 CDT 2016
Philip Carns pushed to branch dev-stdio-utils at darshan / darshan
Commits:
4b7e381c by Phil Carns at 2016-07-07T17:43:00-04:00
implement slowest and variance counters in stdio
- untested
- - - - -
b904f795 by Phil Carns at 2016-07-07T20:22:19-04:00
remove old TODO comment
- remaining items are tracked in gitlab issues
- - - - -
3 changed files:
- darshan-runtime/lib/darshan-stdio.c
- darshan-stdio-log-format.h
- darshan-util/darshan-stdio-logutils.c
Changes:
=====================================
darshan-runtime/lib/darshan-stdio.c
=====================================
--- a/darshan-runtime/lib/darshan-stdio.c
+++ b/darshan-runtime/lib/darshan-stdio.c
@@ -4,19 +4,6 @@
*
*/
-/* TODO list (general) for this module:
- * - add stdio page to darshan-job-summary
- * - update darshan-parser to include stdio information in any relevant
- * performance estimate / summary modes
- * - figure out what to do about posix module compatibility
- * - remove stdio counters in POSIX or keep and set to -1?
- * - affected counters in posix module:
- * - POSIX_FOPENS
- * - POSIX_FREADS
- * - POSIX_FWRITES
- * - POSIX_FSEEKS
- */
-
/* catalog of stdio functions instrumented by this module
*
* functions for opening streams
@@ -160,6 +147,9 @@ static void stdio_shutdown(
int *stdio_buf_sz);
static void stdio_record_reduction_op(void* infile_v, void* inoutfile_v,
int *len, MPI_Datatype *datatype);
+static void stdio_shared_record_variance(
+ MPI_Comm mod_comm, struct darshan_stdio_file *inrec_array,
+ struct darshan_stdio_file *outrec_array, int shared_rec_count);
static struct stdio_file_record_ref *stdio_track_new_file_record(
darshan_record_id rec_id, const char *path);
static void stdio_cleanup_runtime();
@@ -1006,6 +996,48 @@ static void stdio_record_reduction_op(void* infile_v, void* inoutfile_v,
tmp_file.fcounters[j] = inoutfile->fcounters[j];
}
+ /* min (zeroes are ok here; some procs don't do I/O) */
+ if(infile->fcounters[STDIO_F_FASTEST_RANK_TIME] <
+ inoutfile->fcounters[STDIO_F_FASTEST_RANK_TIME])
+ {
+ tmp_file.counters[STDIO_FASTEST_RANK] =
+ infile->counters[STDIO_FASTEST_RANK];
+ tmp_file.counters[STDIO_FASTEST_RANK_BYTES] =
+ infile->counters[STDIO_FASTEST_RANK_BYTES];
+ tmp_file.fcounters[STDIO_F_FASTEST_RANK_TIME] =
+ infile->fcounters[STDIO_F_FASTEST_RANK_TIME];
+ }
+ else
+ {
+ tmp_file.counters[STDIO_FASTEST_RANK] =
+ inoutfile->counters[STDIO_FASTEST_RANK];
+ tmp_file.counters[STDIO_FASTEST_RANK_BYTES] =
+ inoutfile->counters[STDIO_FASTEST_RANK_BYTES];
+ tmp_file.fcounters[STDIO_F_FASTEST_RANK_TIME] =
+ inoutfile->fcounters[STDIO_F_FASTEST_RANK_TIME];
+ }
+
+ /* max */
+ if(infile->fcounters[STDIO_F_SLOWEST_RANK_TIME] >
+ inoutfile->fcounters[STDIO_F_SLOWEST_RANK_TIME])
+ {
+ tmp_file.counters[STDIO_SLOWEST_RANK] =
+ infile->counters[STDIO_SLOWEST_RANK];
+ tmp_file.counters[STDIO_SLOWEST_RANK_BYTES] =
+ infile->counters[STDIO_SLOWEST_RANK_BYTES];
+ tmp_file.fcounters[STDIO_F_SLOWEST_RANK_TIME] =
+ infile->fcounters[STDIO_F_SLOWEST_RANK_TIME];
+ }
+ else
+ {
+ tmp_file.counters[STDIO_SLOWEST_RANK] =
+ inoutfile->counters[STDIO_SLOWEST_RANK];
+ tmp_file.counters[STDIO_SLOWEST_RANK_BYTES] =
+ inoutfile->counters[STDIO_SLOWEST_RANK_BYTES];
+ tmp_file.fcounters[STDIO_F_SLOWEST_RANK_TIME] =
+ inoutfile->fcounters[STDIO_F_SLOWEST_RANK_TIME];
+ }
+
/* update pointers */
*inoutfile = tmp_file;
inoutfile++;
@@ -1030,6 +1062,7 @@ static void stdio_shutdown(
MPI_Datatype red_type;
MPI_Op red_op;
int stdio_rec_count;
+ double stdio_time;
STDIO_LOCK();
assert(stdio_runtime);
@@ -1048,6 +1081,31 @@ static void stdio_shutdown(
&shared_recs[i], sizeof(darshan_record_id));
assert(rec_ref);
+ stdio_time =
+ rec_ref->file_rec->fcounters[STDIO_F_READ_TIME] +
+ rec_ref->file_rec->fcounters[STDIO_F_WRITE_TIME] +
+ rec_ref->file_rec->fcounters[STDIO_F_META_TIME];
+
+ /* initialize fastest/slowest info prior to the reduction */
+ rec_ref->file_rec->counters[STDIO_FASTEST_RANK] =
+ rec_ref->file_rec->base_rec.rank;
+ rec_ref->file_rec->counters[STDIO_FASTEST_RANK_BYTES] =
+ rec_ref->file_rec->counters[STDIO_BYTES_READ] +
+ rec_ref->file_rec->counters[STDIO_BYTES_WRITTEN];
+ rec_ref->file_rec->fcounters[STDIO_F_FASTEST_RANK_TIME] =
+ stdio_time;
+
+ /* until reduction occurs, we assume that this rank is both
+ * the fastest and slowest. It is up to the reduction operator
+ * to find the true min and max.
+ */
+ rec_ref->file_rec->counters[STDIO_SLOWEST_RANK] =
+ rec_ref->file_rec->counters[STDIO_FASTEST_RANK];
+ rec_ref->file_rec->counters[STDIO_SLOWEST_RANK_BYTES] =
+ rec_ref->file_rec->counters[STDIO_FASTEST_RANK_BYTES];
+ rec_ref->file_rec->fcounters[STDIO_F_SLOWEST_RANK_TIME] =
+ rec_ref->file_rec->fcounters[STDIO_F_FASTEST_RANK_TIME];
+
rec_ref->file_rec->base_rec.rank = -1;
}
@@ -1084,6 +1142,10 @@ static void stdio_shutdown(
DARSHAN_MPI_CALL(PMPI_Reduce)(red_send_buf, red_recv_buf,
shared_rec_count, red_type, red_op, 0, mod_comm);
+ /* get the time and byte variances for shared files */
+ stdio_shared_record_variance(mod_comm, red_send_buf, red_recv_buf,
+ shared_rec_count);
+
/* clean up reduction state */
if(my_rank == 0)
{
@@ -1175,6 +1237,89 @@ static void stdio_cleanup_runtime()
return;
}
+/* Computes, for each shared record, the variance across ranks of total stdio
+ * time and of total bytes moved; rank 0 stores the results in outrec_array.
+ * Allocation failures jump to cleanup so the MPI type/op and buffers are freed.
+ * NOTE(review): a rank that bails out this way skips the PMPI_Reduce calls the
+ * other ranks still make -- confirm that cannot stall the job.
+ */
+static void stdio_shared_record_variance(MPI_Comm mod_comm,
+    struct darshan_stdio_file *inrec_array, struct darshan_stdio_file *outrec_array,
+    int shared_rec_count)
+{
+    MPI_Datatype var_dt;
+    MPI_Op var_op;
+    int i;
+    struct darshan_variance_dt *var_send_buf = NULL;
+    struct darshan_variance_dt *var_recv_buf = NULL;
+
+    DARSHAN_MPI_CALL(PMPI_Type_contiguous)(sizeof(struct darshan_variance_dt),
+        MPI_BYTE, &var_dt);
+    DARSHAN_MPI_CALL(PMPI_Type_commit)(&var_dt);
+    DARSHAN_MPI_CALL(PMPI_Op_create)(darshan_variance_reduce, 1, &var_op);
+
+    var_send_buf = malloc(shared_rec_count * sizeof(struct darshan_variance_dt));
+    if(!var_send_buf)
+        goto cleanup;
+    if(my_rank == 0)
+    {
+        var_recv_buf = malloc(shared_rec_count * sizeof(struct darshan_variance_dt));
+        if(!var_recv_buf)
+            goto cleanup;
+    }
+
+    /* get total i/o time variances for shared records */
+    for(i=0; i<shared_rec_count; i++)
+    {
+        var_send_buf[i].n = 1;
+        var_send_buf[i].S = 0;
+        var_send_buf[i].T = inrec_array[i].fcounters[STDIO_F_READ_TIME] +
+            inrec_array[i].fcounters[STDIO_F_WRITE_TIME] +
+            inrec_array[i].fcounters[STDIO_F_META_TIME];
+    }
+
+    DARSHAN_MPI_CALL(PMPI_Reduce)(var_send_buf, var_recv_buf, shared_rec_count,
+        var_dt, var_op, 0, mod_comm);
+
+    if(my_rank == 0)
+    {
+        for(i=0; i<shared_rec_count; i++)
+        {
+            outrec_array[i].fcounters[STDIO_F_VARIANCE_RANK_TIME] =
+                (var_recv_buf[i].S / var_recv_buf[i].n);
+        }
+    }
+
+    /* get total bytes moved variances for shared records */
+    for(i=0; i<shared_rec_count; i++)
+    {
+        var_send_buf[i].n = 1;
+        var_send_buf[i].S = 0;
+        var_send_buf[i].T = (double)
+            inrec_array[i].counters[STDIO_BYTES_READ] +
+            inrec_array[i].counters[STDIO_BYTES_WRITTEN];
+    }
+
+    DARSHAN_MPI_CALL(PMPI_Reduce)(var_send_buf, var_recv_buf, shared_rec_count,
+        var_dt, var_op, 0, mod_comm);
+
+    if(my_rank == 0)
+    {
+        for(i=0; i<shared_rec_count; i++)
+        {
+            outrec_array[i].fcounters[STDIO_F_VARIANCE_RANK_BYTES] =
+                (var_recv_buf[i].S / var_recv_buf[i].n);
+        }
+    }
+
+cleanup:
+    DARSHAN_MPI_CALL(PMPI_Type_free)(&var_dt);
+    DARSHAN_MPI_CALL(PMPI_Op_free)(&var_op);
+    free(var_send_buf);
+    free(var_recv_buf);
+    return;
+}
+
/*
* Local variables:
* c-indent-level: 4
=====================================
darshan-stdio-log-format.h
=====================================
--- a/darshan-stdio-log-format.h
+++ b/darshan-stdio-log-format.h
@@ -29,6 +29,11 @@
X(STDIO_MAX_BYTE_READ) \
/* maximum byte (offset) written */\
X(STDIO_MAX_BYTE_WRITTEN) \
+ /* rank and number of bytes moved for fastest/slowest ranks */\
+ X(STDIO_FASTEST_RANK) \
+ X(STDIO_FASTEST_RANK_BYTES) \
+ X(STDIO_SLOWEST_RANK) \
+ X(STDIO_SLOWEST_RANK_BYTES) \
/* end of counters */\
X(STDIO_NUM_INDICES)
@@ -55,6 +60,13 @@
X(STDIO_F_WRITE_END_TIMESTAMP) \
/* timestamp of last read completion */\
X(STDIO_F_READ_END_TIMESTAMP) \
+ /* total i/o and meta time consumed for fastest/slowest ranks */\
+ X(STDIO_F_FASTEST_RANK_TIME) \
+ X(STDIO_F_SLOWEST_RANK_TIME) \
+ /* variance of total i/o time and bytes moved across all ranks */\
+ /* NOTE: for shared records only */\
+ X(STDIO_F_VARIANCE_RANK_TIME) \
+ X(STDIO_F_VARIANCE_RANK_BYTES) \
/* end of counters */\
X(STDIO_F_NUM_INDICES)
=====================================
darshan-util/darshan-stdio-logutils.c
=====================================
--- a/darshan-util/darshan-stdio-logutils.c
+++ b/darshan-util/darshan-stdio-logutils.c
@@ -141,9 +141,15 @@ static void darshan_log_print_stdio_description()
printf("# STDIO_{OPENS|WRITES|READS|SEEKS|FLUSHES} are types of operations.\n");
printf("# STDIO_BYTES_*: total bytes read and written.\n");
printf("# STDIO_MAX_BYTE_*: highest offset byte read and written.\n");
+ printf("# STDIO_*_RANK: rank of the processes that were the fastest and slowest at I/O (for shared files).\n");
+ printf("# STDIO_*_RANK_BYTES: bytes transferred by the fastest and slowest ranks (for shared files).\n");
printf("# STDIO_F_*_START_TIMESTAMP: timestamp of the first call to that type of function.\n");
printf("# STDIO_F_*_END_TIMESTAMP: timestamp of the completion of the last call to that type of function.\n");
printf("# STDIO_F_*_TIME: cumulative time spent in different types of functions.\n");
+ printf("# STDIO_F_*_RANK_TIME: fastest and slowest I/O time for a single rank (for shared files).\n");
+ printf("# STDIO_F_VARIANCE_RANK_*: variance of total I/O time and bytes moved for all ranks (for shared files).\n");
+
+ DARSHAN_PRINT_HEADER();
return;
}
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/compare/cc6191892c87ca3c70c4477992547989c4c28da6...b904f7956817b8efb4bf786b459c2763844a6718
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20160707/c75341fe/attachment-0001.html>
More information about the Darshan-commits
mailing list