[Darshan-commits] [Git][darshan/darshan][dev-stdio-utils] 2 commits: implement slowest and variance counters in stdio

Philip Carns xgitlab at cels.anl.gov
Thu Jul 7 19:26:42 CDT 2016


Philip Carns pushed to branch dev-stdio-utils at darshan / darshan


Commits:
4b7e381c by Phil Carns at 2016-07-07T17:43:00-04:00
implement slowest and variance counters in stdio

- untested

- - - - -
b904f795 by Phil Carns at 2016-07-07T20:22:19-04:00
remove old TODO comment

- remaining items are tracked in gitlab issues

- - - - -


3 changed files:

- darshan-runtime/lib/darshan-stdio.c
- darshan-stdio-log-format.h
- darshan-util/darshan-stdio-logutils.c


Changes:

=====================================
darshan-runtime/lib/darshan-stdio.c
=====================================
--- a/darshan-runtime/lib/darshan-stdio.c
+++ b/darshan-runtime/lib/darshan-stdio.c
@@ -4,19 +4,6 @@
  *
  */
 
-/* TODO list (general) for this module:
- * - add stdio page to darshan-job-summary
- * - update darshan-parser to include stdio information in any relevant 
- *   performance estimate / summary modes
- * - figure out what to do about posix module compatibility
- *   - remove stdio counters in POSIX or keep and set to -1?
- *   - affected counters in posix module:
- *     - POSIX_FOPENS
- *     - POSIX_FREADS
- *     - POSIX_FWRITES
- *     - POSIX_FSEEKS
- */
-
 /* catalog of stdio functions instrumented by this module
  *
  * functions for opening streams
@@ -160,6 +147,9 @@ static void stdio_shutdown(
     int *stdio_buf_sz);
 static void stdio_record_reduction_op(void* infile_v, void* inoutfile_v,
     int *len, MPI_Datatype *datatype);
+static void stdio_shared_record_variance(
+    MPI_Comm mod_comm, struct darshan_stdio_file *inrec_array,
+    struct darshan_stdio_file *outrec_array, int shared_rec_count);
 static struct stdio_file_record_ref *stdio_track_new_file_record(
     darshan_record_id rec_id, const char *path);
 static void stdio_cleanup_runtime();
@@ -1006,6 +996,48 @@ static void stdio_record_reduction_op(void* infile_v, void* inoutfile_v,
                 tmp_file.fcounters[j] = inoutfile->fcounters[j];
         }
 
+        /* min (zeroes are ok here; some procs don't do I/O) */
+        if(infile->fcounters[STDIO_F_FASTEST_RANK_TIME] <
+           inoutfile->fcounters[STDIO_F_FASTEST_RANK_TIME])
+        {
+            tmp_file.counters[STDIO_FASTEST_RANK] =
+                infile->counters[STDIO_FASTEST_RANK];
+            tmp_file.counters[STDIO_FASTEST_RANK_BYTES] =
+                infile->counters[STDIO_FASTEST_RANK_BYTES];
+            tmp_file.fcounters[STDIO_F_FASTEST_RANK_TIME] =
+                infile->fcounters[STDIO_F_FASTEST_RANK_TIME];
+        }
+        else
+        {
+            tmp_file.counters[STDIO_FASTEST_RANK] =
+                inoutfile->counters[STDIO_FASTEST_RANK];
+            tmp_file.counters[STDIO_FASTEST_RANK_BYTES] =
+                inoutfile->counters[STDIO_FASTEST_RANK_BYTES];
+            tmp_file.fcounters[STDIO_F_FASTEST_RANK_TIME] =
+                inoutfile->fcounters[STDIO_F_FASTEST_RANK_TIME];
+        }
+
+        /* max */
+        if(infile->fcounters[STDIO_F_SLOWEST_RANK_TIME] >
+           inoutfile->fcounters[STDIO_F_SLOWEST_RANK_TIME])
+        {
+            tmp_file.counters[STDIO_SLOWEST_RANK] =
+                infile->counters[STDIO_SLOWEST_RANK];
+            tmp_file.counters[STDIO_SLOWEST_RANK_BYTES] =
+                infile->counters[STDIO_SLOWEST_RANK_BYTES];
+            tmp_file.fcounters[STDIO_F_SLOWEST_RANK_TIME] =
+                infile->fcounters[STDIO_F_SLOWEST_RANK_TIME];
+        }
+        else
+        {
+            tmp_file.counters[STDIO_SLOWEST_RANK] =
+                inoutfile->counters[STDIO_SLOWEST_RANK];
+            tmp_file.counters[STDIO_SLOWEST_RANK_BYTES] =
+                inoutfile->counters[STDIO_SLOWEST_RANK_BYTES];
+            tmp_file.fcounters[STDIO_F_SLOWEST_RANK_TIME] =
+                inoutfile->fcounters[STDIO_F_SLOWEST_RANK_TIME];
+        }
+
         /* update pointers */
         *inoutfile = tmp_file;
         inoutfile++;
@@ -1030,6 +1062,7 @@ static void stdio_shutdown(
     MPI_Datatype red_type;
     MPI_Op red_op;
     int stdio_rec_count;
+    double stdio_time;
 
     STDIO_LOCK();
     assert(stdio_runtime);
@@ -1048,6 +1081,31 @@ static void stdio_shutdown(
                 &shared_recs[i], sizeof(darshan_record_id));
             assert(rec_ref);
 
+            stdio_time =
+                rec_ref->file_rec->fcounters[STDIO_F_READ_TIME] +
+                rec_ref->file_rec->fcounters[STDIO_F_WRITE_TIME] +
+                rec_ref->file_rec->fcounters[STDIO_F_META_TIME];
+
+            /* initialize fastest/slowest info prior to the reduction */
+            rec_ref->file_rec->counters[STDIO_FASTEST_RANK] =
+                rec_ref->file_rec->base_rec.rank;
+            rec_ref->file_rec->counters[STDIO_FASTEST_RANK_BYTES] =
+                rec_ref->file_rec->counters[STDIO_BYTES_READ] +
+                rec_ref->file_rec->counters[STDIO_BYTES_WRITTEN];
+            rec_ref->file_rec->fcounters[STDIO_F_FASTEST_RANK_TIME] =
+                stdio_time;
+
+            /* until reduction occurs, we assume that this rank is both
+             * the fastest and slowest. It is up to the reduction operator
+             * to find the true min and max.
+             */
+            rec_ref->file_rec->counters[STDIO_SLOWEST_RANK] =
+                rec_ref->file_rec->counters[STDIO_FASTEST_RANK];
+            rec_ref->file_rec->counters[STDIO_SLOWEST_RANK_BYTES] =
+                rec_ref->file_rec->counters[STDIO_FASTEST_RANK_BYTES];
+            rec_ref->file_rec->fcounters[STDIO_F_SLOWEST_RANK_TIME] =
+                rec_ref->file_rec->fcounters[STDIO_F_FASTEST_RANK_TIME];
+
             rec_ref->file_rec->base_rec.rank = -1;
         }
 
@@ -1084,6 +1142,10 @@ static void stdio_shutdown(
         DARSHAN_MPI_CALL(PMPI_Reduce)(red_send_buf, red_recv_buf,
             shared_rec_count, red_type, red_op, 0, mod_comm);
 
+        /* get the time and byte variances for shared files */
+        stdio_shared_record_variance(mod_comm, red_send_buf, red_recv_buf,
+            shared_rec_count);
+
         /* clean up reduction state */
         if(my_rank == 0)
         {
@@ -1175,6 +1237,89 @@ static void stdio_cleanup_runtime()
     return;
 }
 
+/* Compute per-record variance of I/O time and bytes moved across ranks for
+ * shared records.  Takes part in two reductions over mod_comm; only rank 0
+ * writes results, into outrec_array[i].fcounters[STDIO_F_VARIANCE_RANK_*].
+ * NOTE(review): an allocation failure makes this rank skip both reductions;
+ * presumably peers would then block in PMPI_Reduce -- confirm with callers.
+ */
+static void stdio_shared_record_variance(MPI_Comm mod_comm,
+    struct darshan_stdio_file *inrec_array, struct darshan_stdio_file *outrec_array,
+    int shared_rec_count)
+{
+    MPI_Datatype var_dt;
+    MPI_Op var_op;
+    int i;
+    struct darshan_variance_dt *var_send_buf = NULL;
+    struct darshan_variance_dt *var_recv_buf = NULL;
+
+    DARSHAN_MPI_CALL(PMPI_Type_contiguous)(sizeof(struct darshan_variance_dt),
+        MPI_BYTE, &var_dt);
+    DARSHAN_MPI_CALL(PMPI_Type_commit)(&var_dt);
+    DARSHAN_MPI_CALL(PMPI_Op_create)(darshan_variance_reduce, 1, &var_op);
+
+    /* on failure, jump to cleanup so the MPI handles and buffers are freed */
+    var_send_buf = malloc(shared_rec_count * sizeof(*var_send_buf));
+    if(!var_send_buf)
+        goto cleanup;
+    if(my_rank == 0)
+    {
+        var_recv_buf = malloc(shared_rec_count * sizeof(*var_recv_buf));
+        if(!var_recv_buf)
+            goto cleanup;
+    }
+
+    /* total i/o time: each rank contributes one sample (n=1, S=0, T=time) */
+    for(i=0; i<shared_rec_count; i++)
+    {
+        var_send_buf[i].n = 1;
+        var_send_buf[i].S = 0;
+        var_send_buf[i].T = inrec_array[i].fcounters[STDIO_F_READ_TIME] +
+                            inrec_array[i].fcounters[STDIO_F_WRITE_TIME] +
+                            inrec_array[i].fcounters[STDIO_F_META_TIME];
+    }
+
+    DARSHAN_MPI_CALL(PMPI_Reduce)(var_send_buf, var_recv_buf, shared_rec_count,
+        var_dt, var_op, 0, mod_comm);
+
+    if(my_rank == 0)
+    {
+        for(i=0; i<shared_rec_count; i++)
+        {
+            outrec_array[i].fcounters[STDIO_F_VARIANCE_RANK_TIME] =
+                (var_recv_buf[i].S / var_recv_buf[i].n);
+        }
+    }
+
+    /* total bytes moved: same scheme, with T = bytes read + bytes written */
+    for(i=0; i<shared_rec_count; i++)
+    {
+        var_send_buf[i].n = 1;
+        var_send_buf[i].S = 0;
+        var_send_buf[i].T = (double)
+                            inrec_array[i].counters[STDIO_BYTES_READ] +
+                            inrec_array[i].counters[STDIO_BYTES_WRITTEN];
+    }
+
+    DARSHAN_MPI_CALL(PMPI_Reduce)(var_send_buf, var_recv_buf, shared_rec_count,
+        var_dt, var_op, 0, mod_comm);
+
+    if(my_rank == 0)
+    {
+        for(i=0; i<shared_rec_count; i++)
+        {
+            outrec_array[i].fcounters[STDIO_F_VARIANCE_RANK_BYTES] =
+                (var_recv_buf[i].S / var_recv_buf[i].n);
+        }
+    }
+
+cleanup:
+    DARSHAN_MPI_CALL(PMPI_Type_free)(&var_dt);
+    DARSHAN_MPI_CALL(PMPI_Op_free)(&var_op);
+    free(var_send_buf);
+    free(var_recv_buf);
+}
+
 /*
  * Local variables:
  *  c-indent-level: 4


=====================================
darshan-stdio-log-format.h
=====================================
--- a/darshan-stdio-log-format.h
+++ b/darshan-stdio-log-format.h
@@ -29,6 +29,11 @@
     X(STDIO_MAX_BYTE_READ) \
     /* maximum byte (offset) written */\
     X(STDIO_MAX_BYTE_WRITTEN) \
+    /* rank and number of bytes moved for fastest/slowest ranks */\
+    X(STDIO_FASTEST_RANK) \
+    X(STDIO_FASTEST_RANK_BYTES) \
+    X(STDIO_SLOWEST_RANK) \
+    X(STDIO_SLOWEST_RANK_BYTES) \
     /* end of counters */\
     X(STDIO_NUM_INDICES)
 
@@ -55,6 +60,13 @@
     X(STDIO_F_WRITE_END_TIMESTAMP) \
     /* timestamp of last read completion */\
     X(STDIO_F_READ_END_TIMESTAMP) \
+    /* total i/o and meta time consumed for fastest/slowest ranks */\
+    X(STDIO_F_FASTEST_RANK_TIME) \
+    X(STDIO_F_SLOWEST_RANK_TIME) \
+    /* variance of total i/o time and bytes moved across all ranks */\
+    /* NOTE: for shared records only */\
+    X(STDIO_F_VARIANCE_RANK_TIME) \
+    X(STDIO_F_VARIANCE_RANK_BYTES) \
     /* end of counters */\
     X(STDIO_F_NUM_INDICES)
 


=====================================
darshan-util/darshan-stdio-logutils.c
=====================================
--- a/darshan-util/darshan-stdio-logutils.c
+++ b/darshan-util/darshan-stdio-logutils.c
@@ -141,9 +141,15 @@ static void darshan_log_print_stdio_description()
     printf("#   STDIO_{OPENS|WRITES|READS|SEEKS|FLUSHES} are types of operations.\n");
     printf("#   STDIO_BYTES_*: total bytes read and written.\n");
     printf("#   STDIO_MAX_BYTE_*: highest offset byte read and written.\n");
+    printf("#   STDIO_*_RANK: rank of the processes that were the fastest and slowest at I/O (for shared files).\n");
+    printf("#   STDIO_*_RANK_BYTES: bytes transferred by the fastest and slowest ranks (for shared files).\n");
     printf("#   STDIO_F_*_START_TIMESTAMP: timestamp of the first call to that type of function.\n");
     printf("#   STDIO_F_*_END_TIMESTAMP: timestamp of the completion of the last call to that type of function.\n");
     printf("#   STDIO_F_*_TIME: cumulative time spent in different types of functions.\n");
+    printf("#   STDIO_F_*_RANK_TIME: fastest and slowest I/O time for a single rank (for shared files).\n");
+    printf("#   STDIO_F_VARIANCE_RANK_*: variance of total I/O time and bytes moved for all ranks (for shared files).\n");
+
+    DARSHAN_PRINT_HEADER();
 
     return;
 }



View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/compare/cc6191892c87ca3c70c4477992547989c4c28da6...b904f7956817b8efb4bf786b459c2763844a6718
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20160707/c75341fe/attachment-0001.html>


More information about the Darshan-commits mailing list