[Darshan-commits] [Git][darshan/darshan][dev-hxhim-module] 3 commits: update for API changes

Rob Latham xgitlab at cels.anl.gov
Fri Apr 6 14:27:46 CDT 2018


Rob Latham pushed to branch dev-hxhim-module at darshan / darshan


Commits:
b3a21f85 by Rob Latham at 2018-03-27T12:31:31-05:00
update for API changes

- - - - -
929df735 by Rob Latham at 2018-04-06T14:27:09-05:00
get mdhim module working with more than one process

- - - - -
63009f34 by Rob Latham at 2018-04-06T14:27:24-05:00
debugging junk: DO NOT MERGE!

- - - - -


3 changed files:

- darshan-mdhim-log-format.h
- darshan-runtime/lib/darshan-mdhim.c
- darshan-util/darshan-mdhim-logutils.c


Changes:

=====================================
darshan-mdhim-log-format.h
=====================================
--- a/darshan-mdhim-log-format.h
+++ b/darshan-mdhim-log-format.h
@@ -14,10 +14,10 @@
     /* number of 'put' function calls */\
     X(MDHIM_PUTS) \
     /* larget payload for a 'put' */ \
-    X(MDHIM_PUT_MAX_SIZE)\
-    /* number of 'get' function calls */\
     X(MDHIM_GETS) \
     /* largest get */ \
+    X(MDHIM_PUT_MAX_SIZE)\
+    /* number of 'get' function calls */\
     X(MDHIM_GET_MAX_SIZE) \
     /* how many servers? */ \
     X(MDHIM_SERVERS) \
@@ -27,9 +27,9 @@
 #define MDHIM_F_COUNTERS \
     /* timestamp of the first call to a 'put' function */\
     X(MDHIM_F_PUT_TIMESTAMP) \
-    /* timer indicating duration of call to 'foo' with max MDHIM_PUT_MAX_DAT value */\
-    X(MDHIM_F_PUT_MAX_DURATION) \
     X(MDHIM_F_GET_TIMESTAMP) \
+    /* timer indicating longest (slowest) call to put/get */\
+    X(MDHIM_F_PUT_MAX_DURATION) \
     X(MDHIM_F_GET_MAX_DURATION) \
     /* end of counters */\
     X(MDHIM_F_NUM_INDICES)


=====================================
darshan-runtime/lib/darshan-mdhim.c
=====================================
--- a/darshan-runtime/lib/darshan-mdhim.c
+++ b/darshan-runtime/lib/darshan-mdhim.c
@@ -26,13 +26,13 @@
  * declarations for wrapped funcions, regardless of whether Darshan is used with
  * statically or dynamically linked executables.
  */
-DARSHAN_FORWARD_DECL(mdhimPut, struct mdhim_brm_t *, (mdhim_t *md,
+DARSHAN_FORWARD_DECL(mdhimPut, mdhim_brm_t *, (mdhim_t *md,
             void *key, int key_len, void *value, int value_len,
             struct secondary_info *secondary_global_info,
             struct secondary_info *secondary_local_info));
 
-DARSHAN_FORWARD_DECL(mdhimGet, struct mdhim_bgetrm_t *, (mdhim_t *md,
-        struct index_t *index, void *key, int key_len, int op));
+DARSHAN_FORWARD_DECL(mdhimGet, mdhim_getrm_t *, (mdhim_t *md,
+        struct index *index, void *key, int key_len, int op));
 
 DARSHAN_FORWARD_DECL(mdhimInit, int, (mdhim_t *md, mdhim_options_t *opts));
 
@@ -227,6 +227,7 @@ int DARSHAN_DECL(mdhimInit)(mdhim_t *md, mdhim_options_t *opts)
             nr_servers, RECORD_STRING);
     /* if we still don't have a valid reference, well that's too dang bad */
     if (rec_ref) rec_ref->record_p->counters[MDHIM_SERVERS] = nr_servers;
+
     MDHIM_POST_RECORD();
 
     MAP_OR_FAIL(mdhimInit);
@@ -234,13 +235,13 @@ int DARSHAN_DECL(mdhimInit)(mdhim_t *md, mdhim_options_t *opts)
     return ret;
 
 }
-struct mdhim_brm_t *DARSHAN_DECL(mdhimPut)(mdhim_t *md,
+mdhim_brm_t *DARSHAN_DECL(mdhimPut)(mdhim_t *md,
         void *key, int key_len,
         void *value, int value_len,
-        struct secondary_info *secondary_global_info,
-        struct secondary_info * secondary_local_info)
+        secondary_info_t *secondary_global_info,
+        secondary_info_t *secondary_local_info)
 {
-    struct mdhim_brm_t *ret;
+    mdhim_brm_t *ret;
     double tm1, tm2;
 
     /* The MAP_OR_FAIL macro attempts to obtain the address of the actual
@@ -271,10 +272,11 @@ struct mdhim_brm_t *DARSHAN_DECL(mdhimPut)(mdhim_t *md,
     return(ret);
 }
 
-struct mdhim_bgetrm_t * DARSHAN_DECL(mdhimGet)(mdhim_t *md,
-        struct index_t *index, void *key, int key_len, int op)
+mdhim_getrm_t * DARSHAN_DECL(mdhimGet)(mdhim_t *md,
+        struct index *index, void *key, int key_len,
+        enum TransportGetMessageOp op)
 {
-    struct mdhim_bgetrm_t *ret;
+    mdhim_getrm_t *ret;
     double tm1, tm2;
 
     MAP_OR_FAIL(mdhimGet);
@@ -403,6 +405,100 @@ static void mdhim_cleanup_runtime()
     return;
 }
 
+/* debugging aid: print a record to stdout, truncating at the buffer size */
+static void dump_record(struct darshan_mdhim_record *rec)
+{
+    int i, len;
+    char output[256];
+    len = snprintf(output, sizeof(output),
+            " put: %ld get: %ld put_max %ld get_max %ld num_servers %ld "
+            "put_t: %f get_t: %f put_max_t: %f get_max_t %f ",
+            rec->counters[MDHIM_PUTS], rec->counters[MDHIM_GETS],
+            rec->counters[MDHIM_PUT_MAX_SIZE], rec->counters[MDHIM_GET_MAX_SIZE],
+            rec->counters[MDHIM_SERVERS], rec->fcounters[MDHIM_F_PUT_TIMESTAMP],
+            rec->fcounters[MDHIM_F_GET_TIMESTAMP],
+            rec->fcounters[MDHIM_F_PUT_MAX_DURATION],
+            rec->fcounters[MDHIM_F_GET_MAX_DURATION]);
+    /* snprintf returns the untruncated length: stop once the buffer is full */
+    for (i=0; len >= 0 && len < (int)sizeof(output) &&
+            i < rec->counters[MDHIM_SERVERS]; i++)
+        len += snprintf(output+len, sizeof(output)-len,
+                "server %d: %d ", i, rec->server_histogram[i]);
+    printf("%s\n", len < 0 ? "" : output);
+}
+
+/* MPI user-defined reduction op: fold '*len' MDHIM records from infile_v into
+ * inoutfile_v.  Records are variable-sized (trailing server histogram), so both
+ * buffers are walked by byte offset rather than by array index. */
+static void mdhim_record_reduction_op(void *infile_v, void *inoutfile_v,
+        int *len, MPI_Datatype *datatype)
+{
+    struct darshan_mdhim_record *tmp_rec, *inrec, *inoutrec;
+    size_t rec_size;
+    int i, j;
+
+    for (i=0; i< *len; i++) {
+        /* re-derive both pointers each pass so every record gets reduced */
+        inrec = infile_v;
+        inoutrec = inoutfile_v;
+        /* can't use 'sizeof': server count histogram */
+        rec_size = MDHIM_RECORD_SIZE(inrec->counters[MDHIM_SERVERS]);
+        tmp_rec = calloc(1, rec_size);
+        /* no way to report an error from here: leave the rest unreduced */
+        if (!tmp_rec) return;
+        tmp_rec->base_rec.id = inrec->base_rec.id;
+        tmp_rec->base_rec.rank = -1;
+
+        /* sum */
+        for (j=MDHIM_PUTS; j<=MDHIM_GETS; j++) {
+            tmp_rec->counters[j] = inrec->counters[j] + inoutrec->counters[j];
+        }
+        /* max */
+        for (j=MDHIM_PUT_MAX_SIZE; j<=MDHIM_GET_MAX_SIZE; j++) {
+            tmp_rec->counters[j] = (
+                (inrec->counters[j] > inoutrec->counters[j] ) ?
+                inrec->counters[j] :
+                inoutrec->counters[j]);
+        }
+        tmp_rec->counters[MDHIM_SERVERS] = inrec->counters[MDHIM_SERVERS];
+
+        /* min non-zero value */
+        for (j=MDHIM_F_PUT_TIMESTAMP; j<=MDHIM_F_GET_TIMESTAMP; j++)
+        {
+            if (( inrec->fcounters[j] < inoutrec->fcounters[j] &&
+                        inrec->fcounters[j] > 0)
+                    || inoutrec->fcounters[j] == 0)
+                tmp_rec->fcounters[j] = inrec->fcounters[j];
+            else
+                tmp_rec->fcounters[j] = inoutrec->fcounters[j];
+        }
+        /* max */
+        for (j=MDHIM_F_PUT_MAX_DURATION; j<=MDHIM_F_GET_MAX_DURATION; j++)
+        {
+            tmp_rec->fcounters[j] = (
+                    (inrec->fcounters[j] > inoutrec->fcounters[j]) ?
+                        inrec->fcounters[j] :
+                        inoutrec->fcounters[j]);
+        }
+        /* dealing with server histogram a little odd.  Every client kept track
+         * of which servers it sent to, so we'll simply sum them all up.  The
+         * data lives at the end of the struct (remember, allocated based on
+         * MDHIM_RECORD_SIZE macro)  */
+        for (j=0; j< tmp_rec->counters[MDHIM_SERVERS]; j++) {
+            printf("%d: inrec %d inoutred %d\n",
+                    j, inrec->server_histogram[j], inoutrec->server_histogram[j]);
+            tmp_rec->server_histogram[j] = inrec->server_histogram[j] +
+                inoutrec->server_histogram[j];
+        }
+        dump_record(tmp_rec);
+        /* rec_size was captured up front: tmp_rec is never read after free */
+        memcpy(inoutrec, tmp_rec, rec_size);
+        free(tmp_rec);
+        /* updating not as simple as incrementing, unfortunately */
+        infile_v = (char *) infile_v + rec_size;
+        inoutfile_v = (char *)inoutfile_v + rec_size;
+    }
+}
 /***********************************************************************
  * shutdown function exported by the MDHIM module for coordinating with
  * darshan-core *
@@ -418,29 +514,77 @@ static void mdhim_shutdown(
     void **mdhim_buf,
     int *mdhim_buf_sz)
 {
+
+    int i, nr_servers=0;
+    /* other modules can declare this temporary record on the stack but I need a
+     * bit more space because of the server histogram */
+    struct mdhim_record_ref *rec_ref;
+    /* walking through these arrays will be awkward if there is more than one
+     * record: the 'server_histogram' field is variable */
+    struct darshan_mdhim_record *mdhim_rec_buf =
+        *(struct darshan_mdhim_record **)mdhim_buf;
+    struct darshan_mdhim_record *red_send_buf = NULL;
+    struct darshan_mdhim_record *red_recv_buf = NULL;
+    MPI_Datatype red_type;
+    MPI_Op red_op;
+
     MDHIM_LOCK();
     assert(mdhim_runtime);
 
-    /* NOTE: this function can be used to run collective operations
-     * prior to shutting down the module, as implied by the MPI
-     * communicator passed in as the first agrument. Typically, module
-     * developers will want to run a reduction on shared data records
-     * (passed in in the 'shared_recs' array), but other collective
-     * routines can be run here as well. For a detailed example
-     * illustrating how to run shared file reductions, consider the
-     * POSIX or MPIIO instrumentation modules, as they both implement
-     * this functionality.
-     */
+    /* taking the approach in darshan-mpiio.c, except MDHIM is always a "shared
+     * file" for now. */
+    assert(mdhim_runtime->rec_count == shared_rec_count);
 
-    /* Just set the output size according to the number of records
-     * currently being tracked. In general, the module can decide to
-     * throw out records that have been previously registered by
-     * shuffling around memory in 'mdhim_buf' -- 'mdhim_buf' and
-     * 'mdhim_buf_sz' both are passed as pointers so they can be updated
-     * by the shutdown function potentially
-     */
-    *mdhim_buf_sz = mdhim_runtime->rec_count *
-        sizeof(struct darshan_mdhim_record);
+    /* unlike MPI-IO, we only have shared records */
+    /* can the number of mdhim servers change? I suppose if there were
+     * multiple mdhim instances, each instance could have a different number of
+     * servers.  If that's the case, I'll have to make some of the memory allocations variable (and I don't do that yet) */
+    rec_ref = darshan_lookup_record_ref(mdhim_runtime->rec_id_hash,
+            &shared_recs[0], sizeof(darshan_record_id));
+    nr_servers = rec_ref->record_p->counters[MDHIM_SERVERS];
+
+    if (shared_rec_count && !getenv("DARSHAN_DISABLE_SHARED_REDUCTION"))
+    {
+        /* there is probably only one shared record */
+        for (i=1; i< shared_rec_count; i++)
+        {
+            rec_ref = darshan_lookup_record_ref(mdhim_runtime->rec_id_hash,
+                    &shared_recs[i], sizeof(darshan_record_id));
+            assert(rec_ref);
+            assert(nr_servers == rec_ref->record_p->counters[MDHIM_SERVERS]);
+        }
+
+        red_send_buf = mdhim_rec_buf;
+        if (my_rank == 0)
+        {
+            red_recv_buf = malloc(shared_rec_count *
+                    MDHIM_RECORD_SIZE(nr_servers));
+            if (!red_recv_buf)
+            {
+                MDHIM_UNLOCK();
+                return;
+            }
+        }
+        PMPI_Type_contiguous(MDHIM_RECORD_SIZE(nr_servers),
+                MPI_BYTE, &red_type);
+        PMPI_Type_commit(&red_type);
+        PMPI_Op_create(mdhim_record_reduction_op, 1, &red_op);
+        PMPI_Reduce(red_send_buf, red_recv_buf,
+                shared_rec_count, red_type, red_op, 0, mod_comm);
+
+        if (my_rank == 0)
+        {
+            memcpy(&(mdhim_rec_buf[0]), red_recv_buf,
+                    shared_rec_count *
+                    MDHIM_RECORD_SIZE(nr_servers));
+            free(red_recv_buf);
+        }
+
+        PMPI_Type_free(&red_type);
+        PMPI_Op_free(&red_op);
+    }
+    dump_record(mdhim_rec_buf);
+    *mdhim_buf_sz = shared_rec_count * sizeof (struct darshan_mdhim_record);
 
     /* shutdown internal structures used for instrumenting */
     mdhim_cleanup_runtime();


=====================================
darshan-util/darshan-mdhim-logutils.c
=====================================
--- a/darshan-util/darshan-mdhim-logutils.c
+++ b/darshan-util/darshan-mdhim-logutils.c
@@ -70,14 +70,7 @@ static int darshan_log_get_mdhim_record(darshan_fd fd, void** mdhim_buf_p)
     if(fd->mod_map[DARSHAN_MDHIM_MOD].len == 0)
         return(0);
 
-    if(*mdhim_buf_p == NULL)
-    {
-        rec = malloc(sizeof(*rec));
-        if(!rec)
-            return(-1);
-    }
-
-    /* read the fixed-sized portion of the bdMDHIM module record from the
+    /* read the fixed-sized portion of the MDHIM module record from the
      * darshan log file */
     ret = darshan_log_get_mod(fd, DARSHAN_MDHIM_MOD, &tmp_rec,
         sizeof(struct darshan_mdhim_record));
@@ -96,8 +89,7 @@ static int darshan_log_get_mdhim_record(darshan_fd fd, void** mdhim_buf_p)
             DARSHAN_BSWAP64(&tmp_rec.counters[i]);
         for (i=0; i< MDHIM_F_NUM_INDICES; i++)
             DARSHAN_BSWAP64(&tmp_rec.fcounters[i]);
-        for (i=0; i< tmp_rec.counters[MDHIM_SERVERS]; i++)
-            DARSHAN_BSWAP32(&tmp_rec.server_histogram[i]);
+        DARSHAN_BSWAP32(&(tmp_rec.server_histogram[0]) );
     }
 
     if(*mdhim_buf_p == NULL)
@@ -112,6 +104,7 @@ static int darshan_log_get_mdhim_record(darshan_fd fd, void** mdhim_buf_p)
         ret = darshan_log_get_mod(fd, DARSHAN_MDHIM_MOD,
                 &(rec->server_histogram[1]),
                 (rec->counters[MDHIM_SERVERS] - 1)*sizeof(int32_t));
+
         if (ret < (rec->counters[MDHIM_SERVERS] -1)*sizeof(int32_t))
             ret = -1;
         else



View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/compare/c180b5b30c59e5148405af8f334d2e9c7ba41d9d...63009f3434e2dccd85f1ea6abcbb46b9e5343cdd

---
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/compare/c180b5b30c59e5148405af8f334d2e9c7ba41d9d...63009f3434e2dccd85f1ea6abcbb46b9e5343cdd
You're receiving this email because of your account on xgitlab.cels.anl.gov.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20180406/39a5be53/attachment-0001.html>


More information about the Darshan-commits mailing list