[Darshan-commits] [Git][darshan/darshan][dev-hxhim-module] 3 commits: update for API changes
Rob Latham
xgitlab at cels.anl.gov
Fri Apr 6 14:27:46 CDT 2018
Rob Latham pushed to branch dev-hxhim-module at darshan / darshan
Commits:
b3a21f85 by Rob Latham at 2018-03-27T12:31:31-05:00
update for API changes
- - - - -
929df735 by Rob Latham at 2018-04-06T14:27:09-05:00
get mdhim module working with more than one process
- - - - -
63009f34 by Rob Latham at 2018-04-06T14:27:24-05:00
debugging junk: DO NOT MERGE!
- - - - -
3 changed files:
- darshan-mdhim-log-format.h
- darshan-runtime/lib/darshan-mdhim.c
- darshan-util/darshan-mdhim-logutils.c
Changes:
=====================================
darshan-mdhim-log-format.h
=====================================
--- a/darshan-mdhim-log-format.h
+++ b/darshan-mdhim-log-format.h
@@ -14,10 +14,10 @@
/* number of 'put' function calls */\
X(MDHIM_PUTS) \
/* larget payload for a 'put' */ \
- X(MDHIM_PUT_MAX_SIZE)\
- /* number of 'get' function calls */\
X(MDHIM_GETS) \
/* largest get */ \
+ X(MDHIM_PUT_MAX_SIZE)\
+ /* number of 'get' function calls */\
X(MDHIM_GET_MAX_SIZE) \
/* how many servers? */ \
X(MDHIM_SERVERS) \
@@ -27,9 +27,9 @@
#define MDHIM_F_COUNTERS \
/* timestamp of the first call to a 'put' function */\
X(MDHIM_F_PUT_TIMESTAMP) \
- /* timer indicating duration of call to 'foo' with max MDHIM_PUT_MAX_DAT value */\
- X(MDHIM_F_PUT_MAX_DURATION) \
X(MDHIM_F_GET_TIMESTAMP) \
+ /* timer indicating longest (slowest) call to put/get */\
+ X(MDHIM_F_PUT_MAX_DURATION) \
X(MDHIM_F_GET_MAX_DURATION) \
/* end of counters */\
X(MDHIM_F_NUM_INDICES)
=====================================
darshan-runtime/lib/darshan-mdhim.c
=====================================
--- a/darshan-runtime/lib/darshan-mdhim.c
+++ b/darshan-runtime/lib/darshan-mdhim.c
@@ -26,13 +26,13 @@
* declarations for wrapped functions, regardless of whether Darshan is used with
* statically or dynamically linked executables.
*/
-DARSHAN_FORWARD_DECL(mdhimPut, struct mdhim_brm_t *, (mdhim_t *md,
+DARSHAN_FORWARD_DECL(mdhimPut, mdhim_brm_t *, (mdhim_t *md,
void *key, int key_len, void *value, int value_len,
struct secondary_info *secondary_global_info,
struct secondary_info *secondary_local_info));
-DARSHAN_FORWARD_DECL(mdhimGet, struct mdhim_bgetrm_t *, (mdhim_t *md,
- struct index_t *index, void *key, int key_len, int op));
+DARSHAN_FORWARD_DECL(mdhimGet, mdhim_getrm_t *, (mdhim_t *md,
+ struct index *index, void *key, int key_len, int op));
DARSHAN_FORWARD_DECL(mdhimInit, int, (mdhim_t *md, mdhim_options_t *opts));
@@ -227,6 +227,7 @@ int DARSHAN_DECL(mdhimInit)(mdhim_t *md, mdhim_options_t *opts)
nr_servers, RECORD_STRING);
/* if we still don't have a valid reference, well that's too dang bad */
if (rec_ref) rec_ref->record_p->counters[MDHIM_SERVERS] = nr_servers;
+
MDHIM_POST_RECORD();
MAP_OR_FAIL(mdhimInit);
@@ -234,13 +235,13 @@ int DARSHAN_DECL(mdhimInit)(mdhim_t *md, mdhim_options_t *opts)
return ret;
}
-struct mdhim_brm_t *DARSHAN_DECL(mdhimPut)(mdhim_t *md,
+mdhim_brm_t *DARSHAN_DECL(mdhimPut)(mdhim_t *md,
void *key, int key_len,
void *value, int value_len,
- struct secondary_info *secondary_global_info,
- struct secondary_info * secondary_local_info)
+ secondary_info_t *secondary_global_info,
+ secondary_info_t *secondary_local_info)
{
- struct mdhim_brm_t *ret;
+ mdhim_brm_t *ret;
double tm1, tm2;
/* The MAP_OR_FAIL macro attempts to obtain the address of the actual
@@ -271,10 +272,11 @@ struct mdhim_brm_t *DARSHAN_DECL(mdhimPut)(mdhim_t *md,
return(ret);
}
-struct mdhim_bgetrm_t * DARSHAN_DECL(mdhimGet)(mdhim_t *md,
- struct index_t *index, void *key, int key_len, int op)
+mdhim_getrm_t * DARSHAN_DECL(mdhimGet)(mdhim_t *md,
+ struct index *index, void *key, int key_len,
+ enum TransportGetMessageOp op)
{
- struct mdhim_bgetrm_t *ret;
+ mdhim_getrm_t *ret;
double tm1, tm2;
MAP_OR_FAIL(mdhimGet);
@@ -403,6 +405,100 @@ static void mdhim_cleanup_runtime()
return;
}
+/* Debugging aid: print one MDHIM record (integer counters, floating-point
+ * counters and the per-server histogram) as a single line on stdout.
+ *
+ * The line is assembled in a fixed 256-byte buffer and emitted with one
+ * printf() call.  Text that does not fit is truncated; the buffer is never
+ * written past its end.
+ *
+ * rec: record to print; counters[MDHIM_SERVERS] gives the number of
+ *      server_histogram entries that follow the fixed part of the struct.
+ */
+static void dump_record(struct darshan_mdhim_record *rec)
+{
+    char output[256];
+    size_t used;
+    int i, n;
+
+    /* cast to long long so the conversion specifier matches the argument on
+     * every ABI, not only where the counter type happens to be 'long' */
+    n = snprintf(output, sizeof(output),
+        " put: %lld get: %lld put_max %lld get_max %lld num_servers %lld "
+        "put_t: %f get_t: %f put_max_t: %f get_max_t %f ",
+        (long long)rec->counters[MDHIM_PUTS],
+        (long long)rec->counters[MDHIM_GETS],
+        (long long)rec->counters[MDHIM_PUT_MAX_SIZE],
+        (long long)rec->counters[MDHIM_GET_MAX_SIZE],
+        (long long)rec->counters[MDHIM_SERVERS],
+        rec->fcounters[MDHIM_F_PUT_TIMESTAMP],
+        rec->fcounters[MDHIM_F_GET_TIMESTAMP],
+        rec->fcounters[MDHIM_F_PUT_MAX_DURATION],
+        rec->fcounters[MDHIM_F_GET_MAX_DURATION]);
+    if (n < 0)
+        return; /* encoding error: nothing usable was written */
+
+    /* snprintf returns the length it *wanted* to write; clamp it to what is
+     * really in the buffer so the append offset below stays in bounds */
+    used = ((size_t)n < sizeof(output)) ? (size_t)n : sizeof(output) - 1;
+
+    for (i = 0; i < rec->counters[MDHIM_SERVERS]; i++) {
+        n = snprintf(output + used, sizeof(output) - used,
+            "server %d: %d ", i, (int)rec->server_histogram[i]);
+        /* stop at the first entry that fails or no longer fits; the buffer
+         * is still NUL-terminated at this point */
+        if (n < 0 || (size_t)n >= sizeof(output) - used)
+            break;
+        used += (size_t)n;
+    }
+    printf("%s\n", output);
+}
+
+/* User-defined reduction operator (registered via PMPI_Op_create) that folds
+ * '*len' MDHIM records from 'infile_v' into the corresponding records in
+ * 'inoutfile_v':
+ *   - put/get call counts and the server histogram are summed
+ *   - max payload sizes and max durations keep the larger value
+ *   - first-call timestamps keep the smaller non-zero value
+ *   - the server count is copied from the incoming record
+ * Each merged record is tagged with rank -1.
+ *
+ * Records are variable-sized (the server histogram trails the struct), so
+ * both buffers are walked in bytes using MDHIM_RECORD_SIZE instead of plain
+ * struct pointer arithmetic.  'datatype' is not consulted.
+ */
+static void mdhim_record_reduction_op(void *infile_v, void *inoutfile_v,
+    int *len, MPI_Datatype *datatype)
+{
+    char *in_pos = infile_v;
+    char *inout_pos = inoutfile_v;
+    int i, j;
+
+    (void)datatype;
+
+    for (i = 0; i < *len; i++) {
+        /* re-derive both record pointers every pass so that record i (and
+         * not always record 0) is the one being reduced */
+        struct darshan_mdhim_record *inrec =
+            (struct darshan_mdhim_record *)in_pos;
+        struct darshan_mdhim_record *inoutrec =
+            (struct darshan_mdhim_record *)inout_pos;
+        struct darshan_mdhim_record *tmp_rec;
+        /* can't use 'sizeof': the server count histogram is variable.
+         * Remember the size now so it is still known after tmp_rec is
+         * freed. */
+        size_t rec_size = MDHIM_RECORD_SIZE(inrec->counters[MDHIM_SERVERS]);
+
+        tmp_rec = calloc(1, rec_size);
+        if (!tmp_rec) {
+            /* a reduction callback cannot report failure; stop here and
+             * leave the remaining inout records as they were received */
+            return;
+        }
+        tmp_rec->base_rec.id = inrec->base_rec.id;
+        tmp_rec->base_rec.rank = -1;
+
+        /* sum */
+        for (j = MDHIM_PUTS; j <= MDHIM_GETS; j++) {
+            tmp_rec->counters[j] = inrec->counters[j] +
+                inoutrec->counters[j];
+        }
+
+        /* max */
+        for (j = MDHIM_PUT_MAX_SIZE; j <= MDHIM_GET_MAX_SIZE; j++) {
+            tmp_rec->counters[j] =
+                (inrec->counters[j] > inoutrec->counters[j]) ?
+                inrec->counters[j] :
+                inoutrec->counters[j];
+        }
+        tmp_rec->counters[MDHIM_SERVERS] = inrec->counters[MDHIM_SERVERS];
+
+        /* min non-zero value */
+        for (j = MDHIM_F_PUT_TIMESTAMP; j <= MDHIM_F_GET_TIMESTAMP; j++) {
+            if ((inrec->fcounters[j] < inoutrec->fcounters[j] &&
+                 inrec->fcounters[j] > 0)
+                || inoutrec->fcounters[j] == 0)
+                tmp_rec->fcounters[j] = inrec->fcounters[j];
+            else
+                tmp_rec->fcounters[j] = inoutrec->fcounters[j];
+        }
+
+        /* max */
+        for (j = MDHIM_F_PUT_MAX_DURATION; j <= MDHIM_F_GET_MAX_DURATION; j++) {
+            tmp_rec->fcounters[j] =
+                (inrec->fcounters[j] > inoutrec->fcounters[j]) ?
+                inrec->fcounters[j] :
+                inoutrec->fcounters[j];
+        }
+
+        /* dealing with the server histogram is a little odd.  Every client
+         * kept track of which servers it sent to, so we simply sum them all
+         * up.  The data lives at the end of the struct (remember, allocated
+         * based on the MDHIM_RECORD_SIZE macro) */
+        for (j = 0; j < tmp_rec->counters[MDHIM_SERVERS]; j++) {
+            printf("%d: inrec %d inoutred %d\n",
+                j,
+                inrec->server_histogram[j],
+                inoutrec->server_histogram[j]);
+            tmp_rec->server_histogram[j] = inrec->server_histogram[j] +
+                inoutrec->server_histogram[j];
+        }
+        dump_record(tmp_rec);
+
+        /* publish the merged record; tmp_rec is not touched after this, so
+         * it can be released immediately */
+        memcpy(inoutrec, tmp_rec, rec_size);
+        free(tmp_rec);
+
+        /* updating is not as simple as incrementing, unfortunately: step
+         * both cursors by the byte size of the record just processed */
+        in_pos += rec_size;
+        inout_pos += rec_size;
+    }
+}
/***********************************************************************
* shutdown function exported by the MDHIM module for coordinating with
* darshan-core *
@@ -418,29 +514,77 @@ static void mdhim_shutdown(
void **mdhim_buf,
int *mdhim_buf_sz)
{
+
+ int i, nr_servers=0;
+ /* other modules can declar this temporary record on the stack but I need a
+ * bit more space because of the server histogram */
+ struct mdhim_record_ref *rec_ref;
+ /* walking through these arrays will be awkward if there is more than one
+ * record: the 'server_histogram' field is variable */
+ struct darshan_mdhim_record *mdhim_rec_buf =
+ *(struct darshan_mdhim_record **)mdhim_buf;
+ struct darshan_mdhim_record *red_send_buf = NULL;
+ struct darshan_mdhim_record *red_recv_buf = NULL;
+ MPI_Datatype red_type;
+ MPI_Op red_op;
+
MDHIM_LOCK();
assert(mdhim_runtime);
- /* NOTE: this function can be used to run collective operations
- * prior to shutting down the module, as implied by the MPI
- * communicator passed in as the first agrument. Typically, module
- * developers will want to run a reduction on shared data records
- * (passed in in the 'shared_recs' array), but other collective
- * routines can be run here as well. For a detailed example
- * illustrating how to run shared file reductions, consider the
- * POSIX or MPIIO instrumentation modules, as they both implement
- * this functionality.
- */
+ /* taking the approach in darshan-mpiio.c, except MDHIM is always a "shared
+ * file" for now. */
+ assert(mdhim_runtime->rec_count == shared_rec_count);
- /* Just set the output size according to the number of records
- * currently being tracked. In general, the module can decide to
- * throw out records that have been previously registered by
- * shuffling around memory in 'mdhim_buf' -- 'mdhim_buf' and
- * 'mdhim_buf_sz' both are passed as pointers so they can be updated
- * by the shutdown function potentially
- */
- *mdhim_buf_sz = mdhim_runtime->rec_count *
- sizeof(struct darshan_mdhim_record);
+ /* unlike MPI-IO, we only have shared records */
+ /* can the number of mdhim servers change? I suppose if there were
+ * multiple mdhim instances, each instance could have a different number of
+ * servers. If that's the case, I'll have to make some of the memory allocations variable (and I don't do that yet) */
+ rec_ref = darshan_lookup_record_ref(mdhim_runtime->rec_id_hash,
+ &shared_recs[0], sizeof(darshan_record_id));
+ nr_servers = rec_ref->record_p->counters[MDHIM_SERVERS];
+
+ if (shared_rec_count && !getenv("DARSHAN_DISABLE_SHARED_REDUCTION"))
+ {
+ /* there is probably only one shared record */
+ for (i=1; i< shared_rec_count; i++)
+ {
+ rec_ref = darshan_lookup_record_ref(mdhim_runtime->rec_id_hash,
+ &shared_recs[i], sizeof(darshan_record_id));
+ assert(rec_ref);
+ assert(nr_servers == rec_ref->record_p->counters[MDHIM_SERVERS]);
+ }
+
+ red_send_buf = mdhim_rec_buf;
+ if (my_rank == 0)
+ {
+ red_recv_buf = malloc(shared_rec_count *
+ MDHIM_RECORD_SIZE(nr_servers));
+ if (!red_recv_buf)
+ {
+ MDHIM_UNLOCK();
+ return;
+ }
+ }
+ PMPI_Type_contiguous(MDHIM_RECORD_SIZE(nr_servers),
+ MPI_BYTE, &red_type);
+ PMPI_Type_commit(&red_type);
+ PMPI_Op_create(mdhim_record_reduction_op, 1, &red_op);
+ PMPI_Reduce(red_send_buf, red_recv_buf,
+ shared_rec_count, red_type, red_op, 0, mod_comm);
+
+ if (my_rank == 0)
+ {
+ memcpy(&(mdhim_rec_buf[0]), red_recv_buf,
+ shared_rec_count *
+ MDHIM_RECORD_SIZE(nr_servers));
+ free(red_recv_buf);
+ }
+
+ PMPI_Type_free(&red_type);
+ PMPI_Op_free(&red_op);
+ }
+ dump_record(mdhim_rec_buf);
+ *mdhim_buf_sz = shared_rec_count * sizeof (struct darshan_mdhim_record);
/* shutdown internal structures used for instrumenting */
mdhim_cleanup_runtime();
=====================================
darshan-util/darshan-mdhim-logutils.c
=====================================
--- a/darshan-util/darshan-mdhim-logutils.c
+++ b/darshan-util/darshan-mdhim-logutils.c
@@ -70,14 +70,7 @@ static int darshan_log_get_mdhim_record(darshan_fd fd, void** mdhim_buf_p)
if(fd->mod_map[DARSHAN_MDHIM_MOD].len == 0)
return(0);
- if(*mdhim_buf_p == NULL)
- {
- rec = malloc(sizeof(*rec));
- if(!rec)
- return(-1);
- }
-
- /* read the fixed-sized portion of the bdMDHIM module record from the
+ /* read the fixed-sized portion of the MDHIM module record from the
* darshan log file */
ret = darshan_log_get_mod(fd, DARSHAN_MDHIM_MOD, &tmp_rec,
sizeof(struct darshan_mdhim_record));
@@ -96,8 +89,7 @@ static int darshan_log_get_mdhim_record(darshan_fd fd, void** mdhim_buf_p)
DARSHAN_BSWAP64(&tmp_rec.counters[i]);
for (i=0; i< MDHIM_F_NUM_INDICES; i++)
DARSHAN_BSWAP64(&tmp_rec.fcounters[i]);
- for (i=0; i< tmp_rec.counters[MDHIM_SERVERS]; i++)
- DARSHAN_BSWAP32(&tmp_rec.server_histogram[i]);
+ DARSHAN_BSWAP32(&(tmp_rec.server_histogram[0]) );
}
if(*mdhim_buf_p == NULL)
@@ -112,6 +104,7 @@ static int darshan_log_get_mdhim_record(darshan_fd fd, void** mdhim_buf_p)
ret = darshan_log_get_mod(fd, DARSHAN_MDHIM_MOD,
&(rec->server_histogram[1]),
(rec->counters[MDHIM_SERVERS] - 1)*sizeof(int32_t));
+
if (ret < (rec->counters[MDHIM_SERVERS] -1)*sizeof(int32_t))
ret = -1;
else
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/compare/c180b5b30c59e5148405af8f334d2e9c7ba41d9d...63009f3434e2dccd85f1ea6abcbb46b9e5343cdd
---
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/compare/c180b5b30c59e5148405af8f334d2e9c7ba41d9d...63009f3434e2dccd85f1ea6abcbb46b9e5343cdd
You're receiving this email because of your account on xgitlab.cels.anl.gov.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20180406/39a5be53/attachment-0001.html>
More information about the Darshan-commits
mailing list