[Darshan-commits] [Darshan] branch, dev-bgq-mod, updated. darshan-2.3.1-105-g94efcc1

Service Account git at mcs.anl.gov
Mon Jun 29 17:42:23 CDT 2015


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Darshan".

The branch, dev-bgq-mod has been updated
       via  94efcc1729a753e7cdb2894f8219262c67ba3961 (commit)
       via  7dc249cbc6ed3f9ef7263270443067a744d5111f (commit)
      from  c3f4cafd7cbb8a4d6bf044de9660fc95be93c2e7 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 94efcc1729a753e7cdb2894f8219262c67ba3961
Author: Kevin Harms <harms at alcf.anl.gov>
Date:   Mon Jun 29 22:41:43 2015 +0000

    Updates to bgq module core

commit 7dc249cbc6ed3f9ef7263270443067a744d5111f
Author: Kevin Harms <harms at alcf.anl.gov>
Date:   Mon Jun 29 22:40:21 2015 +0000

    Initial parser code

-----------------------------------------------------------------------

Summary of changes:
 darshan-runtime/lib/darshan-bgq.c                  |   82 ++++++++++++++++++--
 darshan-runtime/lib/darshan-core.c                 |    5 +
 darshan-runtime/lib/darshan-mpiio.c                |   23 +++---
 darshan-runtime/lib/darshan-posix.c                |   28 ++++---
 darshan-util/Makefile.in                           |    6 +-
 ...han-posix-logutils.c => darshan-bgq-logutils.c} |   10 +-
 darshan-util/darshan-bgq-logutils.h                |   15 ++++
 ...darshan-mpiio-parser.c => darshan-bgq-parser.c} |   61 ++++++++-------
 8 files changed, 164 insertions(+), 66 deletions(-)
 copy darshan-util/{darshan-posix-logutils.c => darshan-bgq-logutils.c} (79%)
 create mode 100644 darshan-util/darshan-bgq-logutils.h
 copy darshan-util/{darshan-mpiio-parser.c => darshan-bgq-parser.c} (76%)


Diff of changes:
diff --git a/darshan-runtime/lib/darshan-bgq.c b/darshan-runtime/lib/darshan-bgq.c
index 64c0825..ea0a2cb 100644
--- a/darshan-runtime/lib/darshan-bgq.c
+++ b/darshan-runtime/lib/darshan-bgq.c
@@ -41,7 +41,6 @@ struct bgq_runtime
     struct darshan_bgq_record record;
 };
 
-/* null_runtime is the global data structure encapsulating "NULL" module state */
 static struct bgq_runtime *bgq_runtime = NULL;
 static pthread_mutex_t bgq_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
 
@@ -52,12 +51,14 @@ static int instrumentation_disabled = 0;
 static int my_rank = -1;
 
 /* internal helper functions for the "NULL" module */
-static void bgq_runtime_initialize(void);
+void bgq_runtime_initialize(void);
 
 /* forward declaration for module functions needed to interface with darshan-core */
 static void bgq_begin_shutdown(void);
 static void bgq_get_output_data(void **buffer, int *size);
 static void bgq_shutdown(void);
+static void bgq_setup_reduction(darshan_record_id *shared_recs,int *shared_rec_count,void **send_buf,void **recv_buf,int *rec_size);
+static void bgq_record_reduction_op(void* infile_v,void* inoutfile_v,int *len,MPI_Datatype *datatype);
 
 /* macros for obtaining/releasing the "NULL" module lock */
 #define BGQ_LOCK() pthread_mutex_lock(&bgq_runtime_mutex)
@@ -70,10 +71,13 @@ static void capture(struct darshan_bgq_record *rec)
 {
 #ifdef __bgq__
     Personality_t person;
+    int r;
 
     rec->counters[BGQ_CSJOBID] = Kernel_GetJobID();
     rec->counters[BGQ_RANKSPERNODE] = Kernel_ProcessCount();
 
+    rec->counters[BGQ_INODES] = MPIX_IO_node();
+
     r = Kernel_GetPersonality(&person, sizeof(person));
     if (r == 0)
     {
@@ -104,14 +108,14 @@ static void capture(struct darshan_bgq_record *rec)
  * Internal functions for manipulating BGQ module state *
  **********************************************************/
 
-static void bgq_runtime_initialize()
+void bgq_runtime_initialize()
 {
     /* struct of function pointers for interfacing with darshan-core */
     struct darshan_module_funcs bgq_mod_fns =
     {
         .begin_shutdown = bgq_begin_shutdown,
-        .setup_reduction = NULL, 
-        .record_reduction_op = NULL, 
+        .setup_reduction = bgq_setup_reduction, 
+        .record_reduction_op = bgq_record_reduction_op, 
         .get_output_data = bgq_get_output_data,
         .shutdown = bgq_shutdown
     };
@@ -198,9 +202,9 @@ static void bgq_get_output_data(
      * I/O records, and set the output size according to the number of records
      * currently being tracked.
      */
-    if (bgq_runtime)
+    if ((bgq_runtime) && (my_rank == 0))
     {
-        *buffer = (void *)&bgq_runtime->record;
+        *buffer = &bgq_runtime->record;
         *size = sizeof(struct darshan_bgq_record);
     }
     else
@@ -224,6 +228,70 @@ static void bgq_shutdown()
     return;
 }
 
+static void bgq_setup_reduction(  /* reduction setup hook: offers the single BGQ record when its id is in the shared list */
+    darshan_record_id *shared_recs,  /* in: record ids to search */
+    int *shared_rec_count,  /* in: number of ids in shared_recs; out: set to 1 on a match, else untouched */
+    void **send_buf,  /* out: set to &bgq_runtime->record on a match, else untouched */
+    void **recv_buf,  /* out: set to the same record; NOTE(review): aliases send_buf - confirm the caller's reduction allows this */
+    int *rec_size)  /* out: sizeof(struct darshan_bgq_record) on a match, else untouched */
+{
+    int i;
+    int found = 0;  /* must start cleared: an empty or non-matching list never assigns it */
+
+    for (i = 0; i < *shared_rec_count; i++)
+    {
+        if (shared_recs[i] == bgq_runtime->record.f_id)
+        {
+            found = 1;
+            break;
+        }
+    }
+
+    if (found)
+    {
+        printf("found bgq shared record\n");
+        *rec_size = sizeof(struct darshan_bgq_record);
+        *shared_rec_count = 1;  /* this module tracks exactly one record */
+        *send_buf = &bgq_runtime->record;
+        *recv_buf = &bgq_runtime->record;
+    }
+
+    return;
+}
+
+static void bgq_record_reduction_op(  /* reduction operator: compares BGQ records pairwise and reports differing counters on stderr */
+    void* infile_v,  /* array of *len struct darshan_bgq_record; only read */
+    void* inoutfile_v,  /* array of *len struct darshan_bgq_record; only read - nothing is merged into it */
+    int* len,  /* number of record pairs to compare */
+    MPI_Datatype *datatype)  /* not referenced in the body */
+{
+    int i;
+    int j;
+    struct darshan_bgq_record *infile = infile_v;
+    struct darshan_bgq_record *inoutfile = inoutfile_v;
+    /* only counters[] is checked below; fcounters[] is not compared */
+    for (i = 0; i<*len; i++)
+    {
+        for (j = 0; j < BGQ_NUM_INDICES; j++)
+        {
+            if (infile->counters[j] != inoutfile->counters[j])
+            {
+                /* diagnostic only, no correction; NOTE(review): %lu assumes 64-bit unsigned long - the parser prints these fields with PRIu64 */
+                fprintf(stderr,
+                        "%lu counter mismatch: %d [%lu] [%lu]\n",
+                        infile->f_id,
+                        j,
+                        infile->counters[j],
+                        inoutfile->counters[j]);
+            }
+        }
+        infile++;  /* advance both cursors to the next record pair */
+        inoutfile++;
+    }
+
+    return;
+}
+
 /*
  * Local variables:
  *  c-indent-level: 4
diff --git a/darshan-runtime/lib/darshan-core.c b/darshan-runtime/lib/darshan-core.c
index aee8c6f..9cde110 100644
--- a/darshan-runtime/lib/darshan-core.c
+++ b/darshan-runtime/lib/darshan-core.c
@@ -208,6 +208,11 @@ void darshan_core_initialize(int argc, char **argv)
         }
     }
 
+#ifdef __bgq__
+    extern void bgq_runtime_initialize();
+    bgq_runtime_initialize();
+#endif
+
     return;
 }
 
diff --git a/darshan-runtime/lib/darshan-mpiio.c b/darshan-runtime/lib/darshan-mpiio.c
index 2d3a82d..0cba6da 100644
--- a/darshan-runtime/lib/darshan-mpiio.c
+++ b/darshan-runtime/lib/darshan-mpiio.c
@@ -453,16 +453,19 @@ static void mpiio_setup_reduction(
 {
     struct mpiio_file_runtime *file;
     int i;
+    int count;
 
     assert(mpiio_runtime);
 
     /* necessary initialization of shared records (e.g., change rank to -1) */
-    for(i = 0; i < *shared_rec_count; i++)
+    for(i = 0,count = 0; i < *shared_rec_count; i++)
     {
         HASH_FIND(hlink, mpiio_runtime->file_hash, &shared_recs[i],
             sizeof(darshan_record_id), file);
-        assert(file);
-
+        if (!file) {
+            continue;
+        }
+        count++;
         file->file_record->rank = -1;
     }
 
@@ -475,22 +478,18 @@ static void mpiio_setup_reduction(
 
     /* make *send_buf point to the shared files at the end of sorted array */
     *send_buf =
-        &(mpiio_runtime->file_record_array[mpiio_runtime->file_array_ndx-(*shared_rec_count)]);
+        &(mpiio_runtime->file_record_array[mpiio_runtime->file_array_ndx-(count)]);
 
     /* allocate memory for the reduction output on rank 0 */
-    if(my_rank == 0)
+    if((my_rank == 0) && (count > 0))
     {
-        *recv_buf = malloc(*shared_rec_count * sizeof(struct darshan_mpiio_file));
-        if(!(*recv_buf))
-            return;
+        *recv_buf = malloc(count * sizeof(struct darshan_mpiio_file));
+        mpiio_runtime->red_buf = *recv_buf;
     }
 
     *rec_size = sizeof(struct darshan_mpiio_file);
 
-    /* TODO: cleaner way to do this? */
-    if(my_rank == 0)
-        mpiio_runtime->red_buf = *recv_buf;
-    mpiio_runtime->shared_rec_count = *shared_rec_count;
+    mpiio_runtime->shared_rec_count = count;
 
     return;
 }
diff --git a/darshan-runtime/lib/darshan-posix.c b/darshan-runtime/lib/darshan-posix.c
index c66a118..2f03f7a 100644
--- a/darshan-runtime/lib/darshan-posix.c
+++ b/darshan-runtime/lib/darshan-posix.c
@@ -1655,15 +1655,19 @@ static void posix_setup_reduction(
     struct posix_file_runtime *file;
     int i;
     double posix_time;
+    int count;
 
     assert(posix_runtime);
 
     /* necessary initialization of shared records (e.g., change rank to -1) */
-    for(i = 0; i < *shared_rec_count; i++)
+    for(i = 0,count = 0; i < *shared_rec_count; i++)
     {
         HASH_FIND(hlink, posix_runtime->file_hash, &shared_recs[i],
             sizeof(darshan_record_id), file);
-        assert(file);
+        if (!file) {
+            continue;
+        }
+        count++;
 
         posix_time =
             file->file_record->fcounters[POSIX_F_READ_TIME] +
@@ -1702,22 +1706,22 @@ static void posix_setup_reduction(
 
     /* make *send_buf point to the shared files at the end of sorted array */
     *send_buf =
-        &(posix_runtime->file_record_array[posix_runtime->file_array_ndx-(*shared_rec_count)]);
+        &(posix_runtime->file_record_array[posix_runtime->file_array_ndx-(count)]);
 
     /* allocate memory for the reduction output on rank 0 */
-    if(my_rank == 0)
+    if((my_rank == 0) && (count > 0))
     {
-        *recv_buf = malloc(*shared_rec_count * sizeof(struct darshan_posix_file));
-        if(!(*recv_buf))
-            return;
+        printf("shared count = %d\n", count);
+        *recv_buf = malloc(count * sizeof(struct darshan_posix_file));
+        posix_runtime->red_buf = *recv_buf;
+        printf("recv_buf = %p\n", *recv_buf);
+//        if(!(*recv_buf))
+//            return;
     }
 
     *rec_size = sizeof(struct darshan_posix_file);
 
-    /* TODO: cleaner way to do this? */
-    if(my_rank == 0)
-        posix_runtime->red_buf = *recv_buf;
-    posix_runtime->shared_rec_count = *shared_rec_count;
+    posix_runtime->shared_rec_count = count;
 
     return;
 }
@@ -1962,7 +1966,7 @@ static void posix_get_output_data(
 
     /* TODO: cleaner way to do this? */
     /* clean up reduction state */
-    if(my_rank == 0)
+    if((my_rank == 0) && (posix_runtime->red_buf))
     {
         int tmp_ndx = posix_runtime->file_array_ndx - posix_runtime->shared_rec_count;
         memcpy(&(posix_runtime->file_record_array[tmp_ndx]), posix_runtime->red_buf,
diff --git a/darshan-util/Makefile.in b/darshan-util/Makefile.in
index d3c9f6a..20544f1 100644
--- a/darshan-util/Makefile.in
+++ b/darshan-util/Makefile.in
@@ -1,4 +1,4 @@
-all: libdarshan-util.a darshan-base-parser darshan-posix-parser darshan-mpiio-parser
+all: libdarshan-util.a darshan-base-parser darshan-posix-parser darshan-mpiio-parser darshan-bgq-parser
 
 DESTDIR =
 srcdir = @srcdir@
@@ -74,6 +74,9 @@ darshan-posix-parser: darshan-posix-parser.c darshan-logutils.h darshan-posix-lo
 darshan-mpiio-parser: darshan-mpiio-parser.c darshan-logutils.h darshan-mpiio-logutils.h $(DARSHAN_LOG_FORMAT) $(srcdir)/../darshan-mpiio-log-format.h libdarshan-util.a | uthash-1.9.2
 	$(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ $(LIBS) 
 
+darshan-bgq-parser: darshan-bgq-parser.c darshan-logutils.h darshan-bgq-logutils.h $(DARSHAN_LOG_FORMAT) $(srcdir)/../darshan-bgq-log-format.h libdarshan-util.a | uthash-1.9.2
+	$(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ $(LIBS) 
+
 #darshan-parser: darshan-parser.c $(DARSHAN_LOG_FORMAT) darshan-logutils.h darshan-logutils.o | uthash-1.9.2
 #	$(CC) $(CFLAGS) $(LDFLAGS) $< darshan-logutils.o -o $@ $(LIBS) 
 
@@ -109,6 +112,7 @@ install:: all
 	install -m 755 darshan-base-parser $(bindir)
 	install -m 755 darshan-posix-parser $(bindir)
 	install -m 755 darshan-mpiio-parser $(bindir)
+	install -m 755 darshan-bgq-parser $(bindir)
 #	install -m 755 darshan-convert $(bindir)
 #	install -m 755 darshan-diff $(bindir)
 #	install -m 755 darshan-analyzer $(bindir)
diff --git a/darshan-util/darshan-posix-logutils.c b/darshan-util/darshan-bgq-logutils.c
similarity index 79%
copy from darshan-util/darshan-posix-logutils.c
copy to darshan-util/darshan-bgq-logutils.c
index 51ba78d..f96ec79 100644
--- a/darshan-util/darshan-posix-logutils.c
+++ b/darshan-util/darshan-bgq-logutils.c
@@ -17,9 +17,9 @@
 #include <fcntl.h>
 #include <errno.h>
 
-#include "darshan-posix-logutils.h"
+#include "darshan-bgq-logutils.h"
 
-int darshan_log_get_posix_file(darshan_fd fd, struct darshan_posix_file *file)
+int darshan_log_get_bgq_file(darshan_fd fd, struct darshan_bgq_record *file)
 {
     int i;
     int ret;
@@ -29,7 +29,7 @@ int darshan_log_get_posix_file(darshan_fd fd, struct darshan_posix_file *file)
      */
     memset(file, 0, sizeof(*file));
 
-    ret = darshan_log_get_moddat(fd, DARSHAN_POSIX_MOD,
+    ret = darshan_log_get_moddat(fd, DARSHAN_BGQ_MOD,
         (void *)file, sizeof(*file));
     if(ret == 1)
     {
@@ -38,9 +38,9 @@ int darshan_log_get_posix_file(darshan_fd fd, struct darshan_posix_file *file)
             /* swap bytes if necessary */
             DARSHAN_BSWAP64(&file->f_id);
             DARSHAN_BSWAP64(&file->rank);
-            for(i=0; i<POSIX_NUM_INDICES; i++)
+            for(i=0; i<BGQ_NUM_INDICES; i++)
                 DARSHAN_BSWAP64(&file->counters[i]);
-            for(i=0; i<POSIX_F_NUM_INDICES; i++)
+            for(i=0; i<BGQ_F_NUM_INDICES; i++)
                 DARSHAN_BSWAP64(&file->fcounters[i]);
         }
     }
diff --git a/darshan-util/darshan-bgq-logutils.h b/darshan-util/darshan-bgq-logutils.h
new file mode 100644
index 0000000..66ae974
--- /dev/null
+++ b/darshan-util/darshan-bgq-logutils.h
@@ -0,0 +1,15 @@
+/*
+ * Copyright (C) 2015 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ *
+ */
+
+#ifndef __DARSHAN_BGQ_LOG_UTILS_H
+#define __DARSHAN_BGQ_LOG_UTILS_H
+
+#include "darshan-logutils.h"
+#include "darshan-bgq-log-format.h"
+/* Fill *file with the next BGQ module record read from fd; the parser treats 1 as "record read" and <0 as an error. */
+int darshan_log_get_bgq_file(darshan_fd fd, struct darshan_bgq_record *file);
+
+#endif
diff --git a/darshan-util/darshan-mpiio-parser.c b/darshan-util/darshan-bgq-parser.c
similarity index 76%
copy from darshan-util/darshan-mpiio-parser.c
copy to darshan-util/darshan-bgq-parser.c
index 683194f..03fdc27 100644
--- a/darshan-util/darshan-mpiio-parser.c
+++ b/darshan-util/darshan-bgq-parser.c
@@ -17,7 +17,7 @@
 #include <assert.h>
 
 #include "darshan-logutils.h"
-#include "darshan-mpiio-logutils.h"
+#include "darshan-bgq-logutils.h"
 #include "uthash-1.9.2/src/uthash.h"
 
 int main(int argc, char **argv)
@@ -38,7 +38,7 @@ int main(int argc, char **argv)
     char *token;
     char *save;
     char buffer[DARSHAN_JOB_METADATA_LEN];
-    struct darshan_mpiio_file next_file;
+    struct darshan_bgq_record next_file;
 
     assert(argc == 2);
     filename = argv[1];
@@ -53,11 +53,6 @@ int main(int argc, char **argv)
         return(-1);
     }
 
-    /**************************************************************/
-    /* TODO: some of this code should be shared or in a separate executable
-     * instead of repeated in each module parser
-     */
-
     /* read darshan log header */
     ret = darshan_log_getheader(fd, &header);
     if(ret < 0)
@@ -87,7 +82,7 @@ int main(int argc, char **argv)
 
     /* print job summary */
     printf("# darshan log version: %s\n", header.version_string);
-    printf("# size of MPIIO file statistics: %zu bytes\n", sizeof(struct darshan_mpiio_file));
+    printf("# size of BGQ file statistics: %zu bytes\n", sizeof(struct darshan_bgq_record));
     printf("# size of job statistics: %zu bytes\n", sizeof(struct darshan_job));
     printf("# exe: %s\n", tmp_string);
     printf("# uid: %" PRId64 "\n", job.uid);
@@ -148,15 +143,12 @@ int main(int argc, char **argv)
         return(-1);
     }
 
-    /* end TODO */
-    /*******************************************/
-
     printf("\n*** FILE RECORD DATA ***\n");
  
-    ret = darshan_log_get_mpiio_file(fd, &next_file);
+    ret = darshan_log_get_bgq_file(fd, &next_file);
     if(ret < 0)
     {
-        fprintf(stderr, "darshan_log_get_mpiio_file() failed to read next record.\n");
+        fprintf(stderr, "darshan_log_get_posix_file() failed to read next record.\n");
         darshan_log_close(fd);
         return(-1);
     }
@@ -167,7 +159,7 @@ int main(int argc, char **argv)
         return(0);
     }
    
-    /* loop over each stored MPIIO file record and print counters */
+    /* loop over each stored POSIX file record and print counters */
     i = 1;
     do
     {
@@ -177,24 +169,35 @@ int main(int argc, char **argv)
 
         printf("\tRecord %d: id=%"PRIu64" (path=%s, rank=%"PRId64")\n",
             i, next_file.f_id, ref->rec.name, next_file.rank);
-        /* TODO: does it make sense to put these in a header or something?
-         * Down side of listing them here is ordering dependency between enum
-         * in header and names here.
-         */
+
         printf(
-            "\t\tMPIIO_INDEP_OPENS:\t%"PRIu64"\n"
-            "\t\tMPIIO_COLL_OPENS:\t%"PRIu64"\n"
-            "\t\tMPIIO_HINTS:\t%"PRIu64"\n"
-            "\t\tMPIIO_F_META_TIME:\t%lf\n"
-            "\t\tMPIIO_F_OPEN_TIMESTAMP:\t%lf\n",
-            next_file.counters[DARSHAN_MPIIO_INDEP_OPENS],
-            next_file.counters[DARSHAN_MPIIO_COLL_OPENS],
-            next_file.counters[DARSHAN_MPIIO_HINTS],
-            next_file.fcounters[DARSHAN_MPIIO_F_META_TIME],
-            next_file.fcounters[DARSHAN_MPIIO_F_OPEN_TIMESTAMP]);
+            "\t\tBGQ_CSJOBID:\t%"PRIu64"\n"
+            "\t\tBGQ_NNODES:\t%"PRIu64"\n"
+            "\t\tBGQ_RPN:\t%"PRIu64"\n"
+            "\t\tBGQ_DDRPERNODE:\t%"PRIu64"\n"
+            "\t\tBGQ_INODES:\t%"PRIu64"\n"
+            "\t\tBGQ_ANODES:\t%"PRIu64"\n"
+            "\t\tBGQ_BNODES:\t%"PRIu64"\n"
+            "\t\tBGQ_CNODES:\t%"PRIu64"\n"
+            "\t\tBGQ_DNODES:\t%"PRIu64"\n"
+            "\t\tBGQ_ENODES:\t%"PRIu64"\n"
+            "\t\tBGQ_TORUSEN:\t%"PRIx64"\n"
+            "\t\tBGQ_TIMESTAMP:\t%lf\n",
+            next_file.counters[BGQ_CSJOBID],
+            next_file.counters[BGQ_NNODES],
+            next_file.counters[BGQ_RANKSPERNODE],
+            next_file.counters[BGQ_DDRPERNODE],
+            next_file.counters[BGQ_INODES],
+            next_file.counters[BGQ_ANODES],
+            next_file.counters[BGQ_BNODES],
+            next_file.counters[BGQ_CNODES],
+            next_file.counters[BGQ_DNODES],
+            next_file.counters[BGQ_ENODES],
+            next_file.counters[BGQ_TORUSENABLED],
+            next_file.fcounters[BGQ_F_TIMESTAMP]);
 
         i++;
-    } while((ret = darshan_log_get_mpiio_file(fd, &next_file)) == 1);
+    } while((ret = darshan_log_get_bgq_file(fd, &next_file)) == 1);
 
     /* free mount info */
     for(i=0; i<mount_count; i++)


hooks/post-receive
--



More information about the Darshan-commits mailing list