[Darshan-commits] [Git][darshan/darshan][mmap-dev] 2 commits: some more POSIX and common code cleanup

Shane Snyder xgitlab at cels.anl.gov
Fri Jun 10 10:00:05 CDT 2016


Shane Snyder pushed to branch mmap-dev at darshan / darshan


Commits:
abf1d5ea by Shane Snyder at 2016-06-10T09:57:50-05:00
some more POSIX and common code cleanup

- - - - -
31e76e2c by Shane Snyder at 2016-06-10T09:58:06-05:00
update MPI-IO module to reflect recent changes

- - - - -


6 changed files:

- darshan-runtime/Makefile.in
- darshan-runtime/darshan-common.h
- darshan-runtime/darshan.h
- darshan-runtime/lib/darshan-common.c
- darshan-runtime/lib/darshan-mpiio.c
- darshan-runtime/lib/darshan-posix.c


Changes:

=====================================
darshan-runtime/Makefile.in
=====================================
--- a/darshan-runtime/Makefile.in
+++ b/darshan-runtime/Makefile.in
@@ -35,8 +35,8 @@ CFLAGS_SHARED = -DDARSHAN_CONFIG_H=\"darshan-runtime-config.h\" -I . -I$(srcdir)
 
 LIBS = -lz @LIBBZ2@
 
-DARSHAN_STATIC_MOD_OBJS = lib/darshan-posix.o
-# TODO: lib/darshan-mpiio.o lib/darshan-hdf5.o lib/darshan-pnetcdf.o
+DARSHAN_STATIC_MOD_OBJS = lib/darshan-posix.o lib/darshan-mpiio.o
+# TODO: lib/darshan-hdf5.o lib/darshan-pnetcdf.o
 DARSHAN_DYNAMIC_MOD_OBJS = lib/darshan-posix.po lib/darshan-mpiio.po lib/darshan-hdf5.po lib/darshan-pnetcdf.po
 
 ifdef DARSHAN_USE_BGQ


=====================================
darshan-runtime/darshan-common.h
=====================================
--- a/darshan-runtime/darshan-common.h
+++ b/darshan-runtime/darshan-common.h
@@ -181,12 +181,6 @@ void darshan_iter_record_refs(
     void *hash_head,
     void (*iter_action)(void *));
 
-darshan_record_id darshan_record_id_from_path(
-    const char *path);
-
-darshan_record_id darshan_record_id_from_name(
-    const char *name);
-
 /* darshan_clean_file_path()
  *
  * Allocate a new string that contains a new cleaned-up version of


=====================================
darshan-runtime/darshan.h
=====================================
--- a/darshan-runtime/darshan.h
+++ b/darshan-runtime/darshan.h
@@ -81,8 +81,8 @@ typedef void (*darshan_module_shutdown)(
  *
  * Register module identifier 'mod_id' with the darshan-core runtime
  * environment, allowing the module to store I/O characterization data.
- * 'funcs' is a pointer to a structure containing each of the function
- * pointers required by darshan-core to shut down the module.
+ * 'mod_shutdown_func' is a pointer to a function responsible for
+ * shutting down the module and returning final output data to darshan-core.
  * 'inout_mod_buf_size' is an input/output argument, with it being
  * set to the requested amount of module memory on input, and set to
  * the amount allocated by darshan-core on output. If given, 'rank' is


=====================================
darshan-runtime/lib/darshan-common.c
=====================================
--- a/darshan-runtime/lib/darshan-common.c
+++ b/darshan-runtime/lib/darshan-common.c
@@ -111,27 +111,6 @@ void darshan_iter_record_refs(void *hash_head, void (*iter_action)(void *))
     return;
 }
 
-darshan_record_id darshan_record_id_from_path(const char *path)
-{
-    char *newpath = NULL;
-    darshan_record_id rec_id;
-
-    newpath = darshan_clean_file_path(path);
-    if(!newpath)
-        newpath = (char *)path;
-
-    rec_id = darshan_record_id_from_name(newpath);
-
-    if(newpath != path)
-        free(newpath);
-    return(rec_id);
-}
-
-darshan_record_id darshan_record_id_from_name(const char *name)
-{
-    return(darshan_core_gen_record_id(name));
-}
-
 char* darshan_clean_file_path(const char* path)
 {
     char* newpath = NULL;


=====================================
darshan-runtime/lib/darshan-mpiio.c
=====================================
--- a/darshan-runtime/lib/darshan-mpiio.c
+++ b/darshan-runtime/lib/darshan-mpiio.c
@@ -22,74 +22,40 @@
 #include <assert.h>
 #include <pthread.h>
 
-#include "uthash.h"
-
 #include "darshan.h"
 #include "darshan-dynamic.h"
 
-/* The mpiio_file_runtime structure maintains necessary runtime metadata
+/* The mpiio_file_record_ref structure maintains necessary runtime metadata
  * for the MPIIO file record (darshan_mpiio_file structure, defined in
- * darshan-mpiio-log-format.h) pointed to by 'file_record'. This metadata
+ * darshan-mpiio-log-format.h) pointed to by 'file_rec'. This metadata
  * assists with the instrumenting of specific statistics in the file record.
- * 'hlink' is a hash table link structure used to add/remove this record
- * from the hash table of MPIIO file records for this process. 
  *
  * RATIONALE: the MPIIO module needs to track some stateful, volatile 
  * information about each open file (like the current file offset, most recent 
  * access time, etc.) to aid in instrumentation, but this information can't be
  * stored in the darshan_mpiio_file struct because we don't want it to appear in
- * the final darshan log file.  We therefore associate a mpiio_file_runtime
- * struct with each darshan_mpiio_file struct in order to track this information.
-  *
- * NOTE: There is a one-to-one mapping of mpiio_file_runtime structs to
- * darshan_mpiio_file structs.
+ * the final darshan log file.  We therefore associate a mpiio_file_record_ref
+ * struct with each darshan_mpiio_file struct in order to track this information
+ * (i.e., the mapping between mpiio_file_record_ref structs and darshan_mpiio_file
+ * structs is one-to-one).
  *
- * NOTE: The mpiio_file_runtime struct contains a pointer to a darshan_mpiio_file
- * struct (see the *file_record member) rather than simply embedding an entire
- * darshan_mpiio_file struct.  This is done so that all of the darshan_mpiio_file
- * structs can be kept contiguous in memory as a single array to simplify
- * reduction, compression, and storage.
+ * NOTE: we use the 'darshan_record_ref' interface (in darshan-common) to
+ * associate different types of handles with this mpiio_file_record_ref struct.
+ * This allows us to index this struct (and the underlying file record) by using
+ * either the corresponding Darshan record identifier (derived from the filename)
+ * or by a generated MPI file handle, for instance. So, while there should only
+ * be a single Darshan record identifier that indexes a mpiio_file_record_ref,
+ * there could be multiple open file handles that index it.
  */
-struct mpiio_file_runtime
+struct mpiio_file_record_ref
 {
-    struct darshan_mpiio_file* file_record;
+    struct darshan_mpiio_file *file_rec;
     enum darshan_io_type last_io_type;
     double last_meta_end;
     double last_read_end;
     double last_write_end;
     void *access_root;
     int access_count;
-    UT_hash_handle hlink;
-};
-
-/* The mpiio_file_runtime_ref structure is used to associate a MPIIO
- * file handle with an already existing MPIIO file record. This is
- * necessary as many MPIIO I/O functions take only a file handle as input,
- * but MPIIO file records are indexed by their full file paths (i.e., darshan
- * record identifiers for MPIIO files are created by hashing the file path).
- * In other words, this structure is necessary as it allows us to look up a
- * file record either by a pathname (mpiio_file_runtime) or by MPIIO file
- * descriptor (mpiio_file_runtime_ref), depending on which parameters are
- * available. This structure includes another hash table link, since separate
- * hashes are maintained for mpiio_file_runtime structures and mpiio_file_runtime_ref
- * structures.
- *
- * RATIONALE: In theory the file handle information could be included in the
- * mpiio_file_runtime struct rather than in a separate structure here.  The
- * reason we don't do that is to handle the potential for an MPI implementation
- * to produce a new file handle instance each time MPI_File_open() is called on a
- * file.  Thus there might be multiple file handles referring to the same
- * underlying record.
- *
- * NOTE: there are potentially multiple mpiio_file_runtime_ref structures
- * referring to a single mpiio_file_runtime structure.  Most of the time there is
- * only one, however.
- */
-struct mpiio_file_runtime_ref
-{
-    struct mpiio_file_runtime* file;
-    MPI_File fh;
-    UT_hash_handle hlink;
 };
 
 /* The mpiio_runtime structure maintains necessary state for storing
@@ -98,92 +64,106 @@ struct mpiio_file_runtime_ref
  */
 struct mpiio_runtime
 {
-    struct mpiio_file_runtime* file_runtime_array;
-    struct darshan_mpiio_file* file_record_array;
-    int file_array_ndx;
-    struct mpiio_file_runtime* file_hash;
-    struct mpiio_file_runtime_ref* fh_hash;
+    void *rec_id_hash;
+    void *fh_hash;
+    int file_rec_count;
 };
 
+static void mpiio_runtime_initialize(
+    void);
+static struct mpiio_file_record_ref *mpiio_track_new_file_record(
+    darshan_record_id rec_id, const char *path);
+static int mpiio_record_compare(
+    const void* a, const void* b);
+static void mpiio_finalize_file_records(
+    void *rec_ref_p);
+static void mpiio_record_reduction_op(
+    void* infile_v, void* inoutfile_v, int *len, MPI_Datatype *datatype);
+static void mpiio_shared_record_variance(
+    MPI_Comm mod_comm, struct darshan_mpiio_file *inrec_array,
+    struct darshan_mpiio_file *outrec_array, int shared_rec_count);
+static void mpiio_cleanup_runtime(
+    void);
+
+static void mpiio_shutdown(
+    MPI_Comm mod_comm, darshan_record_id *shared_recs,
+    int shared_rec_count, void **mpiio_buf, int *mpiio_buf_sz);
+
 static struct mpiio_runtime *mpiio_runtime = NULL;
 static pthread_mutex_t mpiio_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
 static int instrumentation_disabled = 0;
 static int my_rank = -1;
 
-static void mpiio_runtime_initialize(void);
-static struct mpiio_file_runtime* mpiio_file_by_name(const char *name);
-static struct mpiio_file_runtime* mpiio_file_by_name_setfh(const char* name, MPI_File fh);
-static struct mpiio_file_runtime* mpiio_file_by_fh(MPI_File fh);
-static void mpiio_file_close_fh(MPI_File fh);
-static int mpiio_record_compare(const void* a, const void* b);
-static void mpiio_record_reduction_op(void* infile_v, void* inoutfile_v,
-    int *len, MPI_Datatype *datatype);
-static void mpiio_shared_record_variance(MPI_Comm mod_comm,
-    struct darshan_mpiio_file *inrec_array, struct darshan_mpiio_file *outrec_array,
-    int shared_rec_count);
-
-static void mpiio_begin_shutdown(void);
-static void mpiio_get_output_data(MPI_Comm mod_comm, darshan_record_id *shared_recs,
-    int shared_rec_count, void **mpiio_buf, int *mpiio_buf_sz);
-static void mpiio_shutdown(void);
-
 #define MPIIO_LOCK() pthread_mutex_lock(&mpiio_runtime_mutex)
 #define MPIIO_UNLOCK() pthread_mutex_unlock(&mpiio_runtime_mutex)
 
+#define MPIIO_PRE_RECORD() do { \
+    MPIIO_LOCK(); \
+    if(!mpiio_runtime && !instrumentation_disabled) mpiio_runtime_initialize(); \
+    if(!mpiio_runtime) { \
+        MPIIO_UNLOCK(); \
+        return(ret); \
+    } \
+} while(0)
+
+#define MPIIO_POST_RECORD() do { \
+    MPIIO_UNLOCK(); \
+} while(0)
+
 #define MPIIO_RECORD_READ(__ret, __fh, __count, __datatype, __counter, __tm1, __tm2) do { \
-    struct mpiio_file_runtime* file; \
+    struct mpiio_file_record_ref *rec_ref; \
     int size = 0; \
     double __elapsed = __tm2-__tm1; \
     if(__ret != MPI_SUCCESS) break; \
-    file = mpiio_file_by_fh(__fh); \
-    if(!file) break; \
+    rec_ref = darshan_lookup_record_ref(mpiio_runtime->fh_hash, &(__fh), sizeof(MPI_File)); \
+    if(!rec_ref) break; \
     DARSHAN_MPI_CALL(PMPI_Type_size)(__datatype, &size);  \
     size = size * __count; \
-    DARSHAN_BUCKET_INC(&(file->file_record->counters[MPIIO_SIZE_READ_AGG_0_100]), size); \
-    darshan_common_val_counter(&file->access_root, &file->access_count, size, \
-        &(file->file_record->counters[MPIIO_ACCESS1_ACCESS]), \
-        &(file->file_record->counters[MPIIO_ACCESS1_COUNT])); \
-    file->file_record->counters[MPIIO_BYTES_READ] += size; \
-    file->file_record->counters[__counter] += 1; \
-    if(file->last_io_type == DARSHAN_IO_WRITE) \
-        file->file_record->counters[MPIIO_RW_SWITCHES] += 1; \
-    file->last_io_type = DARSHAN_IO_READ; \
-    if(file->file_record->fcounters[MPIIO_F_READ_START_TIMESTAMP] == 0) \
-        file->file_record->fcounters[MPIIO_F_READ_START_TIMESTAMP] = __tm1; \
-    file->file_record->fcounters[MPIIO_F_READ_END_TIMESTAMP] = __tm2; \
-    if(file->file_record->fcounters[MPIIO_F_MAX_READ_TIME] < __elapsed) { \
-        file->file_record->fcounters[MPIIO_F_MAX_READ_TIME] = __elapsed; \
-        file->file_record->counters[MPIIO_MAX_READ_TIME_SIZE] = size; } \
-    DARSHAN_TIMER_INC_NO_OVERLAP(file->file_record->fcounters[MPIIO_F_READ_TIME], \
-        __tm1, __tm2, file->last_read_end); \
+    DARSHAN_BUCKET_INC(&(rec_ref->file_rec->counters[MPIIO_SIZE_READ_AGG_0_100]), size); \
+    darshan_common_val_counter(&rec_ref->access_root, &rec_ref->access_count, size, \
+        &(rec_ref->file_rec->counters[MPIIO_ACCESS1_ACCESS]), \
+        &(rec_ref->file_rec->counters[MPIIO_ACCESS1_COUNT])); \
+    rec_ref->file_rec->counters[MPIIO_BYTES_READ] += size; \
+    rec_ref->file_rec->counters[__counter] += 1; \
+    if(rec_ref->last_io_type == DARSHAN_IO_WRITE) \
+        rec_ref->file_rec->counters[MPIIO_RW_SWITCHES] += 1; \
+    rec_ref->last_io_type = DARSHAN_IO_READ; \
+    if(rec_ref->file_rec->fcounters[MPIIO_F_READ_START_TIMESTAMP] == 0) \
+        rec_ref->file_rec->fcounters[MPIIO_F_READ_START_TIMESTAMP] = __tm1; \
+    rec_ref->file_rec->fcounters[MPIIO_F_READ_END_TIMESTAMP] = __tm2; \
+    if(rec_ref->file_rec->fcounters[MPIIO_F_MAX_READ_TIME] < __elapsed) { \
+        rec_ref->file_rec->fcounters[MPIIO_F_MAX_READ_TIME] = __elapsed; \
+        rec_ref->file_rec->counters[MPIIO_MAX_READ_TIME_SIZE] = size; } \
+    DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[MPIIO_F_READ_TIME], \
+        __tm1, __tm2, rec_ref->last_read_end); \
 } while(0)
 
 #define MPIIO_RECORD_WRITE(__ret, __fh, __count, __datatype, __counter, __tm1, __tm2) do { \
-    struct mpiio_file_runtime* file; \
+    struct mpiio_file_record_ref *rec_ref; \
     int size = 0; \
     double __elapsed = __tm2-__tm1; \
     if(__ret != MPI_SUCCESS) break; \
-    file = mpiio_file_by_fh(__fh); \
-    if(!file) break; \
+    rec_ref = darshan_lookup_record_ref(mpiio_runtime->fh_hash, &(__fh), sizeof(MPI_File)); \
+    if(!rec_ref) break; \
     DARSHAN_MPI_CALL(PMPI_Type_size)(__datatype, &size);  \
     size = size * __count; \
-    DARSHAN_BUCKET_INC(&(file->file_record->counters[MPIIO_SIZE_WRITE_AGG_0_100]), size); \
-    darshan_common_val_counter(&file->access_root, &file->access_count, size, \
-        &(file->file_record->counters[MPIIO_ACCESS1_ACCESS]), \
-        &(file->file_record->counters[MPIIO_ACCESS1_COUNT])); \
-    file->file_record->counters[MPIIO_BYTES_WRITTEN] += size; \
-    file->file_record->counters[__counter] += 1; \
-    if(file->last_io_type == DARSHAN_IO_READ) \
-        file->file_record->counters[MPIIO_RW_SWITCHES] += 1; \
-    file->last_io_type = DARSHAN_IO_WRITE; \
-    if(file->file_record->fcounters[MPIIO_F_WRITE_START_TIMESTAMP] == 0) \
-        file->file_record->fcounters[MPIIO_F_WRITE_START_TIMESTAMP] = __tm1; \
-    file->file_record->fcounters[MPIIO_F_WRITE_END_TIMESTAMP] = __tm2; \
-    if(file->file_record->fcounters[MPIIO_F_MAX_WRITE_TIME] < __elapsed) { \
-        file->file_record->fcounters[MPIIO_F_MAX_WRITE_TIME] = __elapsed; \
-        file->file_record->counters[MPIIO_MAX_WRITE_TIME_SIZE] = size; } \
-    DARSHAN_TIMER_INC_NO_OVERLAP(file->file_record->fcounters[MPIIO_F_WRITE_TIME], \
-        __tm1, __tm2, file->last_write_end); \
+    DARSHAN_BUCKET_INC(&(rec_ref->file_rec->counters[MPIIO_SIZE_WRITE_AGG_0_100]), size); \
+    darshan_common_val_counter(&rec_ref->access_root, &rec_ref->access_count, size, \
+        &(rec_ref->file_rec->counters[MPIIO_ACCESS1_ACCESS]), \
+        &(rec_ref->file_rec->counters[MPIIO_ACCESS1_COUNT])); \
+    rec_ref->file_rec->counters[MPIIO_BYTES_WRITTEN] += size; \
+    rec_ref->file_rec->counters[__counter] += 1; \
+    if(rec_ref->last_io_type == DARSHAN_IO_READ) \
+        rec_ref->file_rec->counters[MPIIO_RW_SWITCHES] += 1; \
+    rec_ref->last_io_type = DARSHAN_IO_WRITE; \
+    if(rec_ref->file_rec->fcounters[MPIIO_F_WRITE_START_TIMESTAMP] == 0) \
+        rec_ref->file_rec->fcounters[MPIIO_F_WRITE_START_TIMESTAMP] = __tm1; \
+    rec_ref->file_rec->fcounters[MPIIO_F_WRITE_END_TIMESTAMP] = __tm2; \
+    if(rec_ref->file_rec->fcounters[MPIIO_F_MAX_WRITE_TIME] < __elapsed) { \
+        rec_ref->file_rec->fcounters[MPIIO_F_MAX_WRITE_TIME] = __elapsed; \
+        rec_ref->file_rec->counters[MPIIO_MAX_WRITE_TIME_SIZE] = size; } \
+    DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[MPIIO_F_WRITE_TIME], \
+        __tm1, __tm2, rec_ref->last_write_end); \
 } while(0)
 
 /**********************************************************
@@ -197,9 +177,11 @@ int MPI_File_open(MPI_Comm comm, char *filename, int amode, MPI_Info info, MPI_F
 #endif
 {
     int ret;
-    struct mpiio_file_runtime* file;
     char* tmp;
     int comm_size;
+    darshan_record_id rec_id;
+    struct mpiio_file_record_ref *rec_ref;
+    char *newpath;
     double tm1, tm2;
 
     tm1 = darshan_core_wtime();
@@ -208,9 +190,6 @@ int MPI_File_open(MPI_Comm comm, char *filename, int amode, MPI_Info info, MPI_F
 
     if(ret == MPI_SUCCESS)
     {
-        MPIIO_LOCK();
-        mpiio_runtime_initialize();
-
         /* use ROMIO approach to strip prefix if present */
         /* strip off prefix if there is one, but only skip prefixes
          * if they are greater than length one to allow for windows
@@ -221,31 +200,53 @@ int MPI_File_open(MPI_Comm comm, char *filename, int amode, MPI_Info info, MPI_F
             filename = tmp + 1;
         }
 
-        file = mpiio_file_by_name_setfh(filename, (*fh));
-        if(file)
+        MPIIO_PRE_RECORD();
+        /* cleanup pathname and check whether we should ignore it */
+        newpath = darshan_clean_file_path(filename);
+        if(!newpath) newpath = (char *)filename;
+        if(darshan_core_excluded_path(newpath))
+        {
+            if(newpath != filename) free(newpath);
+            return(ret);
+        }
+
+        /* lookup the corresponding record, and create a new one if one
+         * does not exist
+         */
+        rec_id = darshan_core_gen_record_id(newpath);
+        rec_ref = darshan_lookup_record_ref(mpiio_runtime->rec_id_hash, &rec_id,
+            sizeof(darshan_record_id));
+        if(!rec_ref) rec_ref = mpiio_track_new_file_record(rec_id, newpath);
+
+        if(rec_ref)
         {
-            file->file_record->counters[MPIIO_MODE] = amode;
+            rec_ref->file_rec->counters[MPIIO_MODE] = amode;
             DARSHAN_MPI_CALL(PMPI_Comm_size)(comm, &comm_size);
             if(comm_size == 1)
             {
-                file->file_record->counters[MPIIO_INDEP_OPENS] += 1;
+                rec_ref->file_rec->counters[MPIIO_INDEP_OPENS] += 1;
             }
             else
             {
-                file->file_record->counters[MPIIO_COLL_OPENS] += 1;
+                rec_ref->file_rec->counters[MPIIO_COLL_OPENS] += 1;
             }
             if(info != MPI_INFO_NULL)
             {
-                file->file_record->counters[MPIIO_HINTS] += 1;
+                rec_ref->file_rec->counters[MPIIO_HINTS] += 1;
             }
-            if(file->file_record->fcounters[MPIIO_F_OPEN_TIMESTAMP] == 0)
-                file->file_record->fcounters[MPIIO_F_OPEN_TIMESTAMP] = tm1;
+            if(rec_ref->file_rec->fcounters[MPIIO_F_OPEN_TIMESTAMP] == 0)
+                rec_ref->file_rec->fcounters[MPIIO_F_OPEN_TIMESTAMP] = tm1;
             DARSHAN_TIMER_INC_NO_OVERLAP(
-                file->file_record->fcounters[MPIIO_F_META_TIME],
-                tm1, tm2, file->last_meta_end);
+                rec_ref->file_rec->fcounters[MPIIO_F_META_TIME],
+                tm1, tm2, rec_ref->last_meta_end);
+
+            /* add a new record reference based on the MPI file handle */
+            darshan_add_record_ref(&(mpiio_runtime->fh_hash), fh,
+                sizeof(MPI_File), rec_ref);
         }
 
-        MPIIO_UNLOCK();
+        if(newpath != filename) free(newpath);
+        MPIIO_POST_RECORD();
     }
 
     return(ret);
@@ -261,10 +262,10 @@ int MPI_File_read(MPI_File fh, void *buf, int count,
     ret = DARSHAN_MPI_CALL(PMPI_File_read)(fh, buf, count, datatype, status);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_INDEP_READS, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -283,10 +284,10 @@ int MPI_File_write(MPI_File fh, void *buf, int count,
     ret = DARSHAN_MPI_CALL(PMPI_File_write)(fh, buf, count, datatype, status);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_INDEP_WRITES, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -301,10 +302,10 @@ int MPI_File_read_at(MPI_File fh, MPI_Offset offset, void *buf,
         count, datatype, status);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_INDEP_READS, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -324,10 +325,10 @@ int MPI_File_write_at(MPI_File fh, MPI_Offset offset, void *buf,
         count, datatype, status);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_INDEP_WRITES, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -341,10 +342,10 @@ int MPI_File_read_all(MPI_File fh, void * buf, int count, MPI_Datatype datatype,
         datatype, status);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_COLL_READS, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -362,10 +363,10 @@ int MPI_File_write_all(MPI_File fh, void * buf, int count, MPI_Datatype datatype
         datatype, status);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_COLL_WRITES, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -380,10 +381,10 @@ int MPI_File_read_at_all(MPI_File fh, MPI_Offset offset, void * buf,
         count, datatype, status);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_COLL_READS, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -403,10 +404,10 @@ int MPI_File_write_at_all(MPI_File fh, MPI_Offset offset, void * buf,
         count, datatype, status);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_COLL_WRITES, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -420,10 +421,10 @@ int MPI_File_read_shared(MPI_File fh, void * buf, int count, MPI_Datatype dataty
         datatype, status);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_INDEP_READS, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -441,10 +442,10 @@ int MPI_File_write_shared(MPI_File fh, void * buf, int count, MPI_Datatype datat
         datatype, status);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_INDEP_WRITES, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -459,10 +460,10 @@ int MPI_File_read_ordered(MPI_File fh, void * buf, int count,
         datatype, status);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_COLL_READS, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -482,10 +483,10 @@ int MPI_File_write_ordered(MPI_File fh, void * buf, int count,
          datatype, status);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_COLL_WRITES, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -498,10 +499,10 @@ int MPI_File_read_all_begin(MPI_File fh, void * buf, int count, MPI_Datatype dat
     ret = DARSHAN_MPI_CALL(PMPI_File_read_all_begin)(fh, buf, count, datatype);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_SPLIT_READS, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -518,10 +519,10 @@ int MPI_File_write_all_begin(MPI_File fh, void * buf, int count, MPI_Datatype da
     ret = DARSHAN_MPI_CALL(PMPI_File_write_all_begin)(fh, buf, count, datatype);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_SPLIT_WRITES, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -536,10 +537,10 @@ int MPI_File_read_at_all_begin(MPI_File fh, MPI_Offset offset, void * buf,
         count, datatype);
     tm2 = darshan_core_wtime();
     
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_SPLIT_READS, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -559,10 +560,10 @@ int MPI_File_write_at_all_begin(MPI_File fh, MPI_Offset offset, void * buf,
         buf, count, datatype);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_SPLIT_WRITES, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -576,10 +577,10 @@ int MPI_File_read_ordered_begin(MPI_File fh, void * buf, int count, MPI_Datatype
         datatype);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_SPLIT_READS, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -597,10 +598,10 @@ int MPI_File_write_ordered_begin(MPI_File fh, void * buf, int count, MPI_Datatyp
         datatype);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_SPLIT_WRITES, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -613,10 +614,10 @@ int MPI_File_iread(MPI_File fh, void * buf, int count, MPI_Datatype datatype, __
     ret = DARSHAN_MPI_CALL(PMPI_File_iread)(fh, buf, count, datatype, request);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_NB_READS, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -635,10 +636,10 @@ int MPI_File_iwrite(MPI_File fh, void * buf, int count,
     ret = DARSHAN_MPI_CALL(PMPI_File_iwrite)(fh, buf, count, datatype, request);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_NB_WRITES, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -653,10 +654,10 @@ int MPI_File_iread_at(MPI_File fh, MPI_Offset offset, void * buf,
         datatype, request);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_NB_READS, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -676,10 +677,10 @@ int MPI_File_iwrite_at(MPI_File fh, MPI_Offset offset, void * buf,
         count, datatype, request);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_NB_WRITES, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -694,10 +695,10 @@ int MPI_File_iread_shared(MPI_File fh, void * buf, int count,
         datatype, request);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_NB_READS, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
@@ -717,17 +718,17 @@ int MPI_File_iwrite_shared(MPI_File fh, void * buf, int count,
         datatype, request);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
+    MPIIO_PRE_RECORD();
     MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_NB_WRITES, tm1, tm2);
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
+
     return(ret);
 }
 
 int MPI_File_sync(MPI_File fh)
 {
     int ret;
-    struct mpiio_file_runtime* file;
+    struct mpiio_file_record_ref *rec_ref;
     double tm1, tm2;
 
     tm1 = darshan_core_wtime();
@@ -736,17 +737,17 @@ int MPI_File_sync(MPI_File fh)
 
     if(ret == MPI_SUCCESS)
     {
-        MPIIO_LOCK();
-        mpiio_runtime_initialize();
-        file = mpiio_file_by_fh(fh);
-        if(file)
+        MPIIO_PRE_RECORD();
+        rec_ref = darshan_lookup_record_ref(mpiio_runtime->fh_hash,
+            &fh, sizeof(MPI_File));
+        if(rec_ref)
         {
-            file->file_record->counters[MPIIO_SYNCS] += 1;
+            rec_ref->file_rec->counters[MPIIO_SYNCS] += 1;
             DARSHAN_TIMER_INC_NO_OVERLAP(
-                file->file_record->fcounters[MPIIO_F_WRITE_TIME],
-                tm1, tm2, file->last_write_end);
+                rec_ref->file_rec->fcounters[MPIIO_F_WRITE_TIME],
+                tm1, tm2, rec_ref->last_write_end);
         }
-        MPIIO_UNLOCK();
+        MPIIO_POST_RECORD();
     }
 
     return(ret);
@@ -761,7 +762,7 @@ int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype,
 #endif
 {
     int ret;
-    struct mpiio_file_runtime* file;
+    struct mpiio_file_record_ref *rec_ref;
     double tm1, tm2;
 
     tm1 = darshan_core_wtime();
@@ -771,21 +772,21 @@ int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype,
 
     if(ret == MPI_SUCCESS)
     {
-        MPIIO_LOCK();
-        mpiio_runtime_initialize();
-        file = mpiio_file_by_fh(fh);
-        if(file)
+        MPIIO_PRE_RECORD();
+        rec_ref = darshan_lookup_record_ref(mpiio_runtime->fh_hash,
+            &fh, sizeof(MPI_File));
+        if(rec_ref)
         {
-            file->file_record->counters[MPIIO_VIEWS] += 1;
+            rec_ref->file_rec->counters[MPIIO_VIEWS] += 1;
             if(info != MPI_INFO_NULL)
             {
-                file->file_record->counters[MPIIO_HINTS] += 1;
+                rec_ref->file_rec->counters[MPIIO_HINTS] += 1;
                 DARSHAN_TIMER_INC_NO_OVERLAP(
-                    file->file_record->fcounters[MPIIO_F_META_TIME],
-                    tm1, tm2, file->last_meta_end);
+                    rec_ref->file_rec->fcounters[MPIIO_F_META_TIME],
+                    tm1, tm2, rec_ref->last_meta_end);
            }
         }
-        MPIIO_UNLOCK();
+        MPIIO_POST_RECORD();
     }
 
     return(ret);
@@ -794,7 +795,7 @@ int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype,
 int MPI_File_close(MPI_File *fh)
 {
     int ret;
-    struct mpiio_file_runtime* file;
+    struct mpiio_file_record_ref *rec_ref;
     MPI_File tmp_fh = *fh;
     double tm1, tm2;
 
@@ -802,19 +803,20 @@ int MPI_File_close(MPI_File *fh)
     ret = DARSHAN_MPI_CALL(PMPI_File_close)(fh);
     tm2 = darshan_core_wtime();
 
-    MPIIO_LOCK();
-    mpiio_runtime_initialize();
-    file = mpiio_file_by_fh(tmp_fh);
-    if(file)
+    MPIIO_PRE_RECORD();
+    rec_ref = darshan_lookup_record_ref(mpiio_runtime->fh_hash,
+        &tmp_fh, sizeof(MPI_File));
+    if(rec_ref)
     {
-        file->file_record->fcounters[MPIIO_F_CLOSE_TIMESTAMP] =
+        rec_ref->file_rec->fcounters[MPIIO_F_CLOSE_TIMESTAMP] =
             darshan_core_wtime();
         DARSHAN_TIMER_INC_NO_OVERLAP(
-            file->file_record->fcounters[MPIIO_F_META_TIME],
-            tm1, tm2, file->last_meta_end);
-        mpiio_file_close_fh(tmp_fh);
+            rec_ref->file_rec->fcounters[MPIIO_F_META_TIME],
+            tm1, tm2, rec_ref->last_meta_end);
+        darshan_delete_record_ref(&(mpiio_runtime->fh_hash),
+            &tmp_fh, sizeof(MPI_File));
     }
-    MPIIO_UNLOCK();
+    MPIIO_POST_RECORD();
 
     return(ret);
 }
@@ -826,19 +828,7 @@ int MPI_File_close(MPI_File *fh)
 /* initialize data structures and register with darshan-core component */
 static void mpiio_runtime_initialize()
 {
-    struct darshan_module_funcs mpiio_mod_fns =
-    {
-        .begin_shutdown = &mpiio_begin_shutdown,
-        .get_output_data = &mpiio_get_output_data,
-        .shutdown = &mpiio_shutdown
-    };
-    void *mpiio_buf;
     int mpiio_buf_size;
-    int file_array_size;
-
-    /* don't do anything if already initialized or instrumenation is disabled */
-    if(mpiio_runtime || instrumentation_disabled)
-        return;
 
     /* try and store the default number of records for this module */
     mpiio_buf_size = DARSHAN_DEF_MOD_REC_COUNT * sizeof(struct darshan_mpiio_file);
@@ -846,9 +836,8 @@ static void mpiio_runtime_initialize()
     /* register the mpiio module with darshan core */
     darshan_core_register_module(
         DARSHAN_MPIIO_MOD,
-        &mpiio_mod_fns,
+        &mpiio_shutdown,
         &mpiio_buf_size,
-        &mpiio_buf,
         &my_rank,
         NULL);
 
@@ -867,159 +856,55 @@ static void mpiio_runtime_initialize()
     }
     memset(mpiio_runtime, 0, sizeof(*mpiio_runtime));
 
-    /* set number of trackable files for the MPIIO module according to the
-     * amount of memory returned by darshan-core
-     */
-    file_array_size = mpiio_buf_size / sizeof(struct darshan_mpiio_file);
-    mpiio_runtime->file_array_ndx = 0;
-
-    /* store pointer to MPIIO record buffer given by darshan-core */
-    mpiio_runtime->file_record_array = (struct darshan_mpiio_file *)mpiio_buf;
-
-    /* allocate array of runtime file records */
-    mpiio_runtime->file_runtime_array = malloc(file_array_size *
-                                               sizeof(struct mpiio_file_runtime));
-    if(!mpiio_runtime->file_runtime_array)
-    {
-        free(mpiio_runtime);
-        mpiio_runtime = NULL;
-        darshan_core_unregister_module(DARSHAN_MPIIO_MOD);
-        return;
-    }
-    memset(mpiio_runtime->file_runtime_array, 0, file_array_size *
-           sizeof(struct mpiio_file_runtime));
-
     return;
 }
 
-/* get a MPIIO file record for the given file path */
-static struct mpiio_file_runtime* mpiio_file_by_name(const char *name)
+static struct mpiio_file_record_ref *mpiio_track_new_file_record(
+    darshan_record_id rec_id, const char *path)
 {
-    struct mpiio_file_runtime *file = NULL;
-    struct darshan_mpiio_file *file_rec;
-    char *newname = NULL;
-    darshan_record_id file_id;
+    struct darshan_mpiio_file *file_rec = NULL;
+    struct mpiio_file_record_ref *rec_ref = NULL;
     int ret;
 
-    if(!mpiio_runtime || instrumentation_disabled)
+    rec_ref = malloc(sizeof(*rec_ref));
+    if(!rec_ref)
         return(NULL);
+    memset(rec_ref, 0, sizeof(*rec_ref));
 
-    newname = darshan_clean_file_path(name);
-    if(!newname)
-        newname = (char*)name;
-
-    /* lookup the unique id for this filename */
-    darshan_core_lookup_record(
-        newname,
-        &file_id);
-
-    /* search the hash table for this file record, and return if found */
-    HASH_FIND(hlink, mpiio_runtime->file_hash, &file_id, sizeof(darshan_record_id), file);
-    if(!file)
+    /* add a reference to this file record based on record id */
+    ret = darshan_add_record_ref(&(mpiio_runtime->rec_id_hash), &rec_id,
+        sizeof(darshan_record_id), rec_ref);
+    if(ret == 0)
     {
-        /* register the record with the darshan core component */
-        ret = darshan_core_register_record(file_id, newname, DARSHAN_MPIIO_MOD,
-            sizeof(struct darshan_mpiio_file), NULL);
-        if(ret == 1)
-        {
-            /* register was successful */
-            file = &(mpiio_runtime->file_runtime_array[mpiio_runtime->file_array_ndx]);
-            file->file_record =
-                &(mpiio_runtime->file_record_array[mpiio_runtime->file_array_ndx]);
-            file_rec = file->file_record;
-
-            file_rec->base_rec.id = file_id;
-            file_rec->base_rec.rank = my_rank;
-
-            /* add new record to file hash table */
-            HASH_ADD(hlink, mpiio_runtime->file_hash, file_record->base_rec.id,
-                sizeof(darshan_record_id), file);
-            mpiio_runtime->file_array_ndx++;
-        }
-    }
-
-    if(newname != name)
-        free(newname);
-    return(file);
-}
-
-/* get an MPIIO file record for the given file path, and also create a
- * reference structure using the corresponding file handle
- */
-static struct mpiio_file_runtime* mpiio_file_by_name_setfh(const char* name, MPI_File fh)
-{
-    struct mpiio_file_runtime* file;
-    struct mpiio_file_runtime_ref* ref;
-
-    if(!mpiio_runtime || instrumentation_disabled)
-        return(NULL);
-
-    /* find file record by name first */
-    file = mpiio_file_by_name(name);
-
-    if(!file)
+        free(rec_ref);
         return(NULL);
-
-    /* search hash table for existing file ref for this fh */
-    HASH_FIND(hlink, mpiio_runtime->fh_hash, &fh, sizeof(fh), ref);
-    if(ref)
-    {
-        /* we have a reference.  Make sure it points to the correct file
-         * and return it
-         */
-        ref->file = file;
-        return(file);
     }
 
-    /* if we hit this point, then we don't have a reference for this fh
-     * in the table yet.  Add it.
+    /* register the actual file record with darshan-core so it is persisted
+     * in the log file
      */
-    ref = malloc(sizeof(*ref));
-    if(!ref)
-        return(NULL);
-    memset(ref, 0, sizeof(*ref));
-
-    ref->file = file;
-    ref->fh = fh;    
-    HASH_ADD(hlink, mpiio_runtime->fh_hash, fh, sizeof(fh), ref);
-
-    return(file);
-}
-
-/* get an MPIIO file record for the given file handle */
-static struct mpiio_file_runtime* mpiio_file_by_fh(MPI_File fh)
-{
-    struct mpiio_file_runtime_ref* ref;
-
-    if(!mpiio_runtime || instrumentation_disabled)
-        return(NULL);
-
-    /* search hash table for existing file ref for this file handle */
-    HASH_FIND(hlink, mpiio_runtime->fh_hash, &fh, sizeof(fh), ref);
-    if(ref)
-        return(ref->file);
-
-    return(NULL);
-}
-
-/* free up reference data structures for the given file handle */
-static void mpiio_file_close_fh(MPI_File fh)
-{
-    struct mpiio_file_runtime_ref* ref;
-
-    if(!mpiio_runtime || instrumentation_disabled)
-        return;
+    file_rec = darshan_core_register_record(
+        rec_id,
+        path,
+        DARSHAN_MPIIO_MOD,
+        sizeof(struct darshan_mpiio_file),
+        NULL);
 
-    /* search hash table for this fd */
-    HASH_FIND(hlink, mpiio_runtime->fh_hash, &fh, sizeof(fh), ref);
-    if(ref)
+    if(!file_rec)
     {
-        /* we have a reference, delete it */
-        HASH_DELETE(hlink, mpiio_runtime->fh_hash, ref);
-        free(ref);
+        darshan_delete_record_ref(&(mpiio_runtime->rec_id_hash),
+            &rec_id, sizeof(darshan_record_id));
+        free(rec_ref);
+        return(NULL);
     }
 
-    return;
+    /* registering this file record was successful, so initialize some fields */
+    file_rec->base_rec.id = rec_id;
+    file_rec->base_rec.rank = my_rank;
+    rec_ref->file_rec = file_rec;
+    mpiio_runtime->file_rec_count++;
+
+    return(rec_ref);
 }
 
 /* compare function for sorting file records by descending rank */
@@ -1036,6 +921,27 @@ static int mpiio_record_compare(const void* a_p, const void* b_p)
     return 0;
 }
 
+static void mpiio_finalize_file_records(void *rec_ref_p)
+{
+    struct mpiio_file_record_ref *rec_ref =
+        (struct mpiio_file_record_ref *)rec_ref_p;
+
+#ifndef __DARSHAN_ENABLE_MMAP_LOGS
+    /* walk common counters to get 4 most common -- only if mmap
+     * feature is disabled (mmap updates counters on the go)
+     */
+
+    /* common accesses */
+    darshan_walk_common_vals(rec_ref->access_root,
+        &(rec_ref->file_rec->counters[MPIIO_ACCESS1_ACCESS]),
+        &(rec_ref->file_rec->counters[MPIIO_ACCESS1_COUNT]));
+#endif
+
+    tdestroy(rec_ref->access_root, free);
+
+    return;
+}
+
 static void mpiio_record_reduction_op(
     void* infile_v,
     void* inoutfile_v,
@@ -1047,8 +953,6 @@ static void mpiio_record_reduction_op(
     struct darshan_mpiio_file *inoutfile = inoutfile_v;
     int i, j, k;
 
-    assert(mpiio_runtime);
-
     for(i=0; i<*len; i++)
     {
         memset(&tmp_file, 0, sizeof(struct darshan_mpiio_file));
@@ -1296,58 +1200,46 @@ static void mpiio_shared_record_variance(MPI_Comm mod_comm,
     return;
 }
 
-/**************************************************************************
- * Functions exported by MPI-IO module for coordinating with darshan-core *
- **************************************************************************/
-
-static void mpiio_begin_shutdown()
+static void mpiio_cleanup_runtime()
 {
-    assert(mpiio_runtime);
+    darshan_clear_record_refs(&(mpiio_runtime->fh_hash), 0);
+    darshan_clear_record_refs(&(mpiio_runtime->rec_id_hash), 1);
 
-    MPIIO_LOCK();
-    /* disable further instrumentation while Darshan shuts down */
-    instrumentation_disabled = 1;
-    MPIIO_UNLOCK();
+    free(mpiio_runtime);
+    mpiio_runtime = NULL;
 
     return;
 }
 
-static void mpiio_get_output_data(
+/**************************************************************************
+ * Functions exported by MPI-IO module for coordinating with darshan-core *
+ **************************************************************************/
+
+static void mpiio_shutdown(
     MPI_Comm mod_comm,
     darshan_record_id *shared_recs,
     int shared_rec_count,
     void **mpiio_buf,
     int *mpiio_buf_sz)
 {
-    struct mpiio_file_runtime *file;
-    struct mpiio_file_runtime* tmp;
-    int i;
+    struct mpiio_file_record_ref *rec_ref;
+    struct darshan_mpiio_file *mpiio_rec_buf = *(struct darshan_mpiio_file **)mpiio_buf;
+    int mpiio_rec_count;
     double mpiio_time;
-    void *red_send_buf = NULL;
-    void *red_recv_buf = NULL;
+    struct darshan_mpiio_file *red_send_buf = NULL;
+    struct darshan_mpiio_file *red_recv_buf = NULL;
     MPI_Datatype red_type;
     MPI_Op red_op;
+    int i;
 
+    MPIIO_LOCK();
     assert(mpiio_runtime);
+    mpiio_rec_count = mpiio_runtime->file_rec_count;
 
-    /* go through and set the 4 most common access sizes for MPI-IO */
-    for(i = 0; i < mpiio_runtime->file_array_ndx; i++)
-    {
-        tmp = &(mpiio_runtime->file_runtime_array[i]);
-
-#ifndef __DARSHAN_ENABLE_MMAP_LOGS
-        /* walk common counters to get 4 most common -- only if mmap
-         * feature is disabled (mmap updates counters on the go)
-         */
-
-        /* common access sizes */
-        darshan_walk_common_vals(tmp->access_root,
-            &(tmp->file_record->counters[MPIIO_ACCESS1_ACCESS]),
-            &(tmp->file_record->counters[MPIIO_ACCESS1_COUNT]));
-#endif
-
-        tdestroy(tmp->access_root, free);
-    }
+    /* perform any final transformations on MPIIO file records before
+     * writing them out to log file
+     */
+    darshan_iter_record_refs(mpiio_runtime->rec_id_hash, &mpiio_finalize_file_records);
 
     /* if there are globally shared files, do a shared file reduction */
     /* NOTE: the shared file reduction is also skipped if the 
@@ -1358,48 +1250,47 @@ static void mpiio_get_output_data(
         /* necessary initialization of shared records */
         for(i = 0; i < shared_rec_count; i++)
         {
-            HASH_FIND(hlink, mpiio_runtime->file_hash, &shared_recs[i],
-                sizeof(darshan_record_id), file);
-            assert(file);
+            rec_ref = darshan_lookup_record_ref(mpiio_runtime->rec_id_hash,
+                &shared_recs[i], sizeof(darshan_record_id));
+            assert(rec_ref);
 
             mpiio_time =
-                file->file_record->fcounters[MPIIO_F_READ_TIME] +
-                file->file_record->fcounters[MPIIO_F_WRITE_TIME] +
-                file->file_record->fcounters[MPIIO_F_META_TIME];
+                rec_ref->file_rec->fcounters[MPIIO_F_READ_TIME] +
+                rec_ref->file_rec->fcounters[MPIIO_F_WRITE_TIME] +
+                rec_ref->file_rec->fcounters[MPIIO_F_META_TIME];
 
             /* initialize fastest/slowest info prior to the reduction */
-            file->file_record->counters[MPIIO_FASTEST_RANK] =
-                file->file_record->base_rec.rank;
-            file->file_record->counters[MPIIO_FASTEST_RANK_BYTES] =
-                file->file_record->counters[MPIIO_BYTES_READ] +
-                file->file_record->counters[MPIIO_BYTES_WRITTEN];
-            file->file_record->fcounters[MPIIO_F_FASTEST_RANK_TIME] =
+            rec_ref->file_rec->counters[MPIIO_FASTEST_RANK] =
+                rec_ref->file_rec->base_rec.rank;
+            rec_ref->file_rec->counters[MPIIO_FASTEST_RANK_BYTES] =
+                rec_ref->file_rec->counters[MPIIO_BYTES_READ] +
+                rec_ref->file_rec->counters[MPIIO_BYTES_WRITTEN];
+            rec_ref->file_rec->fcounters[MPIIO_F_FASTEST_RANK_TIME] =
                 mpiio_time;
 
             /* until reduction occurs, we assume that this rank is both
              * the fastest and slowest. It is up to the reduction operator
              * to find the true min and max.
              */
-            file->file_record->counters[MPIIO_SLOWEST_RANK] =
-                file->file_record->counters[MPIIO_FASTEST_RANK];
-            file->file_record->counters[MPIIO_SLOWEST_RANK_BYTES] =
-                file->file_record->counters[MPIIO_FASTEST_RANK_BYTES];
-            file->file_record->fcounters[MPIIO_F_SLOWEST_RANK_TIME] =
-                file->file_record->fcounters[MPIIO_F_FASTEST_RANK_TIME];
-
-            file->file_record->base_rec.rank = -1;
+            rec_ref->file_rec->counters[MPIIO_SLOWEST_RANK] =
+                rec_ref->file_rec->counters[MPIIO_FASTEST_RANK];
+            rec_ref->file_rec->counters[MPIIO_SLOWEST_RANK_BYTES] =
+                rec_ref->file_rec->counters[MPIIO_FASTEST_RANK_BYTES];
+            rec_ref->file_rec->fcounters[MPIIO_F_SLOWEST_RANK_TIME] =
+                rec_ref->file_rec->fcounters[MPIIO_F_FASTEST_RANK_TIME];
+
+            rec_ref->file_rec->base_rec.rank = -1;
         }
 
         /* sort the array of files descending by rank so that we get all of the 
          * shared files (marked by rank -1) in a contiguous portion at end 
          * of the array
          */
-        qsort(mpiio_runtime->file_record_array, mpiio_runtime->file_array_ndx,
-            sizeof(struct darshan_mpiio_file), mpiio_record_compare);
+        qsort(mpiio_rec_buf, mpiio_rec_count, sizeof(struct darshan_mpiio_file),
+            mpiio_record_compare);
 
-        /* make *send_buf point to the shared files at the end of sorted array */
-        red_send_buf =
-            &(mpiio_runtime->file_record_array[mpiio_runtime->file_array_ndx-shared_rec_count]);
+        /* make send_buf point to the shared files at the end of sorted array */
+        red_send_buf = &(mpiio_rec_buf[mpiio_rec_count-shared_rec_count]);
 
         /* allocate memory for the reduction output on rank 0 */
         if(my_rank == 0)
@@ -1407,6 +1298,7 @@ static void mpiio_get_output_data(
             red_recv_buf = malloc(shared_rec_count * sizeof(struct darshan_mpiio_file));
             if(!red_recv_buf)
             {
+                MPIIO_UNLOCK();
                 return;
             }
         }
@@ -1432,44 +1324,29 @@ static void mpiio_get_output_data(
         /* clean up reduction state */
         if(my_rank == 0)
         {
-            int tmp_ndx = mpiio_runtime->file_array_ndx - shared_rec_count;
-            memcpy(&(mpiio_runtime->file_record_array[tmp_ndx]), red_recv_buf,
+            int tmp_ndx = mpiio_rec_count - shared_rec_count;
+            memcpy(&(mpiio_rec_buf[tmp_ndx]), red_recv_buf,
                 shared_rec_count * sizeof(struct darshan_mpiio_file));
             free(red_recv_buf);
         }
         else
         {
-            mpiio_runtime->file_array_ndx -= shared_rec_count;
+            mpiio_rec_count -= shared_rec_count;
         }
 
         DARSHAN_MPI_CALL(PMPI_Type_free)(&red_type);
         DARSHAN_MPI_CALL(PMPI_Op_free)(&red_op);
     }
 
-    *mpiio_buf = (void *)(mpiio_runtime->file_record_array);
-    *mpiio_buf_sz = mpiio_runtime->file_array_ndx * sizeof(struct darshan_mpiio_file);
+    *mpiio_buf_sz = mpiio_rec_count * sizeof(struct darshan_mpiio_file);
 
-    return;
-}
+    /* shut down internal structures used for instrumenting */
+    mpiio_cleanup_runtime();
 
-static void mpiio_shutdown()
-{
-    struct mpiio_file_runtime_ref *ref, *tmp;
-
-    assert(mpiio_runtime);
-
-    HASH_ITER(hlink, mpiio_runtime->fh_hash, ref, tmp)
-    {
-        HASH_DELETE(hlink, mpiio_runtime->fh_hash, ref);
-        free(ref);
-    }
-
-    HASH_CLEAR(hlink, mpiio_runtime->file_hash); /* these entries are freed all at once below */
-
-    free(mpiio_runtime->file_runtime_array);
-    free(mpiio_runtime);
-    mpiio_runtime = NULL;
+    /* disable further instrumentation */
+    instrumentation_disabled = 1;
 
+    MPIIO_UNLOCK();
     return;
 }
 


=====================================
darshan-runtime/lib/darshan-posix.c
=====================================
--- a/darshan-runtime/lib/darshan-posix.c
+++ b/darshan-runtime/lib/darshan-posix.c
@@ -99,13 +99,13 @@ DARSHAN_FORWARD_DECL(lio_listio64, int, (int mode, struct aiocb64 *const aiocb_l
  * associate different types of handles with this posix_file_record_ref struct.
  * This allows us to index this struct (and the underlying file record) by using
  * either the corresponding Darshan record identifier (derived from the filename)
- * or by a generated file descriptor, for instance. So, while there should only
- * be a single Darshan record identifier that indexes a posix_file_record_ref,
+ * or by a generated file descriptor, for instance. Note that, while there should
+ * only be a single Darshan record identifier that indexes a posix_file_record_ref,
  * there could be multiple open file descriptors that index it.
  */
 struct posix_file_record_ref
 {
-    struct darshan_posix_file* file_rec;
+    struct darshan_posix_file *file_rec;
     int64_t offset;
     int64_t last_byte_read;
     int64_t last_byte_written;
@@ -113,9 +113,9 @@ struct posix_file_record_ref
     double last_meta_end;
     double last_read_end;
     double last_write_end;
-    void* access_root;
+    void *access_root;
     int access_count;
-    void* stride_root;
+    void *stride_root;
     int stride_count;
     struct posix_aio_tracker* aio_list;
 };
@@ -136,7 +136,7 @@ struct posix_aio_tracker
 {
     double tm1;
     void *aiocbp;
-    struct posix_aio_tracker* next;
+    struct posix_aio_tracker *next;
 };
 
 static void posix_runtime_initialize(
@@ -149,6 +149,8 @@ static struct posix_aio_tracker* posix_aio_tracker_del(
     int fd, void *aiocbp);
 static int posix_record_compare(
     const void* a, const void* b);
+static void posix_finalize_file_records(
+    void *rec_ref_p);
 static void posix_record_reduction_op(
     void* infile_v, void* inoutfile_v, int *len, MPI_Datatype *datatype);
 static void posix_shared_record_variance(
@@ -167,6 +169,9 @@ static int instrumentation_disabled = 0;
 static int my_rank = -1;
 static int darshan_mem_alignment = 1;
 
+#define POSIX_LOCK() pthread_mutex_lock(&posix_runtime_mutex)
+#define POSIX_UNLOCK() pthread_mutex_unlock(&posix_runtime_mutex)
+
 #define POSIX_PRE_RECORD() do { \
     POSIX_LOCK(); \
     if(!posix_runtime && !instrumentation_disabled) posix_runtime_initialize(); \
@@ -180,21 +185,24 @@ static int darshan_mem_alignment = 1;
     POSIX_UNLOCK(); \
 } while(0)
 
-#define POSIX_LOCK() pthread_mutex_lock(&posix_runtime_mutex)
-#define POSIX_UNLOCK() pthread_mutex_unlock(&posix_runtime_mutex)
-
 #define POSIX_RECORD_OPEN(__ret, __path, __mode, __stream_flag, __tm1, __tm2) do { \
-    struct posix_file_record_ref *rec_ref; \
     darshan_record_id rec_id; \
+    struct posix_file_record_ref *rec_ref; \
+    char *newpath; \
     if(__ret < 0) break; \
-    if(darshan_core_excluded_path(__path)) break; \
-    rec_id = darshan_record_id_from_path(__path); \
+    newpath = darshan_clean_file_path(__path); \
+    if(!newpath) newpath = (char *)__path; \
+    if(darshan_core_excluded_path(newpath)) { \
+        if(newpath != __path) free(newpath); \
+        break; \
+    } \
+    rec_id = darshan_core_gen_record_id(newpath); \
     rec_ref = darshan_lookup_record_ref(posix_runtime->rec_id_hash, &rec_id, sizeof(darshan_record_id)); \
+    if(!rec_ref) rec_ref = posix_track_new_file_record(rec_id, newpath); \
     if(!rec_ref) { \
-        rec_ref = posix_track_new_file_record(rec_id, __path); \
-        if(!rec_ref) break; \
+        if(newpath != __path) free(newpath); \
+        break; \
     } \
-    if(darshan_add_record_ref(&(posix_runtime->fd_hash), &__ret, sizeof(int), rec_ref) == 0) break; \
     if(__mode) \
         rec_ref->file_rec->counters[POSIX_MODE] = __mode; \
     rec_ref->offset = 0; \
@@ -208,6 +216,7 @@ static int darshan_mem_alignment = 1;
         rec_ref->file_rec->fcounters[POSIX_F_OPEN_TIMESTAMP] = __tm1; \
     DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[POSIX_F_META_TIME], \
         __tm1, __tm2, rec_ref->last_meta_end); \
+    darshan_add_record_ref(&(posix_runtime->fd_hash), &__ret, sizeof(int), rec_ref); \
 } while(0)
 
 #define POSIX_RECORD_READ(__ret, __fd, __pread_flag, __pread_offset, __aligned, __stream_flag, __tm1, __tm2) do { \
@@ -323,14 +332,18 @@ static int darshan_mem_alignment = 1;
 } while(0)
 
 #define POSIX_LOOKUP_RECORD_STAT(__path, __statbuf, __tm1, __tm2) do { \
-    struct posix_file_record_ref* rec_ref; \
     darshan_record_id rec_id; \
-    if(darshan_core_excluded_path(__path)) break; \
-    rec_id = darshan_record_id_from_path(__path); \
-    rec_ref = darshan_lookup_record_ref(posix_runtime->rec_id_hash, &rec_id, sizeof(darshan_record_id)); \
-    if(!rec_ref) { \
-        rec_ref = posix_track_new_file_record(rec_id, __path); \
+    struct posix_file_record_ref* rec_ref; \
+    char *newpath = darshan_clean_file_path(__path); \
+    if(!newpath) newpath = (char *)__path; \
+    if(darshan_core_excluded_path(newpath)) { \
+        if(newpath != __path) free(newpath); \
+        break; \
     } \
+    rec_id = darshan_core_gen_record_id(newpath); \
+    rec_ref = darshan_lookup_record_ref(posix_runtime->rec_id_hash, &rec_id, sizeof(darshan_record_id)); \
+    if(!rec_ref) rec_ref = posix_track_new_file_record(rec_id, newpath); \
+    if(newpath != __path) free(newpath); \
     if(rec_ref) { \
         POSIX_RECORD_STAT(rec_ref, __statbuf, __tm1, __tm2); \
     } \
@@ -1429,7 +1442,6 @@ static struct posix_file_record_ref *posix_track_new_file_record(
 {
     struct darshan_posix_file *file_rec = NULL;
     struct posix_file_record_ref *rec_ref = NULL;
-    char *newpath = NULL;
     int file_alignment;
     int ret;
 
@@ -1447,23 +1459,21 @@ static struct posix_file_record_ref *posix_track_new_file_record(
         return(NULL);
     }
 
-    /* cleanup name and convert to absolute path */
-    newpath = darshan_clean_file_path(path);
-    if(!newpath)
-        newpath = (char *)path;
-
     /* register the actual file record with darshan-core so it is persisted
      * in the log file
      */
-    file_rec = darshan_core_register_record(rec_id, newpath, DARSHAN_POSIX_MOD,
-        sizeof(struct darshan_posix_file), &file_alignment);
+    file_rec = darshan_core_register_record(
+        rec_id,
+        path,
+        DARSHAN_POSIX_MOD,
+        sizeof(struct darshan_posix_file),
+        &file_alignment);
+
     if(!file_rec)
     {
         darshan_delete_record_ref(&(posix_runtime->rec_id_hash),
             &rec_id, sizeof(darshan_record_id));
         free(rec_ref);
-        if(newpath != path)
-            free(newpath);
         return(NULL);
     }
 
@@ -1475,8 +1485,6 @@ static struct posix_file_record_ref *posix_track_new_file_record(
     rec_ref->file_rec = file_rec;
     posix_runtime->file_rec_count++;
 
-    if(newpath != path)
-        free(newpath);
     return(rec_ref);
 }
 
@@ -1968,7 +1976,7 @@ static void posix_shutdown(
         qsort(posix_rec_buf, posix_rec_count, sizeof(struct darshan_posix_file),
             posix_record_compare);
 
-        /* make *send_buf point to the shared files at the end of sorted array */
+        /* make send_buf point to the shared files at the end of sorted array */
         red_send_buf = &(posix_rec_buf[posix_rec_count-shared_rec_count]);
 
         /* allocate memory for the reduction output on rank 0 */



View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/compare/660130d9508704c44668b234ffcec2f73f035445...31e76e2c782371bac280d03d3a91116e4ea1ee50
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20160610/d519b08a/attachment-0001.html>


More information about the Darshan-commits mailing list