[Darshan-commits] [Git][darshan/darshan][master] 4 commits: bug fix for filtering out inactive stdio records

Shane Snyder xgitlab at cels.anl.gov
Tue Jul 25 10:58:17 CDT 2017


Shane Snyder pushed to branch master at darshan / darshan


Commits:
a8a209d6 by Shane Snyder at 2017-07-24T15:32:39-05:00
bug fix for filtering out inactive stdio records

- - - - -
a63ee680 by Shane Snyder at 2017-07-24T15:34:14-05:00
bug fix in stdio read instrumentation macro

- - - - -
37b50d6a by Shane Snyder at 2017-07-24T16:08:25-05:00
bug fix for disabling module instrumentation

Previously, modules would explicitly disable instrumentation
whenever Darshan core called into the module to shut it down. The
issue is that Darshan core starts shutting down well before modules
do, so there is a time interval where modules could be trying to
instrument data while it's not safe to do so.

Now, we expose a function (darshan_core_disabled_instrumentation)
that modules can call to determine if it's safe to instrument
data.

- - - - -
6d9894c4 by Shane Snyder at 2017-07-25T10:58:13-05:00
Merge branch 'issue-229-negative-stdio-timers' into 'master'

Fix for Issue 229 negative stdio timers

See merge request !15
- - - - -


9 changed files:

- darshan-runtime/darshan.h
- darshan-runtime/lib/darshan-core.c
- darshan-runtime/lib/darshan-hdf5.c
- darshan-runtime/lib/darshan-lustre.c
- darshan-runtime/lib/darshan-mpiio.c
- darshan-runtime/lib/darshan-null.c
- darshan-runtime/lib/darshan-pnetcdf.c
- darshan-runtime/lib/darshan-posix.c
- darshan-runtime/lib/darshan-stdio.c


Changes:

=====================================
darshan-runtime/darshan.h
=====================================
--- a/darshan-runtime/darshan.h
+++ b/darshan-runtime/darshan.h
@@ -173,4 +173,13 @@ double darshan_core_wtime(void);
 int darshan_core_excluded_path(
     const char * path);
 
+/* darshan_core_disabled_instrumentation
+ *
+ * Returns true (1) if Darshan has currently disabled instrumentation,
+ * false (0) otherwise. If instrumentation is disabled, modules should
+ * no longer update any file records as part of the intercepted function
+ * wrappers.
+ */
+int darshan_core_disabled_instrumentation(void);
+
 #endif /* __DARSHAN_H */


=====================================
darshan-runtime/lib/darshan-core.c
=====================================
--- a/darshan-runtime/lib/darshan-core.c
+++ b/darshan-runtime/lib/darshan-core.c
@@ -2166,6 +2166,20 @@ int darshan_core_excluded_path(const char *path)
     return(0);
 }
 
+int darshan_core_disabled_instrumentation()
+{
+    int ret;
+
+    DARSHAN_CORE_LOCK();
+    if(darshan_core)
+        ret = 0;
+    else
+        ret = 1;
+    DARSHAN_CORE_UNLOCK();
+
+    return(ret);
+}
+
 /*
  * Local variables:
  *  c-indent-level: 4


=====================================
darshan-runtime/lib/darshan-hdf5.c
=====================================
--- a/darshan-runtime/lib/darshan-hdf5.c
+++ b/darshan-runtime/lib/darshan-hdf5.c
@@ -67,7 +67,6 @@ static void hdf5_shutdown(
 
 static struct hdf5_runtime *hdf5_runtime = NULL;
 static pthread_mutex_t hdf5_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
-static int instrumentation_disabled = 0;
 static int my_rank = -1;
 
 #define HDF5_LOCK() pthread_mutex_lock(&hdf5_runtime_mutex)
@@ -75,7 +74,7 @@ static int my_rank = -1;
 
 #define HDF5_PRE_RECORD() do { \
     HDF5_LOCK(); \
-    if(!instrumentation_disabled) { \
+    if(!darshan_core_disabled_instrumentation()) { \
         if(!hdf5_runtime) hdf5_runtime_initialize(); \
         if(hdf5_runtime) break; \
     } \
@@ -382,7 +381,6 @@ static void hdf5_cleanup_runtime()
 
     free(hdf5_runtime);
     hdf5_runtime = NULL;
-    instrumentation_disabled = 0;
 
     return;
 }
@@ -410,9 +408,6 @@ static void hdf5_shutdown(
     HDF5_LOCK();
     assert(hdf5_runtime);
 
-    /* disable further instrumentation */
-    instrumentation_disabled = 1;
-
     hdf5_rec_count = hdf5_runtime->file_rec_count;
 
     /* if there are globally shared files, do a shared file reduction */


=====================================
darshan-runtime/lib/darshan-lustre.c
=====================================
--- a/darshan-runtime/lib/darshan-lustre.c
+++ b/darshan-runtime/lib/darshan-lustre.c
@@ -41,7 +41,6 @@ static void lustre_shutdown(
 
 struct lustre_runtime *lustre_runtime = NULL;
 static pthread_mutex_t lustre_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
-static int instrumentation_disabled = 0;
 static int my_rank = -1;
 
 #define LUSTRE_LOCK() pthread_mutex_lock(&lustre_runtime_mutex)
@@ -64,11 +63,6 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
     int ret;
 
     LUSTRE_LOCK();
-    if(instrumentation_disabled)
-    {
-        LUSTRE_UNLOCK();
-        return;
-    }
 
     /* try to init module if not already */
     if(!lustre_runtime) lustre_runtime_initialize();
@@ -234,9 +228,6 @@ static void lustre_shutdown(
     LUSTRE_LOCK();
     assert(lustre_runtime);
 
-    /* disable further instrumentation while we shutdown */
-    instrumentation_disabled = 1;
-
     lustre_runtime->record_buffer = *lustre_buf;
     lustre_runtime->record_buffer_size = *lustre_buf_sz;
 
@@ -279,7 +270,6 @@ static void lustre_shutdown(
     darshan_clear_record_refs(&(lustre_runtime->record_id_hash), 1);
     free(lustre_runtime);
     lustre_runtime = NULL;
-    instrumentation_disabled = 0;
 
     LUSTRE_UNLOCK();
     return;


=====================================
darshan-runtime/lib/darshan-mpiio.c
=====================================
--- a/darshan-runtime/lib/darshan-mpiio.c
+++ b/darshan-runtime/lib/darshan-mpiio.c
@@ -95,7 +95,6 @@ extern void dxt_mpiio_read(darshan_record_id rec_id, int64_t length,
 
 static struct mpiio_runtime *mpiio_runtime = NULL;
 static pthread_mutex_t mpiio_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
-static int instrumentation_disabled = 0;
 static int my_rank = -1;
 static int enable_dxt_io_trace = 0;
 
@@ -104,7 +103,7 @@ static int enable_dxt_io_trace = 0;
 
 #define MPIIO_PRE_RECORD() do { \
     MPIIO_LOCK(); \
-    if(!instrumentation_disabled) { \
+    if(!darshan_core_disabled_instrumentation()) { \
         if(!mpiio_runtime) { \
             mpiio_runtime_initialize(); \
         } \
@@ -1192,7 +1191,6 @@ static void mpiio_cleanup_runtime()
 
     free(mpiio_runtime);
     mpiio_runtime = NULL;
-    instrumentation_disabled = 0;
 
     return;
 }
@@ -1301,9 +1299,6 @@ static void mpiio_shutdown(
     MPIIO_LOCK();
     assert(mpiio_runtime);
 
-    /* disable further instrumentation while we shutdown */
-    instrumentation_disabled = 1;
-
     mpiio_rec_count = mpiio_runtime->file_rec_count;
 
     /* perform any final transformations on MPIIO file records before


=====================================
darshan-runtime/lib/darshan-null.c
=====================================
--- a/darshan-runtime/lib/darshan-null.c
+++ b/darshan-runtime/lib/darshan-null.c
@@ -100,8 +100,6 @@ static struct null_runtime *null_runtime = NULL;
  * may not be necessary for all instrumentation modules.
  */
 static pthread_mutex_t null_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
-/* the instrumentation_disabled flag is used to toggle wrapper functions on/off */
-static int instrumentation_disabled = 0;
 /* my_rank indicates the MPI rank of this process */
 static int my_rank = -1;
 
@@ -116,7 +114,7 @@ static int my_rank = -1;
  */
 #define NULL_PRE_RECORD() do { \
     NULL_LOCK(); \
-    if(!instrumentation_disabled) { \
+    if(!darshan_core_disabled_instrumentation()) { \
         if(!null_runtime) null_runtime_initialize(); \
         if(null_runtime) break; \
     } \
@@ -295,7 +293,6 @@ static void null_cleanup_runtime()
 
     free(null_runtime);
     null_runtime = NULL;
-    instrumentation_disabled = 0;
 
     return;
 }
@@ -317,9 +314,6 @@ static void null_shutdown(
     NULL_LOCK();
     assert(null_runtime);
 
-    /* disable further instrumentation while we shutdown */
-    instrumentation_disabled = 1;
-
     /* NOTE: this function can be used to run collective operations prior to
      * shutting down the module, as implied by the MPI communicator passed in
      * as the first agrument. Typically, module developers will want to run a


=====================================
darshan-runtime/lib/darshan-pnetcdf.c
=====================================
--- a/darshan-runtime/lib/darshan-pnetcdf.c
+++ b/darshan-runtime/lib/darshan-pnetcdf.c
@@ -57,7 +57,6 @@ static void pnetcdf_shutdown(
 
 static struct pnetcdf_runtime *pnetcdf_runtime = NULL;
 static pthread_mutex_t pnetcdf_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
-static int instrumentation_disabled = 0;
 static int my_rank = -1;
 
 #define PNETCDF_LOCK() pthread_mutex_lock(&pnetcdf_runtime_mutex)
@@ -65,7 +64,7 @@ static int my_rank = -1;
 
 #define PNETCDF_PRE_RECORD() do { \
     PNETCDF_LOCK(); \
-    if(!instrumentation_disabled) { \
+    if(!darshan_core_disabled_instrumentation()) { \
         if(!pnetcdf_runtime) pnetcdf_runtime_initialize(); \
         if(pnetcdf_runtime) break; \
     } \
@@ -337,7 +336,6 @@ static void pnetcdf_cleanup_runtime()
 
     free(pnetcdf_runtime);
     pnetcdf_runtime = NULL;
-    instrumentation_disabled = 0;
 
     return;
 }
@@ -366,9 +364,6 @@ static void pnetcdf_shutdown(
     PNETCDF_LOCK();
     assert(pnetcdf_runtime);
 
-    /* disable further instrumentation while we shutdown */
-    instrumentation_disabled = 1;
-
     pnetcdf_rec_count = pnetcdf_runtime->file_rec_count;
 
     /* if there are globally shared files, do a shared file reduction */


=====================================
darshan-runtime/lib/darshan-posix.c
=====================================
--- a/darshan-runtime/lib/darshan-posix.c
+++ b/darshan-runtime/lib/darshan-posix.c
@@ -167,7 +167,6 @@ extern void dxt_posix_read(darshan_record_id rec_id, int64_t offset,
 
 static struct posix_runtime *posix_runtime = NULL;
 static pthread_mutex_t posix_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
-static int instrumentation_disabled = 0;
 static int my_rank = -1;
 static int darshan_mem_alignment = 1;
 static int enable_dxt_io_trace = 0;
@@ -177,7 +176,7 @@ static int enable_dxt_io_trace = 0;
 
 #define POSIX_PRE_RECORD() do { \
     POSIX_LOCK(); \
-    if(!instrumentation_disabled) { \
+    if(!darshan_core_disabled_instrumentation()) { \
         if(!posix_runtime) { \
             posix_runtime_initialize(); \
         } \
@@ -1736,7 +1735,6 @@ static void posix_cleanup_runtime()
 
     free(posix_runtime);
     posix_runtime = NULL;
-    instrumentation_disabled = 0;
 
     return;
 }
@@ -1837,9 +1835,6 @@ static void posix_shutdown(
     POSIX_LOCK();
     assert(posix_runtime);
 
-    /* disable instrumentation while we shutdown */
-    instrumentation_disabled = 1;
-
     posix_rec_count = posix_runtime->file_rec_count;
 
     /* perform any final transformations on POSIX file records before


=====================================
darshan-runtime/lib/darshan-stdio.c
=====================================
--- a/darshan-runtime/lib/darshan-stdio.c
+++ b/darshan-runtime/lib/darshan-stdio.c
@@ -139,7 +139,6 @@ struct stdio_runtime
 
 static struct stdio_runtime *stdio_runtime = NULL;
 static pthread_mutex_t stdio_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
-static int instrumentation_disabled = 0;
 static int darshan_mem_alignment = 1;
 static int my_rank = -1;
 
@@ -164,7 +163,7 @@ static void stdio_cleanup_runtime();
 
 #define STDIO_PRE_RECORD() do { \
     STDIO_LOCK(); \
-    if(!instrumentation_disabled) { \
+    if(!darshan_core_disabled_instrumentation()) { \
         if(!stdio_runtime) stdio_runtime_initialize(); \
         if(stdio_runtime) break; \
     } \
@@ -224,7 +223,7 @@ static void stdio_cleanup_runtime();
      rec_ref->file_rec->fcounters[STDIO_F_READ_START_TIMESTAMP] > __tm1) \
         rec_ref->file_rec->fcounters[STDIO_F_READ_START_TIMESTAMP] = __tm1; \
     rec_ref->file_rec->fcounters[STDIO_F_READ_END_TIMESTAMP] = __tm2; \
-    DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[STDIO_F_READ_TIME], __tm1, __tm2, rec_ref->last_write_end); \
+    DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[STDIO_F_READ_TIME], __tm1, __tm2, rec_ref->last_read_end); \
 } while(0)
 
 #define STDIO_RECORD_WRITE(__fp, __bytes,  __tm1, __tm2, __fflush_flag) do{ \
@@ -772,7 +771,7 @@ void DARSHAN_DECL(rewind)(FILE *stream)
      * value in this wrapper.
      */
     STDIO_LOCK();
-    if(instrumentation_disabled) {
+    if(darshan_core_disabled_instrumentation()) {
         STDIO_UNLOCK();
         return;
     }
@@ -1120,9 +1119,6 @@ static void stdio_shutdown(
     STDIO_LOCK();
     assert(stdio_runtime);
 
-    /* disable further instrumentation */
-    instrumentation_disabled = 1;
-
     stdio_rec_count = stdio_runtime->file_rec_count;
 
     /* if there are globally shared files, do a shared file reduction */
@@ -1238,9 +1234,9 @@ static void stdio_shutdown(
             {
                 memmove(&stdio_rec_buf[i], &stdio_rec_buf[i+1],
                     (stdio_rec_count-i-1)*sizeof(stdio_rec_buf[i]));
-                stdio_rec_count--;
                 i--;
             }
+            stdio_rec_count--;
         }
     }
 
@@ -1313,7 +1309,6 @@ static void stdio_cleanup_runtime()
 
     free(stdio_runtime);
     stdio_runtime = NULL;
-    instrumentation_disabled = 0;
 
     return;
 }



View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/compare/bd252d0e4a0a76873b8c818a42735ad4d5e18c72...6d9894c40f7dd68c58dd97be52f9841175e40c33

---
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/compare/bd252d0e4a0a76873b8c818a42735ad4d5e18c72...6d9894c40f7dd68c58dd97be52f9841175e40c33
You're receiving this email because of your account on xgitlab.cels.anl.gov.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20170725/1006227f/attachment-0001.html>


More information about the Darshan-commits mailing list