[Darshan-commits] [Git][darshan/darshan][dev-stdio] initial skeleton for stdio wrapper lib

Philip Carns xgitlab at cels.anl.gov
Tue Apr 19 11:02:58 CDT 2016


Philip Carns pushed to branch dev-stdio at darshan / darshan


Commits:
ebbe6421 by Phil Carns at 2016-04-19T12:01:32-04:00
initial skeleton for stdio wrapper lib

- move fopen/fopen64 wrappers to stdio module
- make infrastructure for tracking stream pointers based on similar fd
  infrastructure in posix
- define placeholder record structs
- wrappers don't actually do anything yet

- - - - -


6 changed files:

- darshan-runtime/Makefile.in
- darshan-runtime/lib/darshan-posix.c
- + darshan-runtime/lib/darshan-stdio.c
- darshan-runtime/share/ld-opts/darshan-base-ld-opts.in
- darshan-runtime/share/ld-opts/darshan-posix-ld-opts
- darshan-stdio-log-format.h


Changes:

=====================================
darshan-runtime/Makefile.in
=====================================
--- a/darshan-runtime/Makefile.in
+++ b/darshan-runtime/Makefile.in
@@ -33,8 +33,8 @@ CFLAGS_SHARED = -DDARSHAN_CONFIG_H=\"darshan-runtime-config.h\" -I . -I$(srcdir)
 
 LIBS = -lz @LIBBZ2@
 
-DARSHAN_STATIC_MOD_OBJS = lib/darshan-posix.o lib/darshan-mpiio.o lib/darshan-hdf5.o lib/darshan-pnetcdf.o
-DARSHAN_DYNAMIC_MOD_OBJS = lib/darshan-posix.po lib/darshan-mpiio.po lib/darshan-hdf5.po lib/darshan-pnetcdf.po
+DARSHAN_STATIC_MOD_OBJS = lib/darshan-posix.o lib/darshan-mpiio.o lib/darshan-hdf5.o lib/darshan-pnetcdf.o lib/darshan-stdio.o
+DARSHAN_DYNAMIC_MOD_OBJS = lib/darshan-posix.po lib/darshan-mpiio.po lib/darshan-hdf5.po lib/darshan-pnetcdf.po lib/darshan-stdio.po
 
 ifdef DARSHAN_USE_BGQ
 DARSHAN_STATIC_MOD_OBJS += lib/darshan-bgq.o
@@ -76,6 +76,12 @@ lib/darshan-posix.o: lib/darshan-posix.c darshan.h $(DARSHAN_LOG_FORMAT) $(srcdi
 lib/darshan-posix.po: lib/darshan-posix.c darshan.h $(DARSHAN_LOG_FORMAT) $(srcdir)/../darshan-posix-log-format.h | lib
 	$(CC) $(CFLAGS_SHARED) -c $< -o $@
 
+# stdio module object for the static object list, compiled with $(CFLAGS);
+# 'lib' is an order-only prerequisite (listed after '|')
+lib/darshan-stdio.o: lib/darshan-stdio.c darshan.h $(DARSHAN_LOG_FORMAT) $(srcdir)/../darshan-stdio-log-format.h | lib
+	$(CC) $(CFLAGS) -c $< -o $@
+
+# stdio module object for the dynamic object list, compiled with $(CFLAGS_SHARED)
+lib/darshan-stdio.po: lib/darshan-stdio.c darshan.h $(DARSHAN_LOG_FORMAT) $(srcdir)/../darshan-stdio-log-format.h | lib
+	$(CC) $(CFLAGS_SHARED) -c $< -o $@
+
 lib/darshan-mpiio.o: lib/darshan-mpiio.c darshan.h $(DARSHAN_LOG_FORMAT) $(srcdir)/../darshan-mpiio-log-format.h | lib
 	$(CC) $(CFLAGS) -c $< -o $@
 
@@ -157,6 +163,7 @@ endif
 	install -m 644 $(srcdir)/share/ld-opts/darshan-posix-ld-opts $(datarootdir)/ld-opts/darshan-posix-ld-opts
 	install -m 644 $(srcdir)/share/ld-opts/darshan-hdf5-ld-opts $(datarootdir)/ld-opts/darshan-hdf5-ld-opts
 	install -m 644 $(srcdir)/share/ld-opts/darshan-pnetcdf-ld-opts $(datarootdir)/ld-opts/darshan-pnetcdf-ld-opts
+	install -m 644 $(srcdir)/share/ld-opts/darshan-stdio-ld-opts $(datarootdir)/ld-opts/darshan-stdio-ld-opts
 	install -d $(libdir)/pkgconfig
 	install -m 644 lib/pkgconfig/darshan-runtime.pc $(libdir)/pkgconfig/darshan-runtime.pc
 


=====================================
darshan-runtime/lib/darshan-posix.c
=====================================
--- a/darshan-runtime/lib/darshan-posix.c
+++ b/darshan-runtime/lib/darshan-posix.c
@@ -43,8 +43,6 @@ DARSHAN_FORWARD_DECL(open, int, (const char *path, int flags, ...));
 DARSHAN_FORWARD_DECL(open64, int, (const char *path, int flags, ...));
 DARSHAN_FORWARD_DECL(creat, int, (const char* path, mode_t mode));
 DARSHAN_FORWARD_DECL(creat64, int, (const char* path, mode_t mode));
-DARSHAN_FORWARD_DECL(fopen, FILE*, (const char *path, const char *mode));
-DARSHAN_FORWARD_DECL(fopen64, FILE*, (const char *path, const char *mode));
 DARSHAN_FORWARD_DECL(mkstemp, int, (char *template));
 DARSHAN_FORWARD_DECL(mkostemp, int, (char *template, int flags));
 DARSHAN_FORWARD_DECL(mkstemps, int, (char *template, int suffixlen));
@@ -465,56 +463,6 @@ int DARSHAN_DECL(creat64)(const char* path, mode_t mode)
     return(ret);
 }
 
-FILE* DARSHAN_DECL(fopen)(const char *path, const char *mode)
-{
-    FILE* ret;
-    int fd;
-    double tm1, tm2;
-
-    MAP_OR_FAIL(fopen);
-
-    tm1 = darshan_core_wtime();
-    ret = __real_fopen(path, mode);
-    tm2 = darshan_core_wtime();
-
-    if(ret == NULL)
-        fd = -1;
-    else
-        fd = fileno(ret);
-
-    POSIX_LOCK();
-    posix_runtime_initialize();
-    POSIX_RECORD_OPEN(fd, path, 0, 1, tm1, tm2);
-    POSIX_UNLOCK();
-
-    return(ret);
-}
-
-FILE* DARSHAN_DECL(fopen64)(const char *path, const char *mode)
-{
-    FILE* ret;
-    int fd;
-    double tm1, tm2;
-
-    MAP_OR_FAIL(fopen64);
-
-    tm1 = darshan_core_wtime();
-    ret = __real_fopen64(path, mode);
-    tm2 = darshan_core_wtime();
-
-    if(ret == NULL)
-        fd = -1;
-    else
-        fd = fileno(ret);
-
-    POSIX_LOCK();
-    posix_runtime_initialize();
-    POSIX_RECORD_OPEN(fd, path, 0, 1, tm1, tm2);
-    POSIX_UNLOCK();
-
-    return(ret);
-}
-
 int DARSHAN_DECL(mkstemp)(char* template)
 {
     int ret;


=====================================
darshan-runtime/lib/darshan-stdio.c
=====================================
--- /dev/null
+++ b/darshan-runtime/lib/darshan-stdio.c
@@ -0,0 +1,427 @@
+/*
+ * Copyright (C) 2015 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ *
+ */
+
+#define _XOPEN_SOURCE 500
+#define _GNU_SOURCE
+
+#include "darshan-runtime-config.h"
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <string.h>
+#include <time.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/uio.h>
+#include <sys/mman.h>
+#include <search.h>
+#include <assert.h>
+#include <libgen.h>
+#include <pthread.h>
+
+#include "uthash.h"
+#include "utlist.h"
+
+#include "darshan.h"
+#include "darshan-dynamic.h"
+
+DARSHAN_FORWARD_DECL(fopen, FILE*, (const char *path, const char *mode));
+DARSHAN_FORWARD_DECL(fopen64, FILE*, (const char *path, const char *mode));
+
+/* The stdio_file_runtime structure maintains necessary runtime metadata
+ * for the STDIO file record (darshan_stdio_file structure, defined in
+ * darshan-stdio-log-format.h) pointed to by 'file_record'. This metadata
+ * assists with the instrumenting of specific statistics in the file record.
+ * 'hlink' is a hash table link structure used to add/remove this record
+ * from the hash table of STDIO file records for this process.
+ *
+ * RATIONALE: the STDIO module needs to track some stateful, volatile
+ * information about each open file (like the current file offset, most recent
+ * access time, etc.) to aid in instrumentation, but this information can't be
+ * stored in the darshan_stdio_file struct because we don't want it to appear in
+ * the final darshan log file.  We therefore associate a stdio_file_runtime
+ * struct with each darshan_stdio_file struct in order to track this information.
+ *
+ * NOTE: There is a one-to-one mapping of stdio_file_runtime structs to
+ * darshan_stdio_file structs.
+ *
+ * NOTE: The stdio_file_runtime struct contains a pointer to a darshan_stdio_file
+ * struct (see the *file_record member) rather than simply embedding an entire
+ * darshan_stdio_file struct.  This is done so that all of the darshan_stdio_file
+ * structs can be kept contiguous in memory as a single array to simplify
+ * reduction, compression, and storage.
+ */
+struct stdio_file_runtime
+{
+    /* TODO: make sure we need/want all of these fields */
+    /* entry of stdio_runtime->file_record_array described by this struct;
+     * its f_id member is the key used for the file hash table
+     */
+    struct darshan_stdio_file* file_record;
+    /* NOTE(review): the offset/last_* fields below are not referenced by any
+     * code in this file yet; the names suggest per-file offset and timestamp
+     * tracking -- confirm once the wrappers are implemented
+     */
+    int64_t offset;
+    int64_t last_byte_read;
+    int64_t last_byte_written;
+    enum darshan_io_type last_io_type;
+    double last_meta_end;
+    double last_read_end;
+    double last_write_end;
+    /* uthash handle linking this struct into stdio_runtime->file_hash */
+    UT_hash_handle hlink;
+};
+
+/* The stdio_file_runtime_ref structure is used to associate a STDIO
+ * stream with an already existing STDIO file record. This is
+ * necessary as many STDIO I/O functions take only an input stream,
+ * but STDIO file records are indexed by their full file paths (i.e., darshan
+ * record identifiers for STDIO files are created by hashing the file path).
+ * In other words, this structure is necessary as it allows us to look up a
+ * file record either by a pathname (stdio_file_runtime) or by STDIO stream
+ * (stdio_file_runtime_ref), depending on which parameters are
+ * available. This structure includes another hash table link, since separate
+ * hashes are maintained for stdio_file_runtime structures and stdio_file_runtime_ref
+ * structures.
+ *
+ * RATIONALE: In theory the FILE* information could be included in the
+ * stdio_file_runtime struct rather than in a separate structure here.  The
+ * reason we don't do that is because the same file could be opened multiple
+ * times by a given process with different stream pointers and thus
+ * simultaneously referenced using different stream pointers.  This practice is
+ * not common, but we must support it.
+ *
+ * NOTE: there are potentially multiple stdio_file_runtime_ref structures
+ * referring to a single stdio_file_runtime structure.  Most of the time there is
+ * only one, however.
+ */
+struct stdio_file_runtime_ref
+{
+    /* file record this stream currently refers to */
+    struct stdio_file_runtime* file;
+    /* stream pointer; its value is the key for stdio_runtime->stream_hash */
+    FILE* stream;
+    /* uthash handle linking this struct into stdio_runtime->stream_hash */
+    UT_hash_handle hlink;
+};
+
+/* The stdio_runtime structure maintains necessary state for storing
+ * STDIO file records and for coordinating with darshan-core at
+ * shutdown time.
+ */
+struct stdio_runtime
+{
+    /* parallel arrays allocated in stdio_runtime_initialize(): entry i of
+     * file_runtime_array points at entry i of file_record_array
+     */
+    struct stdio_file_runtime* file_runtime_array;
+    struct darshan_stdio_file* file_record_array;
+    /* number of entries allocated in each of the two arrays */
+    int file_array_size;
+    /* index of the next unused array entry (== number of records in use) */
+    int file_array_ndx;
+    /* uthash head: file records keyed by darshan record id (f_id) */
+    struct stdio_file_runtime* file_hash;
+    /* uthash head: stream references keyed by FILE* value */
+    struct stdio_file_runtime_ref* stream_hash;
+};
+
+/* module-wide state; stays NULL until stdio_runtime_initialize() allocates it
+ * and is reset to NULL by stdio_shutdown()
+ */
+static struct stdio_runtime *stdio_runtime = NULL;
+/* initialized with the recursive-mutex static initializer; taken and released
+ * through STDIO_LOCK()/STDIO_UNLOCK() below
+ */
+static pthread_mutex_t stdio_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
+/* set to 1 by stdio_begin_shutdown() and back to 0 by stdio_shutdown() */
+static int instrumentation_disabled = 0;
+/* both are passed by address to darshan_core_register_module(), which
+ * presumably fills them in -- confirm against darshan-core
+ */
+static int darshan_mem_alignment = 1;
+static int my_rank = -1;
+
+/* internal helpers for manipulating module state (defined below) */
+static void stdio_runtime_initialize(void);
+static struct stdio_file_runtime* stdio_file_by_name(const char *name);
+static struct stdio_file_runtime* stdio_file_by_name_setstream(const char* name, FILE *stream);
+static struct stdio_file_runtime* stdio_file_by_stream(FILE* stream);
+static void stdio_file_close_stream(FILE *stream);
+
+/* callbacks handed to darshan-core through struct darshan_module_funcs */
+static void stdio_begin_shutdown(void);
+static void stdio_get_output_data(MPI_Comm mod_comm, darshan_record_id *shared_recs,
+    int shared_rec_count, void **stdio_buf, int *stdio_buf_sz);
+static void stdio_shutdown(void);
+
+/* serialize access to the module state declared above */
+#define STDIO_LOCK() pthread_mutex_lock(&stdio_runtime_mutex)
+#define STDIO_UNLOCK() pthread_mutex_unlock(&stdio_runtime_mutex)
+
+/* Placeholder wrapper for fopen(): prints a trace line to stderr and then
+ * forwards the call unchanged to the real fopen.  No Darshan record is
+ * updated here yet.
+ */
+FILE* DARSHAN_DECL(fopen)(const char *path, const char *mode)
+{
+    fprintf(stderr, "FOO: HELLO WORLD (FOPEN)\n");
+
+    /* MAP_OR_FAIL is defined outside this file; presumably it resolves
+     * __real_fopen before first use -- confirm in darshan.h
+     */
+    MAP_OR_FAIL(fopen);
+
+    return __real_fopen(path, mode);
+}
+
+/* Placeholder wrapper for fopen64(): prints a trace line to stderr and then
+ * forwards the call unchanged to the real fopen64.  No Darshan record is
+ * updated here yet.
+ */
+FILE* DARSHAN_DECL(fopen64)(const char *path, const char *mode)
+{
+    fprintf(stderr, "FOO: HELLO WORLD (FOPEN64)\n");
+
+    /* MAP_OR_FAIL is defined outside this file; presumably it resolves
+     * __real_fopen64 before first use -- confirm in darshan.h
+     */
+    MAP_OR_FAIL(fopen64);
+
+    return __real_fopen64(path, mode);
+}
+
+/**********************************************************
+ * Internal functions for manipulating STDIO module state *
+ **********************************************************/
+
+/* Set up the STDIO module: register it with darshan-core, then allocate the
+ * module state and the two parallel record arrays, sized from the memory
+ * limit reported back by the core.
+ *
+ * Returns silently (leaving stdio_runtime NULL) when the module is already
+ * initialized, instrumentation is disabled, the core grants no memory, or the
+ * state struct cannot be allocated.  If only the arrays fail to allocate,
+ * stdio_runtime stays non-NULL with file_array_size forced to 0.
+ */
+static void stdio_runtime_initialize(void)
+{
+    struct darshan_module_funcs stdio_mod_fns =
+    {
+        .begin_shutdown = &stdio_begin_shutdown,
+        .get_output_data = &stdio_get_output_data,
+        .shutdown = &stdio_shutdown
+    };
+    int mem_limit;
+    size_t runtime_bytes;
+    size_t record_bytes;
+
+    /* nothing to do if already initialized or instrumentation is disabled */
+    if(instrumentation_disabled || stdio_runtime)
+        return;
+
+    /* announce the stdio module to darshan core and learn our memory budget */
+    darshan_core_register_module(DARSHAN_STDIO_MOD, &stdio_mod_fns, &my_rank,
+        &mem_limit, &darshan_mem_alignment);
+
+    /* darshan core assigned us no memory */
+    if(mem_limit == 0)
+        return;
+
+    stdio_runtime = malloc(sizeof(*stdio_runtime));
+    if(stdio_runtime == NULL)
+        return;
+    memset(stdio_runtime, 0, sizeof(*stdio_runtime));
+
+    /* the record count is derived from the size of a stdio file record only */
+    /* TODO: should we base memory usage off file record or total runtime structure sizes? */
+    stdio_runtime->file_array_ndx = 0;
+    stdio_runtime->file_array_size = mem_limit / sizeof(struct darshan_stdio_file);
+
+    runtime_bytes = stdio_runtime->file_array_size * sizeof(struct stdio_file_runtime);
+    record_bytes = stdio_runtime->file_array_size * sizeof(struct darshan_stdio_file);
+
+    /* allocate both arrays before checking either result */
+    stdio_runtime->file_runtime_array = malloc(runtime_bytes);
+    stdio_runtime->file_record_array = malloc(record_bytes);
+    if(stdio_runtime->file_runtime_array == NULL ||
+       stdio_runtime->file_record_array == NULL)
+    {
+        stdio_runtime->file_array_size = 0;
+        return;
+    }
+
+    memset(stdio_runtime->file_runtime_array, 0, runtime_bytes);
+    memset(stdio_runtime->file_record_array, 0, record_bytes);
+}
+
+/* Get the STDIO file record for the given file path, creating one if needed.
+ *
+ * name: path of the file as passed by the application.
+ *
+ * Returns the runtime record for the path, or NULL when the module is not
+ * initialized, instrumentation is disabled, darshan core returns record id 0,
+ * or the module's record array is already full and the path has no existing
+ * record in this module.
+ */
+static struct stdio_file_runtime* stdio_file_by_name(const char *name)
+{
+    struct stdio_file_runtime *file = NULL;
+    char *newname = NULL;
+    darshan_record_id file_id;
+    int file_alignment;
+    int limit_flag;
+
+    if(!stdio_runtime || instrumentation_disabled)
+        return(NULL);
+
+    /* fall back to the caller's string if no cleaned copy is produced */
+    newname = darshan_clean_file_path(name);
+    if(!newname)
+        newname = (char*)name;
+
+    /* tell darshan core whether we still have room for a new record */
+    limit_flag = (stdio_runtime->file_array_ndx >= stdio_runtime->file_array_size);
+
+    /* get a unique id for this file from darshan core */
+    darshan_core_register_record(
+        (void*)newname,
+        strlen(newname),
+        DARSHAN_STDIO_MOD,
+        1,
+        limit_flag,
+        &file_id,
+        &file_alignment);
+
+    /* the file record id is set to 0 if no memory is available for tracking
+     * new records -- just fall through and ignore this record ('file' is
+     * still NULL here)
+     */
+    if(file_id == 0)
+        goto out;
+
+    /* search the hash table for this file record, and return if found */
+    HASH_FIND(hlink, stdio_runtime->file_hash, &file_id, sizeof(darshan_record_id), file);
+    if(file)
+        goto out;
+
+    /* Never hand out an entry past the end of the arrays.  This file does not
+     * show whether darshan core can return a nonzero id while limit_flag is
+     * set (e.g. for a path it already knows about); if it does, using
+     * file_array_ndx below would index out of bounds.  'file' is NULL after
+     * the failed lookup, so the record is simply ignored.
+     */
+    if(limit_flag)
+        goto out;
+
+    /* no existing record, assign a new file record from the global array */
+    file = &(stdio_runtime->file_runtime_array[stdio_runtime->file_array_ndx]);
+    file->file_record = &(stdio_runtime->file_record_array[stdio_runtime->file_array_ndx]);
+    file->file_record->f_id = file_id;
+    file->file_record->rank = my_rank;
+
+    /* add new record to file hash table */
+    HASH_ADD(hlink, stdio_runtime->file_hash, file_record->f_id, sizeof(darshan_record_id), file);
+    stdio_runtime->file_array_ndx++;
+
+out:
+    /* release the cleaned copy of the path, if one was produced */
+    if(newname != name)
+        free(newname);
+    return(file);
+}
+
+/* Look up (or create) the STDIO file record for 'name' and make 'stream'
+ * refer to it, adding a stream reference to the stream hash when the stream
+ * is not tracked yet.
+ *
+ * Returns the file record, or NULL if the module is inactive, no file record
+ * is available, or a new reference cannot be allocated.
+ */
+static struct stdio_file_runtime* stdio_file_by_name_setstream(const char* name, FILE *stream)
+{
+    struct stdio_file_runtime* rec;
+    struct stdio_file_runtime_ref* sref;
+
+    if(!stdio_runtime || instrumentation_disabled)
+        return(NULL);
+
+    /* resolve the file record from the path first */
+    rec = stdio_file_by_name(name);
+    if(rec == NULL)
+        return(NULL);
+
+    /* is this stream already being tracked? */
+    HASH_FIND(hlink, stdio_runtime->stream_hash, &stream, sizeof(FILE*), sref);
+    if(sref == NULL)
+    {
+        /* first time we see this stream: create a reference and insert it */
+        sref = malloc(sizeof(*sref));
+        if(sref == NULL)
+            return(NULL);
+        memset(sref, 0, sizeof(*sref));
+
+        sref->stream = stream;
+        HASH_ADD(hlink, stdio_runtime->stream_hash, stream, sizeof(FILE*), sref);
+    }
+
+    /* new or pre-existing, the reference must point at the current file */
+    sref->file = rec;
+
+    return(rec);
+}
+
+/* Return the STDIO file record associated with 'stream', or NULL when the
+ * module is inactive or the stream has no reference in the stream hash.
+ */
+static struct stdio_file_runtime* stdio_file_by_stream(FILE *stream)
+{
+    struct stdio_file_runtime_ref* sref;
+
+    if(!stdio_runtime || instrumentation_disabled)
+        return(NULL);
+
+    /* the stream hash is keyed by the FILE* value itself */
+    HASH_FIND(hlink, stdio_runtime->stream_hash, &stream, sizeof(FILE*), sref);
+
+    return(sref ? sref->file : NULL);
+}
+
+/* Drop the stream reference for 'stream', if one exists: remove it from the
+ * stream hash and free it.  The file record it pointed to is left untouched.
+ */
+static void stdio_file_close_stream(FILE *stream)
+{
+    struct stdio_file_runtime_ref* sref;
+
+    if(!stdio_runtime || instrumentation_disabled)
+        return;
+
+    /* look the stream up; nothing to do if it was never tracked */
+    HASH_FIND(hlink, stdio_runtime->stream_hash, &stream, sizeof(FILE*), sref);
+    if(sref == NULL)
+        return;
+
+    HASH_DELETE(hlink, stdio_runtime->stream_hash, sref);
+    free(sref);
+}
+
+/************************************************************************
+ * Functions exported by this module for coordinating with darshan-core *
+ ************************************************************************/
+
+/* begin_shutdown callback: turn instrumentation off under the module lock so
+ * the helpers above stop touching module state while Darshan shuts down.
+ * Asserts that the module was initialized.
+ */
+static void stdio_begin_shutdown(void)
+{
+    assert(stdio_runtime);
+
+    STDIO_LOCK();
+    instrumentation_disabled = 1;
+    STDIO_UNLOCK();
+}
+
+/* get_output_data callback: hand darshan-core the module's record buffer.
+ *
+ * stdio_buf:    set to the start of the contiguous file record array
+ * stdio_buf_sz: set to the size in bytes of the records currently in use
+ *
+ * mod_comm, shared_recs and shared_rec_count are accepted but not used here.
+ * Asserts that the module was initialized.
+ */
+static void stdio_get_output_data(
+    MPI_Comm mod_comm,
+    darshan_record_id *shared_recs,
+    int shared_rec_count,
+    void **stdio_buf,
+    int *stdio_buf_sz)
+{
+    struct darshan_stdio_file *records;
+
+    assert(stdio_runtime);
+
+    records = stdio_runtime->file_record_array;
+
+    *stdio_buf = records;
+    *stdio_buf_sz = stdio_runtime->file_array_ndx * sizeof(struct darshan_stdio_file);
+}
+
+/* shutdown callback: release every stream reference, empty the file hash,
+ * free the record arrays and the module state, and reset the module to its
+ * uninitialized condition.  Asserts that the module was initialized.
+ */
+static void stdio_shutdown(void)
+{
+    struct stdio_file_runtime_ref *cur;
+    struct stdio_file_runtime_ref *next;
+
+    assert(stdio_runtime);
+
+    /* stream references are individually heap-allocated: unlink and free each */
+    HASH_ITER(hlink, stdio_runtime->stream_hash, cur, next)
+    {
+        HASH_DELETE(hlink, stdio_runtime->stream_hash, cur);
+        free(cur);
+    }
+
+    /* file hash entries live inside file_runtime_array, so only the hash
+     * bookkeeping is cleared here; the storage is freed all at once below
+     */
+    HASH_CLEAR(hlink, stdio_runtime->file_hash);
+
+    free(stdio_runtime->file_runtime_array);
+    free(stdio_runtime->file_record_array);
+    free(stdio_runtime);
+
+    stdio_runtime = NULL;
+    instrumentation_disabled = 0;
+}
+
+/*
+ * Local variables:
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ * End:
+ *
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */


=====================================
darshan-runtime/share/ld-opts/darshan-base-ld-opts.in
=====================================
--- a/darshan-runtime/share/ld-opts/darshan-base-ld-opts.in
+++ b/darshan-runtime/share/ld-opts/darshan-base-ld-opts.in
@@ -3,3 +3,4 @@
 @@darshan_share_path@/ld-opts/darshan-posix-ld-opts
 @@darshan_share_path@/ld-opts/darshan-hdf5-ld-opts
 @@darshan_share_path@/ld-opts/darshan-pnetcdf-ld-opts
+@@darshan_share_path@/ld-opts/darshan-stdio-ld-opts


=====================================
darshan-runtime/share/ld-opts/darshan-posix-ld-opts
=====================================
--- a/darshan-runtime/share/ld-opts/darshan-posix-ld-opts
+++ b/darshan-runtime/share/ld-opts/darshan-posix-ld-opts
@@ -2,8 +2,6 @@
 --wrap=open64
 --wrap=creat
 --wrap=creat64
---wrap=fopen
---wrap=fopen64
 --wrap=mkstemp
 --wrap=mkostemp
 --wrap=mkstemps


=====================================
darshan-stdio-log-format.h
=====================================
--- a/darshan-stdio-log-format.h
+++ b/darshan-stdio-log-format.h
@@ -93,7 +93,7 @@ enum darshan_stdio_f_indices
  *      - integer I/O counters (operation counts, I/O sizes, etc.)
  *      - floating point I/O counters (timestamps, cumulative timers, etc.)
  */
-struct darshan_stdio_record
+struct darshan_stdio_file
 {
     darshan_record_id f_id;
     int64_t rank;



View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/commit/ebbe6421a698b49e7157f28977655adecb6c3b7f
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20160419/b518c8d1/attachment-0001.html>


More information about the Darshan-commits mailing list