[Darshan-commits] [Git][darshan/darshan][dev-stdio] initial skeleton for stdio wrapper lib
Philip Carns
xgitlab at cels.anl.gov
Tue Apr 19 11:02:58 CDT 2016
Philip Carns pushed to branch dev-stdio at darshan / darshan
Commits:
ebbe6421 by Phil Carns at 2016-04-19T12:01:32-04:00
initial skeleton for stdio wrapper lib
- move fopen/fopen64 wrappers to stdio module
- make infrastructure for tracking stream pointers based on similar fd
infrastructure in posix
- define placeholder record structs
- wrappers don't actually do anything yet
- - - - -
6 changed files:
- darshan-runtime/Makefile.in
- darshan-runtime/lib/darshan-posix.c
- + darshan-runtime/lib/darshan-stdio.c
- darshan-runtime/share/ld-opts/darshan-base-ld-opts.in
- darshan-runtime/share/ld-opts/darshan-posix-ld-opts
- darshan-stdio-log-format.h
Changes:
=====================================
darshan-runtime/Makefile.in
=====================================
--- a/darshan-runtime/Makefile.in
+++ b/darshan-runtime/Makefile.in
@@ -33,8 +33,8 @@ CFLAGS_SHARED = -DDARSHAN_CONFIG_H=\"darshan-runtime-config.h\" -I . -I$(srcdir)
LIBS = -lz @LIBBZ2@
-DARSHAN_STATIC_MOD_OBJS = lib/darshan-posix.o lib/darshan-mpiio.o lib/darshan-hdf5.o lib/darshan-pnetcdf.o
-DARSHAN_DYNAMIC_MOD_OBJS = lib/darshan-posix.po lib/darshan-mpiio.po lib/darshan-hdf5.po lib/darshan-pnetcdf.po
+DARSHAN_STATIC_MOD_OBJS = lib/darshan-posix.o lib/darshan-mpiio.o lib/darshan-hdf5.o lib/darshan-pnetcdf.o lib/darshan-stdio.o
+DARSHAN_DYNAMIC_MOD_OBJS = lib/darshan-posix.po lib/darshan-mpiio.po lib/darshan-hdf5.po lib/darshan-pnetcdf.po lib/darshan-stdio.po
ifdef DARSHAN_USE_BGQ
DARSHAN_STATIC_MOD_OBJS += lib/darshan-bgq.o
@@ -76,6 +76,12 @@ lib/darshan-posix.o: lib/darshan-posix.c darshan.h $(DARSHAN_LOG_FORMAT) $(srcdi
lib/darshan-posix.po: lib/darshan-posix.c darshan.h $(DARSHAN_LOG_FORMAT) $(srcdir)/../darshan-posix-log-format.h | lib
$(CC) $(CFLAGS_SHARED) -c $< -o $@
+lib/darshan-stdio.o: lib/darshan-stdio.c darshan.h $(DARSHAN_LOG_FORMAT) $(srcdir)/../darshan-stdio-log-format.h | lib
+ $(CC) $(CFLAGS) -c $< -o $@
+
+lib/darshan-stdio.po: lib/darshan-stdio.c darshan.h $(DARSHAN_LOG_FORMAT) $(srcdir)/../darshan-stdio-log-format.h | lib
+ $(CC) $(CFLAGS_SHARED) -c $< -o $@
+
lib/darshan-mpiio.o: lib/darshan-mpiio.c darshan.h $(DARSHAN_LOG_FORMAT) $(srcdir)/../darshan-mpiio-log-format.h | lib
$(CC) $(CFLAGS) -c $< -o $@
@@ -157,6 +163,7 @@ endif
install -m 644 $(srcdir)/share/ld-opts/darshan-posix-ld-opts $(datarootdir)/ld-opts/darshan-posix-ld-opts
install -m 644 $(srcdir)/share/ld-opts/darshan-hdf5-ld-opts $(datarootdir)/ld-opts/darshan-hdf5-ld-opts
install -m 644 $(srcdir)/share/ld-opts/darshan-pnetcdf-ld-opts $(datarootdir)/ld-opts/darshan-pnetcdf-ld-opts
+ install -m 644 $(srcdir)/share/ld-opts/darshan-stdio-ld-opts $(datarootdir)/ld-opts/darshan-stdio-ld-opts
install -d $(libdir)/pkgconfig
install -m 644 lib/pkgconfig/darshan-runtime.pc $(libdir)/pkgconfig/darshan-runtime.pc
=====================================
darshan-runtime/lib/darshan-posix.c
=====================================
--- a/darshan-runtime/lib/darshan-posix.c
+++ b/darshan-runtime/lib/darshan-posix.c
@@ -43,8 +43,6 @@ DARSHAN_FORWARD_DECL(open, int, (const char *path, int flags, ...));
DARSHAN_FORWARD_DECL(open64, int, (const char *path, int flags, ...));
DARSHAN_FORWARD_DECL(creat, int, (const char* path, mode_t mode));
DARSHAN_FORWARD_DECL(creat64, int, (const char* path, mode_t mode));
-DARSHAN_FORWARD_DECL(fopen, FILE*, (const char *path, const char *mode));
-DARSHAN_FORWARD_DECL(fopen64, FILE*, (const char *path, const char *mode));
DARSHAN_FORWARD_DECL(mkstemp, int, (char *template));
DARSHAN_FORWARD_DECL(mkostemp, int, (char *template, int flags));
DARSHAN_FORWARD_DECL(mkstemps, int, (char *template, int suffixlen));
@@ -465,56 +463,6 @@ int DARSHAN_DECL(creat64)(const char* path, mode_t mode)
return(ret);
}
-FILE* DARSHAN_DECL(fopen)(const char *path, const char *mode)
-{
- FILE* ret;
- int fd;
- double tm1, tm2;
-
- MAP_OR_FAIL(fopen);
-
- tm1 = darshan_core_wtime();
- ret = __real_fopen(path, mode);
- tm2 = darshan_core_wtime();
-
- if(ret == NULL)
- fd = -1;
- else
- fd = fileno(ret);
-
- POSIX_LOCK();
- posix_runtime_initialize();
- POSIX_RECORD_OPEN(fd, path, 0, 1, tm1, tm2);
- POSIX_UNLOCK();
-
- return(ret);
-}
-
-FILE* DARSHAN_DECL(fopen64)(const char *path, const char *mode)
-{
- FILE* ret;
- int fd;
- double tm1, tm2;
-
- MAP_OR_FAIL(fopen64);
-
- tm1 = darshan_core_wtime();
- ret = __real_fopen64(path, mode);
- tm2 = darshan_core_wtime();
-
- if(ret == NULL)
- fd = -1;
- else
- fd = fileno(ret);
-
- POSIX_LOCK();
- posix_runtime_initialize();
- POSIX_RECORD_OPEN(fd, path, 0, 1, tm1, tm2);
- POSIX_UNLOCK();
-
- return(ret);
-}
-
int DARSHAN_DECL(mkstemp)(char* template)
{
int ret;
=====================================
darshan-runtime/lib/darshan-stdio.c
=====================================
--- /dev/null
+++ b/darshan-runtime/lib/darshan-stdio.c
@@ -0,0 +1,427 @@
+/*
+ * Copyright (C) 2015 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ *
+ */
+
+#define _XOPEN_SOURCE 500
+#define _GNU_SOURCE
+
+#include "darshan-runtime-config.h"
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <string.h>
+#include <time.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/uio.h>
+#include <sys/mman.h>
+#include <search.h>
+#include <assert.h>
+#include <libgen.h>
+#include <pthread.h>
+
+#include "uthash.h"
+#include "utlist.h"
+
+#include "darshan.h"
+#include "darshan-dynamic.h"
+
+DARSHAN_FORWARD_DECL(fopen, FILE*, (const char *path, const char *mode));
+DARSHAN_FORWARD_DECL(fopen64, FILE*, (const char *path, const char *mode));
+
+/* The stdio_file_runtime structure maintains necessary runtime metadata
+ * for the STDIO file record (darshan_stdio_file structure, defined in
+ * darshan-stdio-log-format.h) pointed to by 'file_record'. This metadata
+ * assists with the instrumenting of specific statistics in the file record.
+ * 'hlink' is a hash table link structure used to add/remove this record
+ * from the hash table of STDIO file records for this process.
+ *
+ * RATIONALE: the STDIO module needs to track some stateful, volatile
+ * information about each open file (like the current file offset, most recent
+ * access time, etc.) to aid in instrumentation, but this information can't be
+ * stored in the darshan_stdio_file struct because we don't want it to appear in
+ * the final darshan log file. We therefore associate a stdio_file_runtime
+ * struct with each darshan_stdio_file struct in order to track this information.
+ *
+ * NOTE: There is a one-to-one mapping of stdio_file_runtime structs to
+ * darshan_stdio_file structs.
+ *
+ * NOTE: The stdio_file_runtime struct contains a pointer to a darshan_stdio_file
+ * struct (see the *file_record member) rather than simply embedding an entire
+ * darshan_stdio_file struct. This is done so that all of the darshan_stdio_file
+ * structs can be kept contiguous in memory as a single array to simplify
+ * reduction, compression, and storage.
+ */
+struct stdio_file_runtime
+{
+ /* TODO: make sure we need/want all of these fields */
+ struct darshan_stdio_file* file_record;
+ int64_t offset;
+ int64_t last_byte_read;
+ int64_t last_byte_written;
+ enum darshan_io_type last_io_type;
+ double last_meta_end;
+ double last_read_end;
+ double last_write_end;
+ UT_hash_handle hlink;
+};
+
+/* The stdio_file_runtime_ref structure is used to associate a STDIO
+ * stream with an already existing STDIO file record. This is
+ * necessary as many STDIO I/O functions take only an input stream,
+ * but STDIO file records are indexed by their full file paths (i.e., darshan
+ * record identifiers for STDIO files are created by hashing the file path).
+ * In other words, this structure is necessary as it allows us to look up a
+ * file record either by a pathname (stdio_file_runtime) or by STDIO stream
+ * (stdio_file_runtime_ref), depending on which parameters are
+ * available. This structure includes another hash table link, since separate
+ * hashes are maintained for stdio_file_runtime structures and stdio_file_runtime_ref
+ * structures.
+ *
+ * RATIONALE: In theory the FILE* information could be included in the
+ * stdio_file_runtime struct rather than in a separate structure here. The
+ * reason we don't do that is because the same file could be opened multiple
+ * times by a given process with different stream pointers and thus
+ * simultaneously referenced using different stream pointers. This practice is
+ * not common, but we must support it.
+ *
+ * NOTE: there are potentially multiple stdio_file_runtime_ref structures
+ * referring to a single stdio_file_runtime structure. Most of the time there is
+ * only one, however.
+ */
+struct stdio_file_runtime_ref
+{
+ struct stdio_file_runtime* file;
+ FILE* stream;
+ UT_hash_handle hlink;
+};
+
+/* The stdio_runtime structure maintains necessary state for storing
+ * STDIO file records and for coordinating with darshan-core at
+ * shutdown time.
+ */
+struct stdio_runtime
+{
+ struct stdio_file_runtime* file_runtime_array;
+ struct darshan_stdio_file* file_record_array;
+ int file_array_size;
+ int file_array_ndx;
+ struct stdio_file_runtime* file_hash;
+ struct stdio_file_runtime_ref* stream_hash;
+};
+
+static struct stdio_runtime *stdio_runtime = NULL;
+static pthread_mutex_t stdio_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
+static int instrumentation_disabled = 0;
+static int darshan_mem_alignment = 1;
+static int my_rank = -1;
+
+static void stdio_runtime_initialize(void);
+static struct stdio_file_runtime* stdio_file_by_name(const char *name);
+static struct stdio_file_runtime* stdio_file_by_name_setstream(const char* name, FILE *stream);
+static struct stdio_file_runtime* stdio_file_by_stream(FILE* stream);
+static void stdio_file_close_stream(FILE *stream);
+
+static void stdio_begin_shutdown(void);
+static void stdio_get_output_data(MPI_Comm mod_comm, darshan_record_id *shared_recs,
+ int shared_rec_count, void **stdio_buf, int *stdio_buf_sz);
+static void stdio_shutdown(void);
+
+#define STDIO_LOCK() pthread_mutex_lock(&stdio_runtime_mutex)
+#define STDIO_UNLOCK() pthread_mutex_unlock(&stdio_runtime_mutex)
+
+FILE* DARSHAN_DECL(fopen)(const char *path, const char *mode)
+{
+ FILE* ret;
+
+ fprintf(stderr, "FOO: HELLO WORLD (FOPEN)\n");
+
+ MAP_OR_FAIL(fopen);
+
+ ret = __real_fopen(path, mode);
+
+ return(ret);
+}
+
+FILE* DARSHAN_DECL(fopen64)(const char *path, const char *mode)
+{
+ FILE* ret;
+
+ fprintf(stderr, "FOO: HELLO WORLD (FOPEN64)\n");
+
+ MAP_OR_FAIL(fopen64);
+
+ ret = __real_fopen64(path, mode);
+
+ return(ret);
+}
+
+/**********************************************************
+ * Internal functions for manipulating STDIO module state *
+ **********************************************************/
+
+/* initialize internal STDIO module data structures and register with darshan-core */
+static void stdio_runtime_initialize()
+{
+ int mem_limit;
+ struct darshan_module_funcs stdio_mod_fns =
+ {
+ .begin_shutdown = &stdio_begin_shutdown,
+ .get_output_data = &stdio_get_output_data,
+ .shutdown = &stdio_shutdown
+ };
+
+ /* don't do anything if already initialized or instrumentation is disabled */
+ if(stdio_runtime || instrumentation_disabled)
+ return;
+
+ /* register the stdio module with darshan core */
+ darshan_core_register_module(
+ DARSHAN_STDIO_MOD,
+ &stdio_mod_fns,
+ &my_rank,
+ &mem_limit,
+ &darshan_mem_alignment);
+
+ /* return if no memory assigned by darshan core */
+ if(mem_limit == 0)
+ return;
+
+ stdio_runtime = malloc(sizeof(*stdio_runtime));
+ if(!stdio_runtime)
+ return;
+ memset(stdio_runtime, 0, sizeof(*stdio_runtime));
+
+ /* set maximum number of file records according to max memory limit */
+ /* NOTE: maximum number of records is based on the size of a stdio file record */
+ /* TODO: should we base memory usage off file record or total runtime structure sizes? */
+ stdio_runtime->file_array_size = mem_limit / sizeof(struct darshan_stdio_file);
+ stdio_runtime->file_array_ndx = 0;
+
+ /* allocate array of runtime file records */
+ stdio_runtime->file_runtime_array = malloc(stdio_runtime->file_array_size *
+ sizeof(struct stdio_file_runtime));
+ stdio_runtime->file_record_array = malloc(stdio_runtime->file_array_size *
+ sizeof(struct darshan_stdio_file));
+ if(!stdio_runtime->file_runtime_array || !stdio_runtime->file_record_array)
+ {
+ stdio_runtime->file_array_size = 0;
+ return;
+ }
+ memset(stdio_runtime->file_runtime_array, 0, stdio_runtime->file_array_size *
+ sizeof(struct stdio_file_runtime));
+ memset(stdio_runtime->file_record_array, 0, stdio_runtime->file_array_size *
+ sizeof(struct darshan_stdio_file));
+
+ return;
+}
+
+/* get a STDIO file record for the given file path */
+static struct stdio_file_runtime* stdio_file_by_name(const char *name)
+{
+ struct stdio_file_runtime *file = NULL;
+ char *newname = NULL;
+ darshan_record_id file_id;
+ int file_alignment;
+ int limit_flag;
+
+ if(!stdio_runtime || instrumentation_disabled)
+ return(NULL);
+
+ newname = darshan_clean_file_path(name);
+ if(!newname)
+ newname = (char*)name;
+
+ limit_flag = (stdio_runtime->file_array_ndx >= stdio_runtime->file_array_size);
+
+ /* get a unique id for this file from darshan core */
+ darshan_core_register_record(
+ (void*)newname,
+ strlen(newname),
+ DARSHAN_STDIO_MOD,
+ 1,
+ limit_flag,
+ &file_id,
+ &file_alignment);
+
+ /* the file record id is set to 0 if no memory is available for tracking
+ * new records -- just fall through and ignore this record
+ */
+ if(file_id == 0)
+ {
+ if(newname != name)
+ free(newname);
+ return(NULL);
+ }
+
+ /* search the hash table for this file record, and return if found */
+ HASH_FIND(hlink, stdio_runtime->file_hash, &file_id, sizeof(darshan_record_id), file);
+ if(file)
+ {
+ if(newname != name)
+ free(newname);
+ return(file);
+ }
+
+ /* no existing record, assign a new file record from the global array */
+ file = &(stdio_runtime->file_runtime_array[stdio_runtime->file_array_ndx]);
+ file->file_record = &(stdio_runtime->file_record_array[stdio_runtime->file_array_ndx]);
+ file->file_record->f_id = file_id;
+ file->file_record->rank = my_rank;
+
+ /* add new record to file hash table */
+ HASH_ADD(hlink, stdio_runtime->file_hash, file_record->f_id, sizeof(darshan_record_id), file);
+ stdio_runtime->file_array_ndx++;
+
+ if(newname != name)
+ free(newname);
+ return(file);
+}
+
+/* get a STDIO file record for the given file path, and also create a
+ * reference structure using the returned stream
+ */
+static struct stdio_file_runtime* stdio_file_by_name_setstream(const char* name, FILE *stream)
+{
+ struct stdio_file_runtime* file;
+ struct stdio_file_runtime_ref* ref;
+
+ if(!stdio_runtime || instrumentation_disabled)
+ return(NULL);
+
+ /* find file record by name first */
+ file = stdio_file_by_name(name);
+
+ if(!file)
+ return(NULL);
+
+ /* search hash table for existing file ref for this stream */
+ HASH_FIND(hlink, stdio_runtime->stream_hash, &stream, sizeof(FILE*), ref);
+ if(ref)
+ {
+ /* we have a reference. Make sure it points to the correct file
+ * and return it
+ */
+ ref->file = file;
+ return(file);
+ }
+
+ /* if we hit this point, then we don't have a reference for this stream
+ * in the table yet. Add it.
+ */
+ ref = malloc(sizeof(*ref));
+ if(!ref)
+ return(NULL);
+ memset(ref, 0, sizeof(*ref));
+
+ ref->file = file;
+ ref->stream = stream;
+ HASH_ADD(hlink, stdio_runtime->stream_hash, stream, sizeof(FILE*), ref);
+
+ return(file);
+}
+
+/* get a STDIO file record for the given stream */
+static struct stdio_file_runtime* stdio_file_by_stream(FILE *stream)
+{
+ struct stdio_file_runtime_ref* ref;
+
+ if(!stdio_runtime || instrumentation_disabled)
+ return(NULL);
+
+ /* search hash table for existing file ref for this stream */
+ HASH_FIND(hlink, stdio_runtime->stream_hash, &stream, sizeof(FILE*), ref);
+ if(ref)
+ return(ref->file);
+
+ return(NULL);
+}
+
+/* free up reference data structures for the given stream */
+static void stdio_file_close_stream(FILE *stream)
+{
+ struct stdio_file_runtime_ref* ref;
+
+ if(!stdio_runtime || instrumentation_disabled)
+ return;
+
+ /* search hash table for this stream */
+ HASH_FIND(hlink, stdio_runtime->stream_hash, &stream, sizeof(FILE*), ref);
+ if(ref)
+ {
+ /* we have a reference, delete it */
+ HASH_DELETE(hlink, stdio_runtime->stream_hash, ref);
+ free(ref);
+ }
+
+ return;
+}
+
+/************************************************************************
+ * Functions exported by this module for coordinating with darshan-core *
+ ************************************************************************/
+
+static void stdio_begin_shutdown()
+{
+ assert(stdio_runtime);
+
+ STDIO_LOCK();
+ /* disable further instrumentation while Darshan shuts down */
+ instrumentation_disabled = 1;
+ STDIO_UNLOCK();
+
+ return;
+}
+
+static void stdio_get_output_data(
+ MPI_Comm mod_comm,
+ darshan_record_id *shared_recs,
+ int shared_rec_count,
+ void **stdio_buf,
+ int *stdio_buf_sz)
+{
+
+ assert(stdio_runtime);
+
+ *stdio_buf = (void *)(stdio_runtime->file_record_array);
+ *stdio_buf_sz = stdio_runtime->file_array_ndx * sizeof(struct darshan_stdio_file);
+
+ return;
+}
+
+static void stdio_shutdown()
+{
+ struct stdio_file_runtime_ref *ref, *tmp;
+
+ assert(stdio_runtime);
+
+ HASH_ITER(hlink, stdio_runtime->stream_hash, ref, tmp)
+ {
+ HASH_DELETE(hlink, stdio_runtime->stream_hash, ref);
+ free(ref);
+ }
+
+ HASH_CLEAR(hlink, stdio_runtime->file_hash); /* these entries are freed all at once below */
+
+ free(stdio_runtime->file_runtime_array);
+ free(stdio_runtime->file_record_array);
+ free(stdio_runtime);
+ stdio_runtime = NULL;
+ instrumentation_disabled = 0;
+
+ return;
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 4
+ * c-basic-offset: 4
+ * End:
+ *
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
=====================================
darshan-runtime/share/ld-opts/darshan-base-ld-opts.in
=====================================
--- a/darshan-runtime/share/ld-opts/darshan-base-ld-opts.in
+++ b/darshan-runtime/share/ld-opts/darshan-base-ld-opts.in
@@ -3,3 +3,4 @@
@@darshan_share_path@/ld-opts/darshan-posix-ld-opts
@@darshan_share_path@/ld-opts/darshan-hdf5-ld-opts
@@darshan_share_path@/ld-opts/darshan-pnetcdf-ld-opts
+@@darshan_share_path@/ld-opts/darshan-stdio-ld-opts
=====================================
darshan-runtime/share/ld-opts/darshan-posix-ld-opts
=====================================
--- a/darshan-runtime/share/ld-opts/darshan-posix-ld-opts
+++ b/darshan-runtime/share/ld-opts/darshan-posix-ld-opts
@@ -2,8 +2,6 @@
--wrap=open64
--wrap=creat
--wrap=creat64
---wrap=fopen
---wrap=fopen64
--wrap=mkstemp
--wrap=mkostemp
--wrap=mkstemps
=====================================
darshan-stdio-log-format.h
=====================================
--- a/darshan-stdio-log-format.h
+++ b/darshan-stdio-log-format.h
@@ -93,7 +93,7 @@ enum darshan_stdio_f_indices
* - integer I/O counters (operation counts, I/O sizes, etc.)
* - floating point I/O counters (timestamps, cumulative timers, etc.)
*/
-struct darshan_stdio_record
+struct darshan_stdio_file
{
darshan_record_id f_id;
int64_t rank;
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/commit/ebbe6421a698b49e7157f28977655adecb6c3b7f
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20160419/b518c8d1/attachment-0001.html>
More information about the Darshan-commits
mailing list