[Darshan-commits] [Darshan] branch, dev-modular, updated. a0e8f8a87a9113013ae4460c32fa49b8257a0459

Service Account git at mcs.anl.gov
Thu Jan 22 10:59:23 CST 2015


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "".

The branch, dev-modular has been updated
       via  a0e8f8a87a9113013ae4460c32fa49b8257a0459 (commit)
       via  01d0a980101212371eb3fb6ccc76562c6f5eb857 (commit)
      from  abf424f2859b8ea43a4818a2c086531e39134dd4 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit a0e8f8a87a9113013ae4460c32fa49b8257a0459
Author: Shane Snyder <ssnyder at mcs.anl.gov>
Date:   Thu Jan 22 10:58:35 2015 -0600

    initial rewrite of logutils for new file format

commit 01d0a980101212371eb3fb6ccc76562c6f5eb857
Author: Shane Snyder <ssnyder at mcs.anl.gov>
Date:   Thu Jan 22 10:57:50 2015 -0600

    Refactoring/bug fixes for darshan-runtime

-----------------------------------------------------------------------

Summary of changes:
 darshan-log-format.h               |   23 +-
 darshan-runtime/darshan-core.h     |    3 +-
 darshan-runtime/darshan.h          |    2 -
 darshan-runtime/lib/darshan-core.c |  212 ++++++--
 darshan-util/Makefile.in           |   51 +-
 darshan-util/darshan-logutils.c    | 1069 +++++-------------------------------
 darshan-util/darshan-logutils.h    |   11 +-
 7 files changed, 367 insertions(+), 1004 deletions(-)


Diff of changes:
diff --git a/darshan-log-format.h b/darshan-log-format.h
index 2f59c34..4777b1b 100644
--- a/darshan-log-format.h
+++ b/darshan-log-format.h
@@ -19,7 +19,7 @@
 #endif
 
 /* update this on file format changes */
-#define CP_VERSION "2.05"
+#define CP_VERSION "3.00"
 
 /* magic number for validating output files and checking byte order */
 #define CP_MAGIC_NR 6567223
@@ -30,18 +30,33 @@
 /* max length of exe string within job record (not counting '\0') */
 #define CP_EXE_LEN (CP_JOB_RECORD_SIZE - sizeof(struct darshan_job) - 1)
 
+typedef uint64_t darshan_record_id;
+
+struct darshan_header
+{
+    char version_string[8];
+    int64_t magic_nr;
+    uint8_t comp_type;
+    uint8_t mod_count;
+};
+
+struct darshan_record
+{
+    char* name;
+    darshan_record_id id;
+    //int64_t rank; /* TODO: maybe rank doesn't go here ? */
+};
+
 /* statistics for the job as a whole */
 #define DARSHAN_JOB_METADATA_LEN 1024 /* including null terminator! */
 struct darshan_job
 {
-    char version_string[8];
-    int64_t magic_nr;
     int64_t uid;
     int64_t start_time;
     int64_t end_time;
     int64_t nprocs;
     int64_t jobid;
-    char metadata[DARSHAN_JOB_METADATA_LEN];
+    char metadata[DARSHAN_JOB_METADATA_LEN]; /* TODO: what is this? */
 };
 
 #endif /* __DARSHAN_LOG_FORMAT_H */
diff --git a/darshan-runtime/darshan-core.h b/darshan-runtime/darshan-core.h
index 955de0b..835bb42 100644
--- a/darshan-runtime/darshan-core.h
+++ b/darshan-runtime/darshan-core.h
@@ -35,8 +35,7 @@ struct darshan_core_runtime
 
 struct darshan_core_record_ref
 {
-    char* name;
-    darshan_record_id id;
+    struct darshan_record rec;
     UT_hash_handle hlink;
 };
 
diff --git a/darshan-runtime/darshan.h b/darshan-runtime/darshan.h
index 8e86e35..131d8de 100644
--- a/darshan-runtime/darshan.h
+++ b/darshan-runtime/darshan.h
@@ -44,8 +44,6 @@ typedef enum
     DARSHAN_PNETCDF_MOD,
 } darshan_module_id;
 
-typedef uint64_t darshan_record_id;
-
 struct darshan_module_funcs
 {
     void (*get_output_data)(
diff --git a/darshan-runtime/lib/darshan-core.c b/darshan-runtime/lib/darshan-core.c
index 89c0cca..60232c7 100644
--- a/darshan-runtime/lib/darshan-core.c
+++ b/darshan-runtime/lib/darshan-core.c
@@ -20,6 +20,7 @@
 #include <sys/stat.h>
 #include <sys/vfs.h>
 #include <mpi.h>
+#include <assert.h>
 
 #include "uthash.h"
 #include "darshan-core.h"
@@ -27,19 +28,27 @@
 /* TODO is __progname_full needed here */
 extern char* __progname;
 
-/* internal variables */
+/* internal variable declarations */
 static struct darshan_core_runtime *darshan_core_job = NULL;
 static pthread_mutex_t darshan_core_mutex = PTHREAD_MUTEX_INITIALIZER;
 static int my_rank = -1;
 static int nprocs = -1;
 
-static void darshan_core_initialize(int *argc, char ***argv);
-static void darshan_core_shutdown(void);
-static void darshan_core_cleanup(struct darshan_core_runtime* job);
-static void darshan_get_logfile_name(char* logfile_name, int jobid, struct tm* start_tm);
-static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* job);
-static int darshan_get_shared_record_ids(darshan_record_id *shared_recs);
-static void darshan_write_record_map(void);
+/* prototypes for internal helper functions */
+static void darshan_core_initialize(
+    int *argc, char ***argv);
+static void darshan_core_shutdown(
+    void);
+static void darshan_core_cleanup(
+    struct darshan_core_runtime* job);
+static void darshan_get_logfile_name(
+    char* logfile_name, int jobid, struct tm* start_tm);
+static void darshan_log_record_hints_and_ver(
+    struct darshan_core_runtime* job);
+static int darshan_get_shared_record_ids(
+    struct darshan_core_runtime *job, darshan_record_id *shared_recs);
+static int darshan_write_record_map(
+    struct darshan_core_runtime *job, MPI_File log_fh, darshan_record_id *share_recs);
 
 #define DARSHAN_CORE_LOCK() pthread_mutex_lock(&darshan_core_mutex)
 #define DARSHAN_CORE_UNLOCK() pthread_mutex_unlock(&darshan_core_mutex)
@@ -114,8 +123,6 @@ static void darshan_core_initialize(int *argc, char ***argv)
         {
             memset(darshan_core_job, 0, sizeof(*darshan_core_job));
 
-            strcpy(darshan_core_job->log_job.version_string, CP_VERSION);
-            darshan_core_job->log_job.magic_nr = CP_MAGIC_NR;
             darshan_core_job->log_job.uid = getuid();
             darshan_core_job->log_job.start_time = time(NULL);
             darshan_core_job->log_job.nprocs = nprocs;
@@ -300,22 +307,6 @@ static void darshan_core_shutdown()
     /* reduce the number of times a module was opened globally and bcast to everyone */   
     DARSHAN_MPI_CALL(PMPI_Allreduce)(local_mod_use, global_mod_use_count, DARSHAN_MAX_MODS, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
 
-    /* get a list of records which are shared across all processes */
-    ret = darshan_get_shared_record_ids(shared_recs);
-    DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
-        MPI_LOR, MPI_COMM_WORLD);
-    if(all_ret != 0)
-    {
-        if(my_rank == 0)
-        {
-            fprintf(stderr, "darshan library warning: unable to determine shared file records\n");
-        }
-        free(logfile_name);
-        darshan_core_cleanup(final_job);
-        return;
-
-    }
-
     /* check environment variable to see if the default MPI file hints have
      * been overridden
      */
@@ -356,6 +347,28 @@ static void darshan_core_shutdown()
         }
     }
 
+    /* get a list of records which are shared across all processes */
+    /* TODO: do we store rank with the name map? */
+    ret = darshan_get_shared_record_ids(final_job, shared_recs);
+
+    /* error out if unable to determine shared file records */
+    DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
+        MPI_LOR, MPI_COMM_WORLD);
+    if(all_ret != 0)
+    {
+        if(my_rank == 0)
+        {
+            fprintf(stderr, "darshan library warning: unable to determine shared file records\n");
+        }
+        free(logfile_name);
+        darshan_core_cleanup(final_job);
+        return;
+
+    }
+
+    /* TODO: ensuing error checking...does MPI ensure collective I/O functions return the same error
+     * globally, or do I always need to allreduce????? */
+
     /* open the darshan log file for writing */
     ret = DARSHAN_MPI_CALL(PMPI_File_open)(MPI_COMM_WORLD, logfile_name,
         MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_EXCL, info, &log_fh);
@@ -373,7 +386,7 @@ static void darshan_core_shutdown()
 
             MPI_Error_string(ret, msg, &msg_len);
             fprintf(stderr, "darshan library warning: unable to open log file %s: %s\n",
-                    logfile_name, msg);
+                logfile_name, msg);
             unlink(logfile_name);
         }
         free(logfile_name);
@@ -395,7 +408,7 @@ static void darshan_core_shutdown()
 
             MPI_Error_string(ret, msg, &msg_len);
             fprintf(stderr, "darshan library warning: unable to seek in log file %s: %s\n",
-                    logfile_name, msg);
+                logfile_name, msg);
             unlink(logfile_name);
         }
         free(logfile_name);
@@ -403,8 +416,22 @@ static void darshan_core_shutdown()
         return;
     }
 
-    /* TODO implement */
-    darshan_write_record_map();
+    /* write the record name->id map to the log file */
+    ret = darshan_write_record_map(final_job, log_fh, shared_recs);
+
+    DARSHAN_MPI_CALL(PMPI_Allreduce)(&ret, &all_ret, 1, MPI_INT,
+        MPI_LOR, MPI_COMM_WORLD);
+    if(all_ret != 0)
+    {
+        if(my_rank == 0)
+        {
+            fprintf(stderr, "darshan library warning: unable to write record map to log file %s\n",
+                logfile_name);
+        }
+        free(logfile_name);
+        darshan_core_cleanup(final_job);
+        return;
+    }
 
     /* loop over globally used darshan modules and:
      *      - get final output buffer
@@ -515,6 +542,7 @@ static void darshan_core_shutdown()
     /* TODO: is this still right? -- write the job info on rank 0 */
     if(my_rank == 0)
     {
+        /* TODO: we want to send log_job, and offsets map */
         ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, 0, &(final_job->log_job),
             sizeof(struct darshan_job), MPI_BYTE, &status);
         if(ret != MPI_SUCCESS)
@@ -768,7 +796,8 @@ static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* job)
     return;
 }
 
-static int darshan_get_shared_record_ids(darshan_record_id *shared_recs)
+static int darshan_get_shared_record_ids(struct darshan_core_runtime *job,
+    darshan_record_id *shared_recs)
 {
     int i;
     int ndx;
@@ -782,9 +811,9 @@ static int darshan_get_shared_record_ids(darshan_record_id *shared_recs)
     if(my_rank == 0)
     {
         ndx = 0;
-        HASH_ITER(hlink, darshan_core_job->rec_hash, ref, tmp)
+        HASH_ITER(hlink, job->rec_hash, ref, tmp)
         {
-            id_array[ndx++] = ref->id;           
+            id_array[ndx++] = ref->rec.id;           
         }
     }
 
@@ -800,9 +829,9 @@ static int darshan_get_shared_record_ids(darshan_record_id *shared_recs)
     /* everyone looks to see if they opened the same records as root */
     for(i=0; (i<DARSHAN_CORE_MAX_RECORDS && id_array[i] != 0); i++)
     {
-        HASH_ITER(hlink, darshan_core_job->rec_hash, ref, tmp)
+        HASH_ITER(hlink, job->rec_hash, ref, tmp)
         {
-            if(id_array[i] == ref->id)
+            if(id_array[i] == ref->rec.id)
             {
                 /* we opened that record too */
                 mask_array[i] = 1;
@@ -831,9 +860,106 @@ static int darshan_get_shared_record_ids(darshan_record_id *shared_recs)
     return 0;
 }
 
-static void darshan_write_record_map()
+/* NOTE: the map written to file may contain duplicate id->name entries if a
+ *       record is opened by multiple ranks, but not all ranks
+ */
+static int darshan_write_record_map(struct darshan_core_runtime *job, MPI_File log_fh,
+    darshan_record_id *shared_recs)
 {
-    return;
+    int i;
+    int ret;
+    struct darshan_core_record_ref *ref, *tmp;
+    uint32_t name_len;
+    size_t record_sz;
+    size_t map_buf_sz = 0;
+    unsigned char *map_buf;
+    unsigned char *map_buf_off;
+    MPI_Status status;
+
+    /* non-root ranks (rank > 0) remove shared records from their map --
+     * these records will be written by rank 0
+     */
+    if(my_rank > 0)
+    {
+        for(i=0; (i<DARSHAN_CORE_MAX_RECORDS && shared_recs[i]); i++)
+        {
+            HASH_FIND(hlink, job->rec_hash, &shared_recs[i], sizeof(darshan_record_id), ref);
+            assert(ref); /* this id had better be in the hash ... */
+            HASH_DELETE(hlink, job->rec_hash, ref);
+            if(ref->rec.name) free(ref->rec.name);
+            free(ref);
+        }
+    }
+
+    /* allocate a buffer sized for an estimated 64 bytes per record, for the max number of records */
+    /* NOTE: this buffer may be reallocated if estimate is too small */
+    map_buf_sz = DARSHAN_CORE_MAX_RECORDS * 64;
+    map_buf = malloc(map_buf_sz);
+    if(!map_buf)
+    {
+        return -1;
+    }
+
+    map_buf_off = map_buf;
+    HASH_ITER(hlink, job->rec_hash, ref, tmp)
+    {
+        name_len = strlen(ref->rec.name);
+        record_sz = sizeof(darshan_record_id) + sizeof(int) + name_len;
+        /* make sure there is room in the buffer for this record */
+        if((map_buf_off + record_sz) > (map_buf + map_buf_sz))
+        {
+            unsigned char *tmp_buf;
+            size_t old_buf_sz;
+
+            /* if no room, reallocate the map buffer at twice the current size */
+            old_buf_sz = map_buf_off - map_buf;
+            map_buf_sz *= 2;
+            tmp_buf = malloc(map_buf_sz);
+            if(!tmp_buf)
+            {
+                free(map_buf);
+                return -1;
+            }
+
+            memcpy(tmp_buf, map_buf, old_buf_sz);
+            free(map_buf);
+            map_buf = tmp_buf;
+            map_buf_off = map_buf + old_buf_sz;
+        }
+
+        /* now serialize the record into the map buffer.
+         * NOTE: darshan record map serialization method: 
+         *          ... darshan_record_id | (uint32_t) path_len | path ...
+         */
+        *((darshan_record_id *)map_buf_off) = ref->rec.id;
+        map_buf_off += sizeof(darshan_record_id);
+        *((uint32_t *)map_buf_off) = name_len;
+        map_buf_off += sizeof(uint32_t);
+        memcpy(map_buf_off, ref->rec.name, name_len);
+        map_buf_off += name_len;
+    }
+
+    /* collectively write out the record map to the darshan log */
+    if(map_buf_off > map_buf)
+    {
+        /* we have records to contribute to the collective write of the record map */
+        ret = DARSHAN_MPI_CALL(PMPI_File_write_all)(log_fh, map_buf, (map_buf_off - map_buf),
+            MPI_BYTE, &status);
+    }
+    else
+    {
+        /* we have no data to write, but participate in the collective anyway */
+        ret = DARSHAN_MPI_CALL(PMPI_File_write_all)(log_fh, NULL, 0,
+            MPI_BYTE, &status);
+    }
+    if(ret != MPI_SUCCESS)
+    {
+        return -1;
+    }
+
+    free(map_buf);
+
+    return 0;
 }
 
 /* ********************************************************* */
@@ -897,7 +1023,7 @@ void darshan_core_lookup_record_id(
     darshan_record_id tmp_id;
     struct darshan_core_record_ref* ref;
 
-    if(!darshan_core_job)
+    if(!darshan_core_job || !name)
         return;
 
     /* TODO: what do you do with printable flag? */
@@ -915,12 +1041,12 @@ void darshan_core_lookup_record_id(
         ref = malloc(sizeof(struct darshan_core_record_ref));
         if(ref)
         {
-            ref->id = tmp_id;
-            ref->name = malloc(strlen(name) + 1);
-            if(ref->name)
-                strcpy(ref->name, name);
+            ref->rec.id = tmp_id;
+            ref->rec.name = malloc(strlen(name) + 1);
+            if(ref->rec.name)
+                strcpy(ref->rec.name, name);
 
-            HASH_ADD(hlink, darshan_core_job->rec_hash, id, sizeof(darshan_record_id), ref);
+            HASH_ADD(hlink, darshan_core_job->rec_hash, rec.id, sizeof(darshan_record_id), ref);
         }
     }   
 
diff --git a/darshan-util/Makefile.in b/darshan-util/Makefile.in
index 03c1c8d..08db51e 100644
--- a/darshan-util/Makefile.in
+++ b/darshan-util/Makefile.in
@@ -1,4 +1,5 @@
-all: darshan-parser darshan-convert darshan-diff darshan-analyzer darshan-log-params darshan-util-lib
+all: darshan-posix-parser darshan-util-lib
+#all: darshan-parser darshan-convert darshan-diff darshan-analyzer darshan-log-params darshan-util-lib
 
 DESTDIR =
 srcdir = @srcdir@
@@ -41,25 +42,28 @@ mktestdir::
 uthash-1.9.2:
 	tar xjvf $(srcdir)/extern/uthash-1.9.2.tar.bz2
 
-darshan-parser: darshan-parser.c $(DARSHAN_LOG_FORMAT) darshan-logutils.h darshan-logutils.o | uthash-1.9.2
+darshan-posix-parser: darshan-posix-parser.c $(DARSHAN_LOG_FORMAT) darshan-logutils.h darshan-logutils.o | uthash-1.9.2
 	$(CC) $(CFLAGS) $(LDFLAGS) $< darshan-logutils.o -o $@ $(LIBS) 
 
-darshan-convert: darshan-convert.c $(DARSHAN_LOG_FORMAT) darshan-logutils.h darshan-logutils.o lookup3.o
-	$(CC) $(CFLAGS)  $(LDFLAGS) $< darshan-logutils.o lookup3.o -o $@ $(LIBS)
+#darshan-parser: darshan-parser.c $(DARSHAN_LOG_FORMAT) darshan-logutils.h darshan-logutils.o | uthash-1.9.2
+#	$(CC) $(CFLAGS) $(LDFLAGS) $< darshan-logutils.o -o $@ $(LIBS) 
 
-darshan-analyzer: darshan-analyzer.c $(DARSHAN_LOG_FORMAT) darshan-logutils.h darshan-logutils.o
-	$(CC) $(CFLAGS)  $(LDFLAGS) $< darshan-logutils.o -o $@ $(LIBS)
+#darshan-convert: darshan-convert.c $(DARSHAN_LOG_FORMAT) darshan-logutils.h darshan-logutils.o lookup3.o
+#	$(CC) $(CFLAGS)  $(LDFLAGS) $< darshan-logutils.o lookup3.o -o $@ $(LIBS)
 
-darshan-log-params: darshan-log-params.c $(DARSHAN_LOG_FORMAT)
-	$(CC) $(CFLAGS)  $(LDFLAGS) $< -o $@ $(LIBS)
+#darshan-analyzer: darshan-analyzer.c $(DARSHAN_LOG_FORMAT) darshan-logutils.h darshan-logutils.o
+#	$(CC) $(CFLAGS)  $(LDFLAGS) $< darshan-logutils.o -o $@ $(LIBS)
+
+#darshan-log-params: darshan-log-params.c $(DARSHAN_LOG_FORMAT)
+#	$(CC) $(CFLAGS)  $(LDFLAGS) $< -o $@ $(LIBS)
 
 jenkins: util/bin/jenkins.o lookup3.o
 	$(CC) $(CFLAGS)  $(LDFLAGS) $< -o $@ lookup3.o $(LIBS)
 
-darshan-diff: darshan-diff.o $(DARSHAN_LOG_FORMAT) darshan-logutils.o darshan-logutils.h
-	$(CC) $(CFLAGS)  $(LDFLAGS) $< darshan-logutils.o -o $@ $(LIBS)
-darshan-diff.o: darshan-diff.c
-	$(CC) $(CFLAGS) -c  $< -o $@
+#darshan-diff: darshan-diff.o $(DARSHAN_LOG_FORMAT) darshan-logutils.o darshan-logutils.h
+#	$(CC) $(CFLAGS)  $(LDFLAGS) $< darshan-logutils.o -o $@ $(LIBS)
+#darshan-diff.o: darshan-diff.c
+#	$(CC) $(CFLAGS) -c  $< -o $@
 darshan-logutils.o: darshan-logutils.c
 	$(CC) $(CFLAGS) -c  $< -o $@
 darshan-logutils.po: darshan-logutils.c
@@ -71,11 +75,11 @@ libdarshan-util.so: darshan-logutils.po
 darshan-util-lib: darshan-logutils.o
 	ar rcs libdarshan-util.a $<
 
-test/gztest: test/gztest.c mktestdir
-	$(CC) $(CFLAGS)  $(LDFLAGS) -lz $< -o $@
+#test/gztest: test/gztest.c mktestdir
+#	$(CC) $(CFLAGS)  $(LDFLAGS) -lz $< -o $@
 
-test/gz-bench: test/gz-bench.c mktestdir
-	$(CC) $(CFLAGS)  $(LDFLAGS) -lz $< -o $@
+#test/gz-bench: test/gz-bench.c mktestdir
+#	$(CC) $(CFLAGS)  $(LDFLAGS) -lz $< -o $@
 
 lookup3.o: lookup3.c
 	$(CC) $(CFLAGS) -c $< -o $@
@@ -85,25 +89,26 @@ install:: all
 	install -d $(libdir)
 	install -d $(includedir)
 	install -d $(pkgconfigdir)
-	install -m 755 darshan-parser $(bindir)
-	install -m 755 darshan-convert $(bindir)
-	install -m 755 darshan-diff $(bindir)
-	install -m 755 darshan-analyzer $(bindir)
-	install -m 755 $(srcdir)/darshan-summary-per-file.sh $(bindir)
+#	install -m 755 darshan-parser $(bindir)
+	install -m 755 darshan-posix-parser $(bindir)
+#	install -m 755 darshan-convert $(bindir)
+#	install -m 755 darshan-diff $(bindir)
+#	install -m 755 darshan-analyzer $(bindir)
+#	install -m 755 $(srcdir)/darshan-summary-per-file.sh $(bindir)
 	install -m 755 libdarshan-util.a $(libdir)
 ifeq ($(DARSHAN_ENABLE_SHARED),1)
 	install -m 755 libdarshan-util.so $(libdir)
 endif
 	install -m 644 $(srcdir)/darshan-logutils.h $(includedir)
 	install -m 644 $(DARSHAN_LOG_FORMAT) $(includedir)
-	install -m 755 darshan-job-summary/bin/darshan-job-summary.pl $(bindir)
+#	install -m 755 darshan-job-summary/bin/darshan-job-summary.pl $(bindir)
 	install -d $(libdir)/TeX
 	install -m 644 $(srcdir)/darshan-job-summary/lib/TeX/Encode.pm $(libdir)/TeX/
 	install -d $(libdir)/Number
 	install -d $(libdir)/Number/Bytes
 	install -m 644 $(srcdir)/darshan-job-summary/lib/Number/Bytes/Human.pm $(libdir)/Number/Bytes
 	install -d $(datarootdir)
-	install -m 644 $(srcdir)/darshan-job-summary/share/* $(datarootdir)
+#	install -m 644 $(srcdir)/darshan-job-summary/share/* $(datarootdir)
 	install -m 644 maint/darshan-util.pc $(pkgconfigdir)
 
 
diff --git a/darshan-util/darshan-logutils.c b/darshan-util/darshan-logutils.c
index d09c95b..9f01aa7 100644
--- a/darshan-util/darshan-logutils.c
+++ b/darshan-util/darshan-logutils.c
@@ -24,250 +24,40 @@
 
 struct darshan_fd_s
 {
-    gzFile gzf;
-#ifdef HAVE_LIBBZ2
-    BZFILE* bzf;
-#endif
+    int pf;
     int64_t pos;
     char mode[2];
     int swap_flag;
-    char version[10];
-    int job_struct_size;
+    char version[8];
     char* name;
-    int COMPAT_CP_EXE_LEN;
-};
-
-/* isn't there a clever c way to avoid this? */
-char *darshan_names[] = {
-    "CP_INDEP_OPENS",
-    "CP_COLL_OPENS",               /* count of MPI collective opens */
-    "CP_INDEP_READS",              /* count of independent MPI reads */
-    "CP_INDEP_WRITES",             /* count of independent MPI writes */
-    "CP_COLL_READS",               /* count of collective MPI reads */
-    "CP_COLL_WRITES",              /* count of collective MPI writes */
-    "CP_SPLIT_READS",              /* count of split collective MPI reads */
-    "CP_SPLIT_WRITES",             /* count of split collective MPI writes */
-    "CP_NB_READS",                 /* count of nonblocking MPI reads */
-    "CP_NB_WRITES",                /* count of nonblocking MPI writes */
-    "CP_SYNCS",                    /* count of MPI_File_sync */
-    "CP_POSIX_READS",              /* count of posix reads */
-    "CP_POSIX_WRITES",             /* count of posix writes */
-    "CP_POSIX_OPENS",              /* count of posix opens */
-    "CP_POSIX_SEEKS",              /* count of posix seeks */
-    "CP_POSIX_STATS",              /* count of posix stat/lstat/fstats */
-    "CP_POSIX_MMAPS",              /* count of posix mmaps */
-    "CP_POSIX_FREADS",
-    "CP_POSIX_FWRITES",
-    "CP_POSIX_FOPENS",
-    "CP_POSIX_FSEEKS",
-    "CP_POSIX_FSYNCS",
-    "CP_POSIX_FDSYNCS",
-    "CP_INDEP_NC_OPENS",
-    "CP_COLL_NC_OPENS",
-    "CP_HDF5_OPENS",
-    "CP_COMBINER_NAMED",           /* count of each MPI datatype category */
-    "CP_COMBINER_DUP",
-    "CP_COMBINER_CONTIGUOUS",
-    "CP_COMBINER_VECTOR",
-    "CP_COMBINER_HVECTOR_INTEGER",
-    "CP_COMBINER_HVECTOR",
-    "CP_COMBINER_INDEXED",
-    "CP_COMBINER_HINDEXED_INTEGER",
-    "CP_COMBINER_HINDEXED",
-    "CP_COMBINER_INDEXED_BLOCK",
-    "CP_COMBINER_STRUCT_INTEGER",
-    "CP_COMBINER_STRUCT",
-    "CP_COMBINER_SUBARRAY",
-    "CP_COMBINER_DARRAY",
-    "CP_COMBINER_F90_REAL",
-    "CP_COMBINER_F90_COMPLEX",
-    "CP_COMBINER_F90_INTEGER",
-    "CP_COMBINER_RESIZED",
-    "CP_HINTS",                     /* count of MPI hints used */
-    "CP_VIEWS",                     /* count of MPI set view calls */
-    "CP_MODE",                      /* mode of file */
-    "CP_BYTES_READ",                /* total bytes read */
-    "CP_BYTES_WRITTEN",             /* total bytes written */
-    "CP_MAX_BYTE_READ",             /* highest offset byte read */
-    "CP_MAX_BYTE_WRITTEN",          /* highest offset byte written */
-    "CP_CONSEC_READS",              /* count of consecutive reads */
-    "CP_CONSEC_WRITES",             /* count of consecutive writes */
-    "CP_SEQ_READS",                 /* count of sequential reads */
-    "CP_SEQ_WRITES",                /* count of sequential writes */
-    "CP_RW_SWITCHES",
-    "CP_MEM_NOT_ALIGNED",           /* count of accesses not mem aligned */
-    "CP_MEM_ALIGNMENT",             /* mem alignment in bytes */
-    "CP_FILE_NOT_ALIGNED",          /* count of accesses not file aligned */
-    "CP_FILE_ALIGNMENT",            /* file alignment in bytes */
-    "CP_MAX_READ_TIME_SIZE",
-    "CP_MAX_WRITE_TIME_SIZE",
-    "CP_SIZE_READ_0_100",           /* count of posix read size ranges */
-    "CP_SIZE_READ_100_1K",
-    "CP_SIZE_READ_1K_10K",
-    "CP_SIZE_READ_10K_100K",
-    "CP_SIZE_READ_100K_1M",
-    "CP_SIZE_READ_1M_4M",
-    "CP_SIZE_READ_4M_10M",
-    "CP_SIZE_READ_10M_100M",
-    "CP_SIZE_READ_100M_1G",
-    "CP_SIZE_READ_1G_PLUS",
-    "CP_SIZE_WRITE_0_100",          /* count of posix write size ranges */
-    "CP_SIZE_WRITE_100_1K",
-    "CP_SIZE_WRITE_1K_10K",
-    "CP_SIZE_WRITE_10K_100K",
-    "CP_SIZE_WRITE_100K_1M",
-    "CP_SIZE_WRITE_1M_4M",
-    "CP_SIZE_WRITE_4M_10M",
-    "CP_SIZE_WRITE_10M_100M",
-    "CP_SIZE_WRITE_100M_1G",
-    "CP_SIZE_WRITE_1G_PLUS",
-    "CP_SIZE_READ_AGG_0_100",       /* count of MPI read size ranges */
-    "CP_SIZE_READ_AGG_100_1K",
-    "CP_SIZE_READ_AGG_1K_10K",
-    "CP_SIZE_READ_AGG_10K_100K",
-    "CP_SIZE_READ_AGG_100K_1M",
-    "CP_SIZE_READ_AGG_1M_4M",
-    "CP_SIZE_READ_AGG_4M_10M",
-    "CP_SIZE_READ_AGG_10M_100M",
-    "CP_SIZE_READ_AGG_100M_1G",
-    "CP_SIZE_READ_AGG_1G_PLUS",
-    "CP_SIZE_WRITE_AGG_0_100",      /* count of MPI write size ranges */
-    "CP_SIZE_WRITE_AGG_100_1K",
-    "CP_SIZE_WRITE_AGG_1K_10K",
-    "CP_SIZE_WRITE_AGG_10K_100K",
-    "CP_SIZE_WRITE_AGG_100K_1M",
-    "CP_SIZE_WRITE_AGG_1M_4M",
-    "CP_SIZE_WRITE_AGG_4M_10M",
-    "CP_SIZE_WRITE_AGG_10M_100M",
-    "CP_SIZE_WRITE_AGG_100M_1G",
-    "CP_SIZE_WRITE_AGG_1G_PLUS",
-    "CP_EXTENT_READ_0_100",          /* count of MPI read extent ranges */
-    "CP_EXTENT_READ_100_1K",
-    "CP_EXTENT_READ_1K_10K",
-    "CP_EXTENT_READ_10K_100K",
-    "CP_EXTENT_READ_100K_1M",
-    "CP_EXTENT_READ_1M_4M",
-    "CP_EXTENT_READ_4M_10M",
-    "CP_EXTENT_READ_10M_100M",
-    "CP_EXTENT_READ_100M_1G",
-    "CP_EXTENT_READ_1G_PLUS",
-    "CP_EXTENT_WRITE_0_100",         /* count of MPI write extent ranges */
-    "CP_EXTENT_WRITE_100_1K",
-    "CP_EXTENT_WRITE_1K_10K",
-    "CP_EXTENT_WRITE_10K_100K",
-    "CP_EXTENT_WRITE_100K_1M",
-    "CP_EXTENT_WRITE_1M_4M",
-    "CP_EXTENT_WRITE_4M_10M",
-    "CP_EXTENT_WRITE_10M_100M",
-    "CP_EXTENT_WRITE_100M_1G",
-    "CP_EXTENT_WRITE_1G_PLUS",
-    "CP_STRIDE1_STRIDE",             /* the four most frequently appearing strides */
-    "CP_STRIDE2_STRIDE",
-    "CP_STRIDE3_STRIDE",
-    "CP_STRIDE4_STRIDE",
-    "CP_STRIDE1_COUNT",              /* count of each of the most frequent strides */
-    "CP_STRIDE2_COUNT",
-    "CP_STRIDE3_COUNT",
-    "CP_STRIDE4_COUNT",
-    "CP_ACCESS1_ACCESS",
-    "CP_ACCESS2_ACCESS",
-    "CP_ACCESS3_ACCESS",
-    "CP_ACCESS4_ACCESS",
-    "CP_ACCESS1_COUNT",
-    "CP_ACCESS2_COUNT",
-    "CP_ACCESS3_COUNT",
-    "CP_ACCESS4_COUNT",
-    "CP_DEVICE",
-    "CP_SIZE_AT_OPEN",
-    "CP_FASTEST_RANK",
-    "CP_FASTEST_RANK_BYTES",
-    "CP_SLOWEST_RANK",
-    "CP_SLOWEST_RANK_BYTES",
-
-    "CP_NUM_INDICES"
-};
-
-/* isn't there a clever c way to avoid this? */
-char *darshan_f_names[] = {
-    "CP_F_OPEN_TIMESTAMP",        /* timestamp of first open */
-    "CP_F_READ_START_TIMESTAMP",  /* timestamp of first read */
-    "CP_F_WRITE_START_TIMESTAMP", /* timestamp of first write */
-    "CP_F_CLOSE_TIMESTAMP",       /* timestamp of last close */
-    "CP_F_READ_END_TIMESTAMP",    /* timestamp of last read */
-    "CP_F_WRITE_END_TIMESTAMP",   /* timestamp of last write */
-    "CP_F_POSIX_READ_TIME",       /* cumulative posix read time */
-    "CP_F_POSIX_WRITE_TIME",      /* cumulative posix write time */
-    "CP_F_POSIX_META_TIME",       /* cumulative posix meta time */
-    "CP_F_MPI_META_TIME",         /* cumulative mpi-io metadata time */
-    "CP_F_MPI_READ_TIME",         /* cumulative mpi-io read time */
-    "CP_F_MPI_WRITE_TIME",        /* cumulative mpi-io write time */
-    "CP_F_MAX_READ_TIME",
-    "CP_F_MAX_WRITE_TIME",
-    "CP_F_FASTEST_RANK_TIME",
-    "CP_F_SLOWEST_RANK_TIME",
-    "CP_F_VARIANCE_RANK_TIME",
-    "CP_F_VARIANCE_RANK_BYTES",
-
-    "CP_F_NUM_INDICES"
+    /* TODO: ultimately store indices here */
 };
 
-/* function pointers so that we can switch functions depending on what file
- * version is detected
- */
-int (*getjob_internal)(darshan_fd file, struct darshan_job *job);
-int (*getfile_internal)(darshan_fd fd, 
-    struct darshan_job *job, 
-    struct darshan_file *file);
-#define JOB_SIZE_124 28
-#define JOB_SIZE_200 56
-#define JOB_SIZE_201 120
-#define CP_JOB_RECORD_SIZE_200 1024
-#define CP_JOB_RECORD_SIZE_1x 1024
-
-/* internal routines for parsing different file versions */
-static int getjob_internal_204(darshan_fd file, struct darshan_job *job);
-static int getjob_internal_201(darshan_fd file, struct darshan_job *job);
-static int getjob_internal_200(darshan_fd file, struct darshan_job *job);
-static int getfile_internal_204(darshan_fd fd, struct darshan_job *job, 
-    struct darshan_file *file);
-static int getfile_internal_200(darshan_fd fd, struct darshan_job *job, 
-    struct darshan_file *file);
-static int getjob_internal_124(darshan_fd file, struct darshan_job *job);
-static int getfile_internal_124(darshan_fd fd, struct darshan_job *job, 
-    struct darshan_file *file);
-static int getfile_internal_122(darshan_fd fd, struct darshan_job *job, 
-    struct darshan_file *file);
-static int getfile_internal_121(darshan_fd fd, struct darshan_job *job, 
-    struct darshan_file *file);
-static int getfile_internal_1x(darshan_fd fd, struct darshan_job *job, 
-    struct darshan_file *file, int n_counters, int n_fcounters);
-static void shift_missing_1_24(struct darshan_file* file);
-static void shift_missing_1_22(struct darshan_file* file);
-static void shift_missing_1_21(struct darshan_file* file);
-
-static int darshan_log_seek(darshan_fd fd, int64_t offset);
-static int darshan_log_read(darshan_fd fd, void* buf, int len);
-static int darshan_log_write(darshan_fd fd, void* buf, int len);
-static const char* darshan_log_error(darshan_fd fd, int* errnum);
+static int darshan_log_seek(darshan_fd fd, off_t offset);
+static int darshan_log_read(darshan_fd fd, void *buf, int len);
+static int darshan_log_write(darshan_fd fd, void *buf, int len);
+//static const char* darshan_log_error(darshan_fd fd, int* errnum);
 
 /* a rather crude API for accessing raw binary darshan files */
-darshan_fd darshan_log_open(const char *name, const char* mode)
+darshan_fd darshan_log_open(const char *name, const char *mode)
 {
-    int test_fd;
-    uint8_t magic[2];
-    int ret;
-    int try_bz2 = 1;
-    int len = strlen(name);
+    int o_flags;
 
     /* we only allows "w" or "r" modes, nothing fancy */
     assert(strlen(mode) == 1);
     assert(mode[0] == 'r' || mode[0] == 'w');
+    if(mode[0] == 'r')
+        o_flags = O_RDONLY;
+    else
+        o_flags = O_WRONLY;
 
     darshan_fd tmp_fd = malloc(sizeof(*tmp_fd));
     if(!tmp_fd)
         return(NULL);
     memset(tmp_fd, 0, sizeof(*tmp_fd));
 
+    /* TODO: why is mode needed??? */
+    /* TODO: why is name needed??? */
     tmp_fd->mode[0] = mode[0];
     tmp_fd->mode[1] = mode[1];
     tmp_fd->name  = strdup(name);
@@ -277,417 +67,180 @@ darshan_fd darshan_log_open(const char *name, const char* mode)
         return(NULL);
     }
 
-    if(strcmp(mode, "r") == 0)
-    {
-        /* Try to detect if existing file is a bzip2 file or not.  Both 
-         * libbz2 and libz will fall back to normal I/O (without compression) 
-         * automatically, so we need to do some detection manually up front 
-         * in order to get a chance to try both compression formats.
-         */
-        test_fd = open(name, O_RDONLY);
-        if(test_fd < 0)
-        {
-            perror("open");
-            free(tmp_fd->name);
-            free(tmp_fd);
-            return(NULL);
-        }
-        ret = read(test_fd, &magic, 2);
-        if(ret != 2)
-        {
-            fprintf(stderr, "Error: failed to read any data from %s.\n", 
-                name);
-            free(tmp_fd->name);
-            free(tmp_fd);
-            close(test_fd);
-            return(NULL);
-        }
-        /* header magic for bz2 */
-        if(magic[0] != 0x42 && magic[1] != 0x5A)
-        {
-            try_bz2 = 0;
-        }
-        close(test_fd);
-    }
-
-    if(strcmp(mode, "w") == 0)
-    {
-        /* TODO: is this the behavior that we want? */
-        /* if we are writing a new file, go by the file extension to tell
-         * whether to use bz2 or not?
-         */
-        if(len >= 3 && name[len-3] == 'b' && name[len-2] == 'z' && name[len-1] == '2')
-            try_bz2 = 1;
-        else
-            try_bz2 = 0;
-    }
-
-#ifdef HAVE_LIBBZ2
-    if(try_bz2)
-    {
-        tmp_fd->bzf = BZ2_bzopen(name, mode);
-        if(!tmp_fd->bzf)
-        {
-            free(tmp_fd->name);
-            free(tmp_fd);
-            return(NULL);
-        }
-        return(tmp_fd);
-    }
-#else
-    if(try_bz2)
-    {
-        fprintf(stderr, "Error: this Darshan build does not support bz2 files.\n");
-        fprintf(stderr, "Error: please install libbz2-dev and reconfigure.\n");
-        return(NULL);
-    }
-#endif
-
-    tmp_fd->gzf = gzopen(name, mode);
-    if(!tmp_fd->gzf)
+    tmp_fd->pf = open(name, o_flags);
+    if(tmp_fd->pf < 0)
     {
         free(tmp_fd->name);
         free(tmp_fd);
-        tmp_fd = NULL;
+        return(NULL);
     }
-    return tmp_fd;
+
+    return(tmp_fd);
 }
 
-/* darshan_log_getjob()
- *
- * returns 0 on success, -1 on failure
- */
-int darshan_log_getjob(darshan_fd file, struct darshan_job *job)
+int darshan_log_getheader(darshan_fd file, struct darshan_header *header)
 {
     int ret;
-    char buffer[DARSHAN_JOB_METADATA_LEN];
 
     ret = darshan_log_seek(file, 0);
     if(ret < 0)
+    {
+        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
         return(ret);
+    }
 
-    /* read version number first so we know how to digest the rest of the
-     * file
-     */
-    ret = darshan_log_read(file, file->version, 10);
-    if(ret < 10)
+    /* read header from log file */
+    ret = darshan_log_read(file, header, sizeof(*header));
+    if(ret < sizeof(*header))
     {
-        fprintf(stderr, "Error: invalid log file (failed to read version).\n");
+        fprintf(stderr, "Error: invalid darshan log file (failed to read header).\n");
         return(-1);
     }
 
-    if(strcmp(file->version, "2.05") == 0)
-    {
-        getjob_internal = getjob_internal_204;
-        getfile_internal = getfile_internal_204;
-        file->job_struct_size = sizeof(*job);
-        file->COMPAT_CP_EXE_LEN = CP_EXE_LEN;
-    }
-    else if(strcmp(file->version, "2.04") == 0)
-    {
-        getjob_internal = getjob_internal_204;
-        getfile_internal = getfile_internal_204;
-        file->job_struct_size = sizeof(*job);
-        file->COMPAT_CP_EXE_LEN = CP_EXE_LEN;
-    }
-    else if(strcmp(file->version, "2.03") == 0)
-    {
-        getjob_internal = getjob_internal_201;
-        getfile_internal = getfile_internal_200;
-        file->job_struct_size = JOB_SIZE_201;
-        file->COMPAT_CP_EXE_LEN = CP_JOB_RECORD_SIZE_200-file->job_struct_size-1;
-    }
-    else if(strcmp(file->version, "2.02") == 0)
-    {
-        getjob_internal = getjob_internal_201;
-        getfile_internal = getfile_internal_200;
-        file->job_struct_size = JOB_SIZE_201;
-        file->COMPAT_CP_EXE_LEN = CP_JOB_RECORD_SIZE_200-file->job_struct_size-1;
-    }
-    else if(strcmp(file->version, "2.01") == 0)
-    {
-        getjob_internal = getjob_internal_201;
-        getfile_internal = getfile_internal_200;
-        file->job_struct_size = JOB_SIZE_201;
-        file->COMPAT_CP_EXE_LEN = CP_JOB_RECORD_SIZE_200-file->job_struct_size-1;
-    }
-    else if(strcmp(file->version, "2.00") == 0)
-    {
-        getjob_internal = getjob_internal_200;
-        getfile_internal = getfile_internal_200;
-        file->job_struct_size = JOB_SIZE_200;
-        file->COMPAT_CP_EXE_LEN = CP_JOB_RECORD_SIZE_200-file->job_struct_size-1;
-    }
-    else if(strcmp(file->version, "1.24") == 0)
-    {
-        getjob_internal = getjob_internal_124;
-        getfile_internal = getfile_internal_124;
-        file->job_struct_size = JOB_SIZE_124;
-        file->COMPAT_CP_EXE_LEN = CP_JOB_RECORD_SIZE_1x-file->job_struct_size-1;
-    }
-    else if(strcmp(file->version, "1.23") == 0)
-    {
-        /* same as 1.24, except that mnt points may be incorrect */
-        getjob_internal = getjob_internal_124;
-        getfile_internal = getfile_internal_124;
-        file->job_struct_size = JOB_SIZE_124;
-        file->COMPAT_CP_EXE_LEN = CP_JOB_RECORD_SIZE_1x-file->job_struct_size-1;
-    }
-    else if(strcmp(file->version, "1.22") == 0)
-    {
-        getjob_internal = getjob_internal_124;
-        getfile_internal = getfile_internal_122;
-        file->job_struct_size = JOB_SIZE_124;
-        file->COMPAT_CP_EXE_LEN = CP_JOB_RECORD_SIZE_1x-file->job_struct_size-1;
-    }
-    else if(strcmp(file->version, "1.21") == 0)
-    {
-        getjob_internal = getjob_internal_124;
-        getfile_internal = getfile_internal_121;
-        file->job_struct_size = JOB_SIZE_124;
-        file->COMPAT_CP_EXE_LEN = CP_JOB_RECORD_SIZE_1x-file->job_struct_size-1;
-    }
-    else
+    /* save the version string -- this can be used to support multiple log versions */
+    strncpy(file->version, header->version_string, 8);
+
+    if(header->magic_nr == CP_MAGIC_NR)
     {
-        fprintf(stderr, "Error: incompatible darshan file.\n");
-        fprintf(stderr, "Error: expected version %s, but got %s\n", 
-                CP_VERSION, file->version);
-        return(-1);
+        /* no byte swapping needed, this file is in host format already */
+        file->swap_flag = 0;
+        return(0);
     }
 
-    ret = getjob_internal(file, job);
-
-    if (ret == 0)
+    /* try byte swapping */
+    DARSHAN_BSWAP64(&header->magic_nr);
+    if(header->magic_nr == CP_MAGIC_NR)
     {
-#ifdef HAVE_STRNDUP
-        char *metadata = strndup(job->metadata, sizeof(job->metadata));
-#else
-        char *metadata = strdup(job->metadata);
-#endif
-        char *kv;
-        char *key;
-        char *value;
-        char *save;
-
-        for(kv=strtok_r(metadata, "\n", &save);
-            kv != NULL;
-            kv=strtok_r(NULL, "\n", &save))
-        {
-            /* NOTE: we intentionally only split on the first = character.
-             * There may be additional = characters in the value portion
-             * (for example, when storing mpi-io hints).
-             */
-            strcpy(buffer, kv);
-            key = buffer;
-            value = index(buffer, '=');
-            if(!value)
-                continue;
-            /* convert = to a null terminator to split key and value */
-            value[0] = '\0';
-            value++;
-            if (strcmp(key, "prev_ver") == 0)
-            {
-                strncpy(job->version_string, value, sizeof(job->version_string));
-            }
-        }
-        free(metadata);
+        file->swap_flag = 1;
+        return(0);
     }
 
-    return(ret);
+    /* otherwise this file is just broken */
+    fprintf(stderr, "Error: bad magic number in darshan log file.\n");
+    return(-1);
 }
 
-/* darshan_putjob()
- * write job header in gzfile
+/* darshan_log_getjob()
  *
- * return 0 on success, -1 on failure.
+ * returns 0 on success, -1 on failure
  */
-int darshan_log_putjob(darshan_fd file, struct darshan_job *job)
+int darshan_log_getjob(darshan_fd file, struct darshan_job *job)
 {
-    struct darshan_job job_copy;
-    char    pv_str[64];
-    int     ret;
-    int len;
+    int ret;
+    char buffer[DARSHAN_JOB_METADATA_LEN];
 
-    ret = darshan_log_seek(file, 0);
+    ret = darshan_log_seek(file, sizeof(struct darshan_header));
     if(ret < 0)
-        return(ret);
-
-    memset(&job_copy, 0, sizeof(job_copy));
-    memcpy(&job_copy, job, sizeof(job_copy));
-    /* check for newline in existing metadata, add if needed */
-    len = strlen(job_copy.metadata);
-    if(len > 0 && len < DARSHAN_JOB_METADATA_LEN)
     {
-        if(job_copy.metadata[len-1] != '\n')
-        {
-            job_copy.metadata[len] = '\n';
-            job_copy.metadata[len+1] = '\0';
-        }
+        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
+        return(ret);
     }
 
-    sprintf(pv_str, "prev_ver=%s\n", job->version_string);
-    sprintf(job_copy.version_string, "%s", CP_VERSION);
-    if(strlen(job_copy.metadata) + strlen(pv_str) < DARSHAN_JOB_METADATA_LEN)
-        strncat(job_copy.metadata, pv_str, strlen(pv_str));
-    else
-        sprintf(job_copy.metadata, "%s", pv_str);
-    job_copy.magic_nr = CP_MAGIC_NR;
-
-    ret = darshan_log_write(file, &job_copy, sizeof(job_copy));
-    if (ret != sizeof(job_copy))
+    /* read the job data from the log file */
+    ret = darshan_log_read(file, job, sizeof(*job));
+    if(ret < sizeof(*job))
     {
-        fprintf(stderr, "Error: failed to write job header: %d\n", ret);
+        fprintf(stderr, "Error: invalid darshan log file (failed to read job data).\n");
         return(-1);
     }
 
-    return(0);
-}
-
-/* darshan_log_getfile()
- *
- * return 1 if file record found, 0 on eof, and -1 on error
- */
-int darshan_log_getfile(darshan_fd fd, struct darshan_job *job, struct darshan_file *file)
-{
-    int ret;
-
-    ret = getfile_internal(fd, job, file);
-
-    return(ret);
-}
-
-/* darshan_log_putfile()
- *
- * return 0 if file record written, -1 on error.
- */
-int darshan_log_putfile(darshan_fd fd, struct darshan_job *job, struct darshan_file *file)
-{
-    int     ret;
-
-    if(fd->pos < CP_JOB_RECORD_SIZE)
-    {
-        ret = darshan_log_seek(fd, CP_JOB_RECORD_SIZE);
-        if(ret < 0)
-            return(ret);
-    }
-
-    ret = darshan_log_write(fd, file, sizeof(*file));
-    if (ret != sizeof(*file))
+    if(file->swap_flag)
     {
-        fprintf(stderr, "Error: writing file record failed: %d\n", ret);
-        return(-1);
+        /* swap bytes if necessary */
+        DARSHAN_BSWAP64(&job->uid);
+        DARSHAN_BSWAP64(&job->start_time);
+        DARSHAN_BSWAP64(&job->end_time);
+        DARSHAN_BSWAP64(&job->nprocs);
+        DARSHAN_BSWAP64(&job->jobid);
     }
 
     return(0);
 }
 
-/* darshan_log_getmounts()
- * 
- * retrieves mount table information from the log.  Note that devs, mnt_pts,
- * and fs_types are arrays that will be allocated by the function and must
- * be freed by the caller.  count will indicate the size of the arrays
- */
-int darshan_log_getmounts(darshan_fd fd, int64_t** devs, char*** mnt_pts, char***
-    fs_types, int* count)
+int darshan_log_getmap(darshan_fd file, unsigned char **map_buf)
 {
     int ret;
-    char* pos;
-    int array_index = 0;
-    char buf[fd->COMPAT_CP_EXE_LEN+1];
+    struct stat sbuf;
+    int map_buf_size;
 
-    ret = darshan_log_seek(fd, fd->job_struct_size);
+    ret = darshan_log_seek(file, sizeof(struct darshan_header) + CP_JOB_RECORD_SIZE);
     if(ret < 0)
+    {
+        fprintf(stderr, "Error: unable to seek in darshan log file.\n");
         return(ret);
+    }
 
-    ret = darshan_log_read(fd, buf, (fd->COMPAT_CP_EXE_LEN + 1));
-    if (ret < (fd->COMPAT_CP_EXE_LEN + 1))
-    {
-        perror("darshan_log_read");
+    /* TODO: use indices map rather than stat to determine offsets */
+    /* allocate a buffer to store the (serialized) darshan record map */
+    /* NOTE: caller's responsibility to free this allocated map buffer */
+    fstat(file->pf, &sbuf);
+    map_buf_size = sbuf.st_size - (sizeof(struct darshan_header) + CP_JOB_RECORD_SIZE);
+    *map_buf = malloc(map_buf_size);
+    if(!(*map_buf))
         return(-1);
-    }
 
-    /* count entries */
-    *count = 0;
-    pos = buf;
-    while((pos = strchr(pos, '\n')) != NULL)
+    /* read the record map from the log file */
+    ret = darshan_log_read(file, *map_buf, map_buf_size);
+    if(ret < map_buf_size)
     {
-        pos++;
-        (*count)++;
+        fprintf(stderr, "Error: invalid darshan log file (failed to read record map).\n");
+        return(-1);
     }
 
-    if(*count == 0)
+    if(file->swap_flag)
     {
-        /* no mount entries present */
-        return(0);
-    }
+        /* we need to sort out endianness issues before passing back the serialized buffer */
+        /* NOTE: darshan record map serialization method: 
+         *          ... darshan_record_id | (uint32_t) path_len | path ...
+         */
+        unsigned char *buf_ptr = *map_buf;
+        darshan_record_id *rec_id_ptr;
+        uint32_t *path_len_ptr;
 
-    /* allocate output arrays */
-    *devs = malloc((*count)*sizeof(int64_t));
-    assert(*devs);
-    *mnt_pts = malloc((*count)*sizeof(char*));
-    assert(*mnt_pts);
-    *fs_types = malloc((*count)*sizeof(char*));
-    assert(*fs_types);
-    
-    /* work backwards through the table and parse each line (except for
-     * first, which holds command line information)
-     */
-    while((pos = strrchr(buf, '\n')) != NULL)
-    {
-        /* overestimate string lengths */
-        (*mnt_pts)[array_index] = malloc(fd->COMPAT_CP_EXE_LEN);
-        assert((*mnt_pts)[array_index]);
-        (*fs_types)[array_index] = malloc(fd->COMPAT_CP_EXE_LEN);
-        assert((*fs_types)[array_index]);
-        
-        ret = sscanf(++pos, "%" PRId64 "\t%s\t%s", &(*devs)[array_index],
-            (*fs_types)[array_index], (*mnt_pts)[array_index]);
-
-        if(ret != 3)
+        while(buf_ptr < (*map_buf + map_buf_size))
         {
-            fprintf(stderr, "Error: poorly formatted mount table in log file.\n");
-            return(-1);
+            rec_id_ptr = (darshan_record_id *)buf_ptr;
+            buf_ptr += sizeof(darshan_record_id);
+            path_len_ptr = (uint32_t *)buf_ptr;
+            buf_ptr += sizeof(uint32_t);
+            buf_ptr += *path_len_ptr;
+
+            DARSHAN_BSWAP64(rec_id_ptr);
+            DARSHAN_BSWAP32(path_len_ptr);
         }
-        pos--;
-        *pos = '\0';
-        array_index++;
     }
 
-    return (0);
+    return(0);
 }
 
-/* darshan_log_putmounts
- *
- * encode mount information back into mtab format.
+/* TODO: implement */
+/* TODO: could this be used in darshan-runtime? do we refactor so we aren't maintaining in 2 spots? */
+int darshan_log_build_map(unsigned char *map_buf, int map_buf_size, some_struct *rec_hash)
+{
+    unsigned char *buf_ptr;
+
+    return(0);
+}
+
+/* TODO: implement */
+/* TODO: could this be used in darshan-runtime? do we refactor so we aren't maintaining in 2 spots? */
+int darshan_log_destroy_map()
+{
+    return(0);
+}
+
+#if 0
+/* darshan_log_getfile()
  *
- * returns 0 on success, -1 on failure.
+ * return 1 if file record found, 0 on eof, and -1 on error
  */
-int darshan_log_putmounts(darshan_fd fd, int64_t* devs, char** mnt_pts, char** fs_types, int count)
+int darshan_log_getfile(darshan_fd fd, struct darshan_job *job, struct darshan_file *file)
 {
-    int     ret;
-    char    line[1024];
-    int     i;
-
-    for(i=count-1; i>=0; i--)
-    {
-        sprintf(line, "\n%" PRId64 "\t%s\t%s",
-                devs[i], fs_types[i], mnt_pts[i]);
-        ret = darshan_log_write(fd, line, strlen(line));
-        if (ret != strlen(line))
-        {
-            fprintf(stderr, "Error: failed to write mount entry: %d\n", ret);
-            return(-1);
-        }
-    }
+    int ret;
 
-    /* seek ahead to end of exe region, will be zero filled */
-    ret = darshan_log_seek(fd, CP_JOB_RECORD_SIZE);
-    if (ret)
-    {
-        fprintf(stderr, "Error: forward seek failed: %d\n", CP_JOB_RECORD_SIZE);
-    }
+    ret = getfile_internal(fd, job, file);
 
-    return(0);
+    return(ret);
 }
 
 int darshan_log_getexe(darshan_fd fd, char *buf)
@@ -716,48 +269,18 @@ int darshan_log_getexe(darshan_fd fd, char *buf)
 
     return (0);
 }
-
-/* darshan_log_putexe()
- *
- * Write the exe string to the log.
- *
- * return 0 on success, -1 on failure.
- */
-int darshan_log_putexe(darshan_fd fd, char *buf)
-{
-    int     ret;
-    int     len;
-
-    ret = darshan_log_seek(fd, sizeof(struct darshan_job));
-    if(ret < 0)
-        return(ret);
-
-    len = strlen(buf);
-
-    ret = darshan_log_write(fd, buf, len);
-    if (ret != len)
-    {
-        fprintf(stderr, "Error: failed to write exe info: %d\n", ret);
-        ret = -1;
-    }
-
-    return(ret);
-}
+#endif
 
 void darshan_log_close(darshan_fd file)
 {
-#ifdef HAVE_LIBBZ2
-    if(file->bzf)
-        BZ2_bzclose(file->bzf);
-#endif
-
-    if(file->gzf)
-        gzclose(file->gzf);
+    if(file->pf)
+        close(file->pf);
 
     free(file->name);
     free(file);
 }
 
+#if 0
 /* darshan_log_print_version_warnings()
  *
  * Print summary of any problems with the detected log format
@@ -1150,170 +673,6 @@ static void shift_missing_1_24(struct darshan_file* file)
     return;
 }
 
-static int getjob_internal_204(darshan_fd file, struct darshan_job *job)
-{
-    int ret;
-
-    ret = darshan_log_seek(file, 0);
-    if(ret < 0)
-        return(ret);
-
-    ret = darshan_log_read(file, job, sizeof(*job));
-    if (ret < sizeof(*job))
-    {
-        fprintf(stderr, "Error: invalid log file (too short).\n");
-        return(-1);
-    }
-
-    if(job->magic_nr == CP_MAGIC_NR)
-    {
-        /* no byte swapping needed, this file is in host format already */
-        file->swap_flag = 0;
-        return(0);
-    }
-
-    /* try byte swapping */
-    DARSHAN_BSWAP64(&job->magic_nr);
-    if(job->magic_nr == CP_MAGIC_NR)
-    {
-        file->swap_flag = 1;
-        DARSHAN_BSWAP64(&job->uid);
-        DARSHAN_BSWAP64(&job->start_time);
-        DARSHAN_BSWAP64(&job->end_time);
-        DARSHAN_BSWAP64(&job->nprocs);
-        DARSHAN_BSWAP64(&job->jobid);
-        return(0);
-    }
-
-    /* otherwise this file is just broken */
-    fprintf(stderr, "Error: bad magic number in darshan file.\n");
-    return(-1);
-}
-
-static int getjob_internal_201(darshan_fd file, struct darshan_job *job)
-{
-    int ret;
-    struct darshan_job_201
-    {
-        char version_string[8];
-        int64_t magic_nr;
-        int64_t uid;
-        int64_t start_time;
-        int64_t end_time;
-        int64_t nprocs;
-        int64_t jobid;
-        char metadata[64];
-    } job_201;
-    memset(job, 0, sizeof(job_201));
-    memset(job, 0, sizeof(*job));
-
-    ret = darshan_log_seek(file, 0);
-    if(ret < 0)
-        return(ret);
-
-    ret = darshan_log_read(file, &job_201, sizeof(job_201));
-    if (ret < sizeof(job_201))
-    {
-        fprintf(stderr, "Error: invalid log file (too short).\n");
-        return(-1);
-    }
-
-    memcpy(job->version_string, job_201.version_string, 8);
-    job->magic_nr   = job_201.magic_nr;
-    job->uid        = job_201.uid;
-    job->start_time = job_201.start_time;
-    job->end_time   = job_201.end_time;
-    job->nprocs     = job_201.nprocs;
-    job->jobid      = job_201.jobid;
-    strncpy(job->metadata, job_201.metadata, 64);
-
-    if(job->magic_nr == CP_MAGIC_NR)
-    {
-        /* no byte swapping needed, this file is in host format already */
-        file->swap_flag = 0;
-        return(0);
-    }
-
-    /* try byte swapping */
-    DARSHAN_BSWAP64(&job->magic_nr);
-    if(job->magic_nr == CP_MAGIC_NR)
-    {
-        file->swap_flag = 1;
-        DARSHAN_BSWAP64(&job->uid);
-        DARSHAN_BSWAP64(&job->start_time);
-        DARSHAN_BSWAP64(&job->end_time);
-        DARSHAN_BSWAP64(&job->nprocs);
-        DARSHAN_BSWAP64(&job->jobid);
-        return(0);
-    }
-
-    /* otherwise this file is just broken */
-    fprintf(stderr, "Error: bad magic number in darshan file.\n");
-    return(-1);
-}
-
-
-static int getjob_internal_200(darshan_fd file, struct darshan_job *job)
-{
-    int ret;
-    struct darshan_job_200
-    {
-        char version_string[8];
-        int64_t magic_nr;
-        int64_t uid;
-        int64_t start_time;
-        int64_t end_time;
-        int64_t nprocs;
-        int64_t jobid;
-    } job_200;
-
-    memset(job, 0, sizeof(job_200));
-    memset(job, 0, sizeof(*job));
-
-    ret = darshan_log_seek(file, 0);
-    if(ret < 0)
-        return(ret);
-
-    ret = darshan_log_read(file, &job_200, sizeof(job_200));
-    if (ret < sizeof(job_200))
-    {
-        fprintf(stderr, "Error: invalid log file (too short).\n");
-        return(-1);
-    }
-
-    memcpy(job->version_string, job_200.version_string, 8);
-    job->magic_nr   = job_200.magic_nr;
-    job->uid        = job_200.uid;
-    job->start_time = job_200.start_time;
-    job->end_time   = job_200.end_time;
-    job->nprocs     = job_200.nprocs;
-    job->jobid      = job_200.jobid;
-
-    if(job->magic_nr == CP_MAGIC_NR)
-    {
-        /* no byte swapping needed, this file is in host format already */
-        file->swap_flag = 0;
-        return(0);
-    }
-
-    /* try byte swapping */
-    DARSHAN_BSWAP64(&job->magic_nr);
-    if(job->magic_nr == CP_MAGIC_NR)
-    {
-        file->swap_flag = 1;
-        DARSHAN_BSWAP64(&job->uid);
-        DARSHAN_BSWAP64(&job->start_time);
-        DARSHAN_BSWAP64(&job->end_time);
-        DARSHAN_BSWAP64(&job->nprocs);
-        DARSHAN_BSWAP64(&job->jobid);
-        return(0);
-    }
-
-    /* otherwise this file is just broken */
-    fprintf(stderr, "Error: bad magic number in darshan file.\n");
-    return(-1);
-}
-
 static int getfile_internal_204(darshan_fd fd, struct darshan_job *job, 
     struct darshan_file *file)
 {
@@ -1426,76 +785,6 @@ static int getfile_internal_200(darshan_fd fd, struct darshan_job *job,
     return(-1);
 }
 
-/* If we see version 1.24, assume that it is stored in big endian 32 bit
- * format.  Convert up to current format.
- */
-static int getjob_internal_124(darshan_fd fd, struct darshan_job *job)
-{
-    char* buffer;
-    int ret;
-    uint32_t uid;
-    int32_t start_time;
-    int32_t end_time;
-    int32_t nprocs;
-
-#ifdef WORDS_BIGENDIAN
-    fd->swap_flag = 0;
-#else
-    fd->swap_flag = 1;
-#endif
-
-    memset(job, 0, sizeof(*job));
-
-    buffer = (char*)malloc(JOB_SIZE_124);
-    if(!buffer)
-    {
-        return(-1);
-    }
-
-    ret = darshan_log_seek(fd, 0);
-    if(ret < 0)
-        return(ret);
-
-    ret = darshan_log_read(fd, buffer, JOB_SIZE_124);
-    if (ret < JOB_SIZE_124)
-    {
-        fprintf(stderr, "Error: invalid log file (could not read file record).\n");
-        free(buffer);
-        return(-1);
-    }
-
-    /* pull job header information out of specific bytes in case struct
-     * padding is off
-     */
-    strncpy(job->version_string, buffer, 8);
-    uid = *((uint32_t*)&buffer[12]);
-    start_time = *((int32_t*)&buffer[16]);
-    end_time = *((int32_t*)&buffer[20]);
-    nprocs = *((int32_t*)&buffer[24]);
-
-    free(buffer);
-
-    if(fd->swap_flag)
-    {
-        /* byte swap */
-        DARSHAN_BSWAP32(&uid);
-        DARSHAN_BSWAP32(&start_time);
-        DARSHAN_BSWAP32(&end_time);
-        DARSHAN_BSWAP32(&nprocs);
-    }
-
-    job->uid += uid;
-    job->start_time += start_time;
-    job->end_time += end_time;
-    job->nprocs += nprocs;
-    job->jobid = 0; /* old log versions did not have this field */
-    
-    /* set magic number */
-    job->magic_nr = CP_MAGIC_NR;
-
-    return(0);
-}
-
 static int getfile_internal_124(darshan_fd fd, struct darshan_job *job, 
     struct darshan_file *file)
 {
@@ -1624,63 +913,45 @@ static int getfile_internal_1x(darshan_fd fd, struct darshan_job *job,
     free(buffer);
     return(1);
 }
+#endif
 
+/* ** SDS ** */
 /* return amount written on success, -1 on failure.
  */
 static int darshan_log_write(darshan_fd fd, void* buf, int len)
 {
     int ret;
 
-    if(fd->gzf)
-    {
-        ret = gzwrite(fd->gzf, buf, len);
-        if(ret > 0)
-            fd->pos += ret;
-        return(ret);
-    }
-
-#ifdef HAVE_LIBBZ2
-    if(fd->bzf)
+    if(fd->pf)
     {
-        ret = BZ2_bzwrite(fd->bzf, buf, len);
+        ret = write(fd->pf, buf, len);
         if(ret > 0)
             fd->pos += ret;
         return(ret);
     }
-#endif
 
     return(-1);
 }
 
-
+/* ** SDS ** */
 /* return amount read on success, 0 on EOF, -1 on failure.
  */
 static int darshan_log_read(darshan_fd fd, void* buf, int len)
 {
     int ret;
 
-    if(fd->gzf)
-    {
-        ret = gzread(fd->gzf, buf, len);
-        if(ret > 0)
-            fd->pos += ret;
-        return(ret);
-    }
-
-#ifdef HAVE_LIBBZ2
-    if(fd->bzf)
+    if(fd->pf)
     {
-        ret = BZ2_bzread(fd->bzf, buf, len);
+        ret = read(fd->pf, buf, len);
         if(ret > 0)
             fd->pos += ret;
         return(ret);
     }
-#endif
 
     return(-1);
 }
 
-
+#if 0
 static const char* darshan_log_error(darshan_fd fd, int* errnum)
 {
     if(fd->gzf)
@@ -1698,80 +969,26 @@ static const char* darshan_log_error(darshan_fd fd, int* errnum)
     *errnum = 0;
     return(NULL);
 }
+#endif
 
+/* ** SDS ** */
 /* return 0 on successful seek to offset, -1 on failure.
  */
-static int darshan_log_seek(darshan_fd fd, int64_t offset)
+static int darshan_log_seek(darshan_fd fd, off_t offset)
 {
-    z_off_t zoff = 0;
-    z_off_t zoff_ret = 0;
+    off_t ret_off;
+
+    /* TODO: need to look at each use case here -- do I have everything right? */
 
     if(fd->pos == offset)
         return(0);
 
-    if(fd->gzf)
-    {
-        zoff += offset;
-        zoff_ret = gzseek(fd->gzf, zoff, SEEK_SET);
-        if(zoff_ret == zoff)
-        {
-            fd->pos = offset;
-            return(0);
-        }
-        return(-1);
-    }
-
-#ifdef HAVE_LIBBZ2
-    if(fd->bzf)
+    ret_off = lseek(fd->pf, offset, SEEK_SET);
+    if(ret_off == offset)
     {
-        int64_t counter;
-        char dummy = '\0';
-        int ret;
-
-        /* There is no seek in bzip2.  Just close, reopen, and throw away 
-         * data until the correct offset.  Very slow, but we don't expect to
-         * do this often.
-         */
-        if(fd->mode[0] == 'r' && offset < fd->pos)
-        {
-            /* to seek backwards in read-only mode we just close and re-open
-             * the file
-             */
-            BZ2_bzclose(fd->bzf);
-            fd->bzf = BZ2_bzopen(fd->name, fd->mode);
-            if(!fd->bzf)
-                return(-1);
-
-            fd->pos = 0;
-        }
-        else if(fd->mode[0] == 'w' && offset < fd->pos)
-        {
-            /* there isn't any convenient way to seek backwards in a
-             * write-only bzip2 file, but we shouldn't need that
-             * functionality in darshan anyway.
-             */
-            fprintf(stderr, "Error: seeking backwards in a bzip2 compressed darshan output file is not supported.\n");
-            return(-1);
-        }
-
-        for(counter=0; counter<(offset-fd->pos); counter++)
-        {
-            if(fd->mode[0] == 'r')
-            {
-                ret = BZ2_bzread(fd->bzf, &dummy, 1);
-            }
-            else
-            {
-                ret = BZ2_bzwrite(fd->bzf, &dummy, 1);
-            }
-            if(ret != 1)
-                return(-1);
-        }
-        fd->pos += counter;
+        fd->pos = offset;
         return(0);
     }
-#endif
 
     return(-1);
 }
-
diff --git a/darshan-util/darshan-logutils.h b/darshan-util/darshan-logutils.h
index 07815fa..2bbc5a9 100644
--- a/darshan-util/darshan-logutils.h
+++ b/darshan-util/darshan-logutils.h
@@ -9,11 +9,13 @@
 
 typedef struct darshan_fd_s* darshan_fd;
 
-extern char *darshan_names[];
-extern char *darshan_f_names[];
-
 darshan_fd darshan_log_open(const char *name, const char* mode);
+int darshan_log_getheader(darshan_fd file, struct darshan_header *header);
 int darshan_log_getjob(darshan_fd file, struct darshan_job *job);
+int darshan_log_getmap(darshan_fd file, unsigned char **map_buf);
+int darshan_log_build_map(unsigned char *map_buf);
+int darshan_log_destroy_map(void);
+#if 0
 int darshan_log_putjob(darshan_fd file, struct darshan_job *job);
 int darshan_log_getfile(darshan_fd fd, 
     struct darshan_job* job, 
@@ -33,8 +35,9 @@ int darshan_log_putmounts(darshan_fd fd,
     char** mnt_pts,
     char** fs_types,
     int count);
+#endif
 void darshan_log_close(darshan_fd file);
-void darshan_log_print_version_warnings(struct darshan_job *job);
+//void darshan_log_print_version_warnings(struct darshan_job *job);
 
 /* convenience macros for printing out counters */
 #define CP_PRINT_HEADER() printf("#<rank>\t<file>\t<counter>\t<value>\t<name suffix>\t<mount pt>\t<fs type>\n")


hooks/post-receive
--



More information about the Darshan-commits mailing list