[Darshan-commits] [Darshan] branch, dev-modular, updated. 756b640d97a13630c0e30e6b84b82e975e78d369

Service Account git at mcs.anl.gov
Mon Feb 16 23:41:07 CST 2015


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Darshan".

The branch, dev-modular has been updated
       via  756b640d97a13630c0e30e6b84b82e975e78d369 (commit)
      from  018122117ee9f50abaa2b5fd73b7d3133c09373c (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 756b640d97a13630c0e30e6b84b82e975e78d369
Author: Shane Snyder <ssnyder at mcs.anl.gov>
Date:   Mon Feb 16 23:40:20 2015 -0600

    updated runtime/util to use exe/mount info

-----------------------------------------------------------------------

Summary of changes:
 darshan-log-format.h                    |    3 +-
 darshan-runtime/darshan-core.h          |    4 +-
 darshan-runtime/doc/darshan-runtime.txt |    1 -
 darshan-runtime/lib/darshan-core.c      |  352 ++++++++++++++++++++++++------
 darshan-util/darshan-logutils.c         |  180 ++++++++++++----
 darshan-util/darshan-logutils.h         |   13 +-
 darshan-util/darshan-posix-parser.c     |   56 +++++-
 7 files changed, 476 insertions(+), 133 deletions(-)


Diff of changes:
diff --git a/darshan-log-format.h b/darshan-log-format.h
index 6f7d63d..ccd4df9 100644
--- a/darshan-log-format.h
+++ b/darshan-log-format.h
@@ -36,7 +36,6 @@ typedef uint64_t darshan_record_id;
 /* NOTES: - valid ids range from [0...DARSHAN_MAX_MODS-1]
  *        - order of ids control module shutdown order (and consequently, order in log file)
  */
-/* TODO: enforce maximum? */
 #define DARSHAN_MAX_MODS 16
 typedef enum
 {
@@ -84,7 +83,7 @@ struct darshan_job
     int64_t end_time;
     int64_t nprocs;
     int64_t jobid;
-    char metadata[DARSHAN_JOB_METADATA_LEN]; /* TODO: what is this? */
+    char metadata[DARSHAN_JOB_METADATA_LEN];
 };
 
 struct darshan_record
diff --git a/darshan-runtime/darshan-core.h b/darshan-runtime/darshan-core.h
index 91f68b5..0caa468 100644
--- a/darshan-runtime/darshan-core.h
+++ b/darshan-runtime/darshan-core.h
@@ -22,14 +22,14 @@ struct darshan_core_module
 };
 
 /* in memory structure to keep up with job level data */
-/* TODO: trailing data ? */
 struct darshan_core_runtime
 {
     struct darshan_job log_job;
     char exe[CP_EXE_LEN+1];
-    double wtime_offset;
     struct darshan_core_record_ref *rec_hash;
     struct darshan_core_module* mod_array[DARSHAN_MAX_MODS];
+    double wtime_offset;
+    char *trailing_data;
 };
 
 struct darshan_core_record_ref
diff --git a/darshan-runtime/doc/darshan-runtime.txt b/darshan-runtime/doc/darshan-runtime.txt
index ade9171..c4699dd 100644
--- a/darshan-runtime/doc/darshan-runtime.txt
+++ b/darshan-runtime/doc/darshan-runtime.txt
@@ -407,7 +407,6 @@ behavior at runtime:
 * DARSHAN_DISABLE: disables Darshan instrumentation
 * DARSHAN_INTERNAL_TIMING: enables internal instrumentation that will print the time required to startup and shutdown Darshan to stderr at run time.
 * DARSHAN_LOGHINTS: specifies the MPI-IO hints to use when storing the Darshan output file.  The format is a semicolon-delimited list of key=value pairs, for example: hint1=value1;hint2=value2
-* DARSHAN_DISABLE_TIMING: disables the subset of Darshan instrumentation that gathers timing information
 * DARSHAN_MEMALIGN: specifies a value for memory alignment (CP_MEM_ALIGNMENT)
 * DARSHAN_JOBID: specifies the name of the environment variable to use for the job identifier, such as PBS_JOBID
 * DARSHAN_DISABLE_SHARED_REDUCTION: disables the step in Darshan aggregation
diff --git a/darshan-runtime/lib/darshan-core.c b/darshan-runtime/lib/darshan-core.c
index ef03e4b..ee76829 100644
--- a/darshan-runtime/lib/darshan-core.c
+++ b/darshan-runtime/lib/darshan-core.c
@@ -29,24 +29,43 @@
 extern char* __progname;
 
 /* internal variable delcarations */
-static struct darshan_core_runtime *darshan_core_job = NULL;
+static struct darshan_core_runtime *darshan_core = NULL;
 static pthread_mutex_t darshan_core_mutex = PTHREAD_MUTEX_INITIALIZER;
 static int my_rank = -1;
 static int nprocs = -1;
 
+/* FS mount information */
+#define DARSHAN_MAX_MNTS 64
+#define DARSHAN_MAX_MNT_PATH 256
+#define DARSHAN_MAX_MNT_TYPE 32
+struct mnt_data
+{
+    int64_t hash;
+    int64_t block_size;
+    char path[DARSHAN_MAX_MNT_PATH];
+    char type[DARSHAN_MAX_MNT_TYPE];
+};
+static struct mnt_data mnt_data_array[DARSHAN_MAX_MNTS];
+static int mnt_data_count = 0;
+
 /* prototypes for internal helper functions */
 static void darshan_core_initialize(
     int *argc, char ***argv);
 static void darshan_core_shutdown(
     void);
 static void darshan_core_cleanup(
-    struct darshan_core_runtime* job);
+    struct darshan_core_runtime* core);
 static void darshan_get_logfile_name(
     char* logfile_name, int jobid, struct tm* start_tm);
 static void darshan_log_record_hints_and_ver(
-    struct darshan_core_runtime* job);
+    struct darshan_core_runtime* core);
+static void darshan_get_exe_and_mounts_root(
+    struct darshan_core_runtime *core, char* trailing_data,
+    int space_left);
+static char* darshan_get_exe_and_mounts(
+    struct darshan_core_runtime *core);
 static void darshan_get_shared_record_ids(
-    struct darshan_core_runtime *job, darshan_record_id *shared_recs);
+    struct darshan_core_runtime *core, darshan_record_id *shared_recs);
 static int darshan_log_coll_open(
     char *logfile_name, MPI_File *log_fh);
 static int darshan_log_write_record_hash(
@@ -120,28 +139,30 @@ static void darshan_core_initialize(int *argc, char ***argv)
         init_start = DARSHAN_MPI_CALL(PMPI_Wtime)();
 
     /* setup darshan runtime if darshan is enabled and hasn't been initialized already */
-    if(!getenv("DARSHAN_DISABLE") && !darshan_core_job)
+    if(!getenv("DARSHAN_DISABLE") && !darshan_core)
     {
-        /* allocate structure to track darshan_core_job information */
-        darshan_core_job = malloc(sizeof(*darshan_core_job));
-        if(darshan_core_job)
+        /* TODO: darshan mem alignment code? */
+
+        /* allocate structure to track darshan_core_runtime information */
+        darshan_core = malloc(sizeof(*darshan_core));
+        if(darshan_core)
         {
-            memset(darshan_core_job, 0, sizeof(*darshan_core_job));
+            memset(darshan_core, 0, sizeof(*darshan_core));
 
-            darshan_core_job->log_job.uid = getuid();
-            darshan_core_job->log_job.start_time = time(NULL);
-            darshan_core_job->log_job.nprocs = nprocs;
-            darshan_core_job->wtime_offset = DARSHAN_MPI_CALL(PMPI_Wtime)();
+            darshan_core->log_job.uid = getuid();
+            darshan_core->log_job.start_time = time(NULL);
+            darshan_core->log_job.nprocs = nprocs;
+            darshan_core->wtime_offset = DARSHAN_MPI_CALL(PMPI_Wtime)();
 
             /* record exe and arguments */
             for(i=0; i<(*argc); i++)
             {
-                chars_left = CP_EXE_LEN-strlen(darshan_core_job->exe);
-                strncat(darshan_core_job->exe, *(argv[i]), chars_left);
+                chars_left = CP_EXE_LEN-strlen(darshan_core->exe);
+                strncat(darshan_core->exe, (*argv)[i], chars_left);
                 if(i < ((*argc)-1))
                 {
-                    chars_left = CP_EXE_LEN-strlen(darshan_core_job->exe);
-                    strncat(darshan_core_job->exe, " ", chars_left);
+                    chars_left = CP_EXE_LEN-strlen(darshan_core->exe);
+                    strncat(darshan_core->exe, " ", chars_left);
                 }
             }
 
@@ -150,19 +171,22 @@ static void darshan_core_initialize(int *argc, char ***argv)
              */
             if(argc == 0)
             {
-                chars_left = CP_EXE_LEN-strlen(darshan_core_job->exe);
-                strncat(darshan_core_job->exe, __progname, chars_left);
-                chars_left = CP_EXE_LEN-strlen(darshan_core_job->exe);
-                strncat(darshan_core_job->exe, " <unknown args>", chars_left);
+                chars_left = CP_EXE_LEN-strlen(darshan_core->exe);
+                strncat(darshan_core->exe, __progname, chars_left);
+                chars_left = CP_EXE_LEN-strlen(darshan_core->exe);
+                strncat(darshan_core->exe, " <unknown args>", chars_left);
             }
 
             if(chars_left == 0)
             {
                 /* we ran out of room; mark that string was truncated */
                 truncate_offset = CP_EXE_LEN - strlen(truncate_string);
-                sprintf(&darshan_core_job->exe[truncate_offset], "%s",
+                sprintf(&darshan_core->exe[truncate_offset], "%s",
                     truncate_string);
             }
+
+            /* collect information about command line and mounted file systems */
+            darshan_core->trailing_data = darshan_get_exe_and_mounts(darshan_core);
         }
     }
 
@@ -185,7 +209,7 @@ static void darshan_core_shutdown()
 {
     int i;
     char *logfile_name;
-    struct darshan_core_runtime *final_job;
+    struct darshan_core_runtime *final_core;
     int internal_timing_flag = 0;
     char *envjobid;
     char *jobid_str;
@@ -210,7 +234,7 @@ static void darshan_core_shutdown()
         internal_timing_flag = 1;
 
     DARSHAN_CORE_LOCK();
-    if(!darshan_core_job)
+    if(!darshan_core)
     {
         DARSHAN_CORE_UNLOCK();
         return;
@@ -218,8 +242,8 @@ static void darshan_core_shutdown()
     /* disable further tracing while hanging onto the data so that we can
      * write it out
      */
-    final_job = darshan_core_job;
-    darshan_core_job = NULL;
+    final_core = darshan_core;
+    darshan_core = NULL;
     DARSHAN_CORE_UNLOCK();
 
     start_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();
@@ -227,7 +251,7 @@ static void darshan_core_shutdown()
     logfile_name = malloc(PATH_MAX);
     if(!logfile_name)
     {
-        darshan_core_cleanup(final_job);
+        darshan_core_cleanup(final_core);
         return;
     }
 
@@ -254,18 +278,15 @@ static void darshan_core_shutdown()
             jobid = getpid();
         }
 
-        final_job->log_job.jobid = (int64_t)jobid;
+        final_core->log_job.jobid = (int64_t)jobid;
 
-/* TODO */
-#if 0
         /* if we are using any hints to write the log file, then record those
          * hints with the darshan job information
          */
-        darshan_log_record_hints_and_ver(final_job);
-#endif
+        darshan_log_record_hints_and_ver(final_core);
 
         /* use human readable start time format in log filename */
-        start_time_tmp = final_job->log_job.start_time;
+        start_time_tmp = final_core->log_job.start_time;
         start_tm = localtime(&start_time_tmp);
 
         /* construct log file name */
@@ -280,27 +301,27 @@ static void darshan_core_shutdown()
     {
         /* failed to generate log file name */
         free(logfile_name);
-        darshan_core_cleanup(final_job);
+        darshan_core_cleanup(final_core);
         return;
     }
 
-    final_job->log_job.end_time = time(NULL);
+    final_core->log_job.end_time = time(NULL);
 
     /* reduce to report first start time and last end time across all ranks
      * at rank 0
      */
-    DARSHAN_MPI_CALL(PMPI_Reduce)(&final_job->log_job.start_time, &first_start_time, 1, MPI_LONG_LONG, MPI_MIN, 0, MPI_COMM_WORLD);
-    DARSHAN_MPI_CALL(PMPI_Reduce)(&final_job->log_job.end_time, &last_end_time, 1, MPI_LONG_LONG, MPI_MAX, 0, MPI_COMM_WORLD);
+    DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job.start_time, &first_start_time, 1, MPI_LONG_LONG, MPI_MIN, 0, MPI_COMM_WORLD);
+    DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job.end_time, &last_end_time, 1, MPI_LONG_LONG, MPI_MAX, 0, MPI_COMM_WORLD);
     if(my_rank == 0)
     {
-        final_job->log_job.start_time = first_start_time;
-        final_job->log_job.end_time = last_end_time;
+        final_core->log_job.start_time = first_start_time;
+        final_core->log_job.end_time = last_end_time;
     }
 
     /* set which local modules were actually used */
     for(i = 0; i < DARSHAN_MAX_MODS; i++)
     {
-        if(final_job->mod_array[i])
+        if(final_core->mod_array[i])
             local_mod_use[i] = 1;
     }
 
@@ -308,7 +329,7 @@ static void darshan_core_shutdown()
     DARSHAN_MPI_CALL(PMPI_Allreduce)(local_mod_use, global_mod_use_count, DARSHAN_MAX_MODS, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
 
     /* get a list of records which are shared across all processes */
-    darshan_get_shared_record_ids(final_job, shared_recs);
+    darshan_get_shared_record_ids(final_core, shared_recs);
 
     /* collectively open the darshan log file */
     ret = darshan_log_coll_open(logfile_name, &log_fh);
@@ -325,16 +346,25 @@ static void darshan_core_shutdown()
             unlink(logfile_name);
         }
         free(logfile_name);
-        darshan_core_cleanup(final_job);
+        darshan_core_cleanup(final_core);
         return;
     }
 
     /* rank 0 is responsible for writing the darshan job information */
     if(my_rank == 0)
     {
+        unsigned char tmp_buf[CP_JOB_RECORD_SIZE];
+        unsigned char *tmp_ptr;
+
+        /* pack the job info and exe/mount info into a buffer for writing */
+        tmp_ptr = tmp_buf;
+        memcpy(tmp_ptr, &final_core->log_job, sizeof(struct darshan_job));
+        tmp_ptr += sizeof(struct darshan_job);
+        memcpy(tmp_ptr, final_core->trailing_data, CP_EXE_LEN+1);
+
         /* write the job information, making sure to prealloc space for the log header */
         all_ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, sizeof(struct darshan_header),
-                &final_job->log_job, sizeof(struct darshan_job), MPI_BYTE, &status);
+                tmp_buf, CP_JOB_RECORD_SIZE, MPI_BYTE, &status);
         if(all_ret != MPI_SUCCESS)
         {
             fprintf(stderr, "darshan library warning: unable to write job data to log file %s\n",
@@ -343,7 +373,7 @@ static void darshan_core_shutdown()
         }
 
         /* TODO: after compression is added, this should be fixed */
-        log_header.rec_map.off = sizeof(struct darshan_header) + sizeof(struct darshan_job);
+        log_header.rec_map.off = sizeof(struct darshan_header) + CP_JOB_RECORD_SIZE;
     }
 
     /* error out if unable to write job information */
@@ -351,12 +381,12 @@ static void darshan_core_shutdown()
     if(all_ret != 0)
     {
         free(logfile_name);
-        darshan_core_cleanup(final_job);
+        darshan_core_cleanup(final_core);
         return;
     }
 
     /* write the record name->id hash to the log file */
-    ret = darshan_log_write_record_hash(log_fh, final_job->rec_hash,
+    ret = darshan_log_write_record_hash(log_fh, final_core->rec_hash,
         shared_recs, &log_header.rec_map);
 
     /* error out if unable to write record hash */
@@ -371,7 +401,7 @@ static void darshan_core_shutdown()
             unlink(logfile_name);
         }
         free(logfile_name);
-        darshan_core_cleanup(final_job);
+        darshan_core_cleanup(final_core);
         return;
     }
 
@@ -384,7 +414,7 @@ static void darshan_core_shutdown()
      */
     for(i = 0; i < DARSHAN_MAX_MODS; i++)
     {
-        struct darshan_core_module* this_mod = final_job->mod_array[i];
+        struct darshan_core_module* this_mod = final_core->mod_array[i];
         MPI_Comm mod_comm;
         void* mod_buf = NULL;
         int mod_buf_size = 0;
@@ -436,7 +466,7 @@ static void darshan_core_shutdown()
                 unlink(logfile_name);
             }
             free(logfile_name);
-            darshan_core_cleanup(final_job);
+            darshan_core_cleanup(final_core);
             return;
         }
 
@@ -475,7 +505,7 @@ static void darshan_core_shutdown()
     if(all_ret != 0)
     {
         free(logfile_name);
-        darshan_core_cleanup(final_job);
+        darshan_core_cleanup(final_core);
         return;
     }
 
@@ -512,7 +542,7 @@ static void darshan_core_shutdown()
     }
 
     free(logfile_name);
-    darshan_core_cleanup(final_job);
+    darshan_core_cleanup(final_core);
 
     if(internal_timing_flag)
     {
@@ -523,20 +553,20 @@ static void darshan_core_shutdown()
 }
 
 /* free darshan core data structures to shutdown */
-static void darshan_core_cleanup(struct darshan_core_runtime* job)
+static void darshan_core_cleanup(struct darshan_core_runtime* core)
 {
     int i;
 
     for(i = 0; i < DARSHAN_MAX_MODS; i++)
     {
-        if(job->mod_array[i])
+        if(core->mod_array[i])
         {        
-            free(job->mod_array[i]);
-            job->mod_array[i] = NULL;
+            free(core->mod_array[i]);
+            core->mod_array[i] = NULL;
         }
     }
 
-    free(job);
+    free(core);
 
     return;
 }
@@ -677,7 +707,7 @@ static void darshan_get_logfile_name(char* logfile_name, int jobid, struct tm* s
 }
 
 /* record any hints used to write the darshan log in the log header */
-static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* job)
+static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* core)
 {
     char* hints;
     char* header_hints;
@@ -701,15 +731,15 @@ static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* job)
         return;
 
     meta_remain = DARSHAN_JOB_METADATA_LEN -
-        strlen(job->log_job.metadata) - 1;
+        strlen(core->log_job.metadata) - 1;
     if(meta_remain >= (strlen(PACKAGE_VERSION) + 9))
     {
-        sprintf(job->log_job.metadata, "lib_ver=%s\n", PACKAGE_VERSION);
+        sprintf(core->log_job.metadata, "lib_ver=%s\n", PACKAGE_VERSION);
         meta_remain -= (strlen(PACKAGE_VERSION) + 9);
     }
     if(meta_remain >= (3 + strlen(header_hints)))
     {
-        m = job->log_job.metadata + strlen(job->log_job.metadata);
+        m = core->log_job.metadata + strlen(core->log_job.metadata);
         /* We have room to store the hints in the metadata portion of
          * the job header.  We just prepend an h= to the hints list.  The
          * metadata parser will ignore = characters that appear in the value
@@ -722,7 +752,187 @@ static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* job)
     return;
 }
 
-static void darshan_get_shared_record_ids(struct darshan_core_runtime *job,
+static int mnt_data_cmp(const void* a, const void* b)
+{
+    const struct mnt_data *d_a = (const struct mnt_data*)a;
+    const struct mnt_data *d_b = (const struct mnt_data*)b;
+
+    if(strlen(d_a->path) > strlen(d_b->path))
+        return(-1);
+    else if(strlen(d_a->path) < strlen(d_b->path))
+        return(1);
+    else
+        return(0);
+}
+
+/* adds an entry to table of mounted file systems */
+static void add_entry(char* trailing_data, int* space_left, struct mntent *entry)
+{
+    int ret;
+    char tmp_mnt[256];
+    struct statfs statfsbuf;
+
+    strncpy(mnt_data_array[mnt_data_count].path, entry->mnt_dir,
+        DARSHAN_MAX_MNT_PATH-1);
+    strncpy(mnt_data_array[mnt_data_count].type, entry->mnt_type,
+        DARSHAN_MAX_MNT_TYPE-1);
+    mnt_data_array[mnt_data_count].hash =
+        darshan_hash((void*)mnt_data_array[mnt_data_count].path,
+        strlen(mnt_data_array[mnt_data_count].path), 0);
+    /* NOTE: we now try to detect the preferred block size for each file
+     * system using statfs().  On Lustre we assume a size of 1 MiB
+     * because statfs() reports 4 KiB.
+     */
+#ifndef LL_SUPER_MAGIC
+#define LL_SUPER_MAGIC 0x0BD00BD0
+#endif
+    ret = statfs(entry->mnt_dir, &statfsbuf);
+    if(ret == 0 && statfsbuf.f_type != LL_SUPER_MAGIC)
+        mnt_data_array[mnt_data_count].block_size = statfsbuf.f_bsize;
+    else if(ret == 0 && statfsbuf.f_type == LL_SUPER_MAGIC)
+        mnt_data_array[mnt_data_count].block_size = 1024*1024;
+    else
+        mnt_data_array[mnt_data_count].block_size = 4096;
+
+    /* store mount information for use in header of darshan log */
+    ret = snprintf(tmp_mnt, 256, "\n%" PRId64 "\t%s\t%s",
+        mnt_data_array[mnt_data_count].hash,
+        entry->mnt_type, entry->mnt_dir);
+    if(ret < 256 && strlen(tmp_mnt) <= (*space_left))
+    {
+        strcat(trailing_data, tmp_mnt);
+        (*space_left) -= strlen(tmp_mnt);
+    }
+
+    mnt_data_count++;
+    return;
+}
+
+/* darshan_get_exe_and_mounts_root()
+ *
+ * appends the command line and the list of mounted file systems to
+ * trailing_data (stored with the job header) and fills mnt_data_array
+ */
+static void darshan_get_exe_and_mounts_root(struct darshan_core_runtime *core,
+    char* trailing_data, int space_left)
+{
+    FILE* tab;
+    struct mntent *entry;
+    char* exclude;
+    int tmp_index = 0;
+    int skip = 0;
+
+    /* skip these fs types */
+    static char* fs_exclusions[] = {
+        "tmpfs",
+        "proc",
+        "sysfs",
+        "devpts",
+        "binfmt_misc",
+        "fusectl",
+        "debugfs",
+        "securityfs",
+        "nfsd",
+        "none",
+        "rpc_pipefs",
+        "hugetlbfs",
+        "cgroup",
+        NULL
+    };
+
+    /* length of exe was already bounded to CP_EXE_LEN in darshan_core_initialize() */
+    strcat(trailing_data, core->exe);
+    space_left = CP_EXE_LEN - strlen(trailing_data);
+
+    /* we make two passes through mounted file systems; in the first pass we
+     * grab any non-nfs mount points, then on the second pass we grab nfs
+     * mount points
+     */
+
+    tab = setmntent("/etc/mtab", "r");
+    if(!tab)
+        return;
+    /* loop through list of mounted file systems */
+    while(mnt_data_count<DARSHAN_MAX_MNTS && (entry = getmntent(tab)) != NULL)
+    {
+        /* filter out excluded fs types */
+        tmp_index = 0;
+        skip = 0;
+        while((exclude = fs_exclusions[tmp_index]))
+        {
+            if(!(strcmp(exclude, entry->mnt_type)))
+            {
+                skip =1;
+                break;
+            }
+            tmp_index++;
+        }
+
+        if(skip || (strcmp(entry->mnt_type, "nfs") == 0))
+            continue;
+
+        add_entry(trailing_data, &space_left, entry);
+    }
+    endmntent(tab);
+
+    tab = setmntent("/etc/mtab", "r");
+    if(!tab)
+        return;
+    /* loop through list of mounted file systems */
+    while(mnt_data_count<DARSHAN_MAX_MNTS && (entry = getmntent(tab)) != NULL)
+    {
+        if(strcmp(entry->mnt_type, "nfs") != 0)
+            continue;
+
+        add_entry(trailing_data, &space_left, entry);
+    }
+    endmntent(tab);
+
+    /* Sort mount points in order of longest path to shortest path.  This is
+     * necessary so that if we try to match file paths to mount points later
+     * we don't match on "/" every time.
+     */
+    qsort(mnt_data_array, mnt_data_count, sizeof(mnt_data_array[0]), mnt_data_cmp);
+    return;
+}
+
+/* darshan_get_exe_and_mounts()
+ *
+ * allocates the trailing data buffer, has rank 0 fill it with the command line
+ * and mount list, and broadcasts it to all ranks; returns NULL if malloc fails
+ */
+static char* darshan_get_exe_and_mounts(struct darshan_core_runtime *core)
+{
+    char* trailing_data;
+    int space_left;
+
+    space_left = CP_EXE_LEN + 1;
+    trailing_data = malloc(space_left);
+    if(!trailing_data)
+    {
+        return(NULL);
+    }
+    memset(trailing_data, 0, space_left);
+
+    if(my_rank == 0)
+    {
+        darshan_get_exe_and_mounts_root(core, trailing_data, space_left);
+    }
+
+    /* broadcast trailing data to all nodes */
+    DARSHAN_MPI_CALL(PMPI_Bcast)(trailing_data, space_left, MPI_CHAR, 0,
+        MPI_COMM_WORLD);
+    /* broadcast mount count to all nodes */
+    DARSHAN_MPI_CALL(PMPI_Bcast)(&mnt_data_count, 1, MPI_INT, 0,
+        MPI_COMM_WORLD);
+    /* broadcast mount data to all nodes */
+    DARSHAN_MPI_CALL(PMPI_Bcast)(mnt_data_array,
+        mnt_data_count*sizeof(mnt_data_array[0]), MPI_BYTE, 0, MPI_COMM_WORLD);
+
+    return(trailing_data);
+}
+
+static void darshan_get_shared_record_ids(struct darshan_core_runtime *core,
     darshan_record_id *shared_recs)
 {
     int i;
@@ -736,7 +946,7 @@ static void darshan_get_shared_record_ids(struct darshan_core_runtime *job,
     if(my_rank == 0)
     {
         ndx = 0;
-        HASH_ITER(hlink, job->rec_hash, ref, tmp)
+        HASH_ITER(hlink, core->rec_hash, ref, tmp)
         {
             id_array[ndx++] = ref->rec.id;           
         }
@@ -750,7 +960,7 @@ static void darshan_get_shared_record_ids(struct darshan_core_runtime *job,
     /* everyone looks to see if they opened the same records as root */
     for(i=0; (i<DARSHAN_CORE_MAX_RECORDS && id_array[i] != 0); i++)
     {
-        HASH_ITER(hlink, job->rec_hash, ref, tmp)
+        HASH_ITER(hlink, core->rec_hash, ref, tmp)
         {
             if(id_array[i] == ref->rec.id)
             {
@@ -1007,14 +1217,14 @@ void darshan_core_register_module(
     DARSHAN_CORE_LOCK();
 
     *runtime_mem_limit = 0;
-    if(!darshan_core_job || (id >= DARSHAN_MAX_MODS))
+    if(!darshan_core || (id >= DARSHAN_MAX_MODS))
     {
         DARSHAN_CORE_UNLOCK();
         return;
     }
 
     /* see if this module is already registered */
-    if(darshan_core_job->mod_array[id])
+    if(darshan_core->mod_array[id])
     {
         /* if module is already registered just return */
         /* NOTE: we do not recalculate memory limit here, just set to 0 */
@@ -1035,7 +1245,7 @@ void darshan_core_register_module(
     mod->mod_funcs = *funcs;
 
     /* register module with darshan */
-    darshan_core_job->mod_array[id] = mod;
+    darshan_core->mod_array[id] = mod;
 
     /* TODO: something smarter than just 2 MiB per module */
     *runtime_mem_limit = 2 * 1024 * 1024;
@@ -1054,7 +1264,7 @@ void darshan_core_lookup_record_id(
     darshan_record_id tmp_id;
     struct darshan_core_record_ref* ref;
 
-    if(!darshan_core_job || !name)
+    if(!darshan_core || !name)
         return;
 
     /* TODO: what do you do with printable flag? */
@@ -1065,7 +1275,7 @@ void darshan_core_lookup_record_id(
     DARSHAN_CORE_LOCK();
 
     /* check to see if we've already stored the id->name mapping for this record */
-    HASH_FIND(hlink, darshan_core_job->rec_hash, &tmp_id, sizeof(darshan_record_id), ref);
+    HASH_FIND(hlink, darshan_core->rec_hash, &tmp_id, sizeof(darshan_record_id), ref);
     if(!ref)
     {
         /* if not, add this record to the hash */
@@ -1077,7 +1287,7 @@ void darshan_core_lookup_record_id(
             if(ref->rec.name)
                 strcpy(ref->rec.name, name);
 
-            HASH_ADD(hlink, darshan_core_job->rec_hash, rec.id, sizeof(darshan_record_id), ref);
+            HASH_ADD(hlink, darshan_core->rec_hash, rec.id, sizeof(darshan_record_id), ref);
         }
     }   
 
@@ -1091,12 +1301,12 @@ void darshan_core_lookup_record_id(
 
 double darshan_core_wtime()
 {
-    if(!darshan_core_job)
+    if(!darshan_core)
     {
         return(0);
     }
 
-    return(DARSHAN_MPI_CALL(PMPI_Wtime)() - darshan_core_job->wtime_offset);
+    return(DARSHAN_MPI_CALL(PMPI_Wtime)() - darshan_core->wtime_offset);
 }
 
 /*
diff --git a/darshan-util/darshan-logutils.c b/darshan-util/darshan-logutils.c
index 7633049..ab1b82f 100644
--- a/darshan-util/darshan-logutils.c
+++ b/darshan-util/darshan-logutils.c
@@ -27,10 +27,9 @@ struct darshan_fd_s
 {
     int pf;
     int64_t pos;
-    char mode[2];
-    int swap_flag;
     char version[8];
-    char* name;
+    int swap_flag;
+    char *exe_mnt_data;
     struct darshan_log_map job_map;
     struct darshan_log_map rec_map;
     struct darshan_log_map mod_map[DARSHAN_MAX_MODS];
@@ -64,21 +63,9 @@ darshan_fd darshan_log_open(const char *name, const char *mode)
         return(NULL);
     memset(tmp_fd, 0, sizeof(*tmp_fd));
 
-    /* TODO: why is mode needed??? */
-    /* TODO: why is name needed??? */
-    tmp_fd->mode[0] = mode[0];
-    tmp_fd->mode[1] = mode[1];
-    tmp_fd->name  = strdup(name);
-    if(!tmp_fd->name)
-    {
-        free(tmp_fd);
-        return(NULL);
-    }
-
     tmp_fd->pf = open(name, o_flags);
     if(tmp_fd->pf < 0)
     {
-        free(tmp_fd->name);
         free(tmp_fd);
         return(NULL);
     }
@@ -163,6 +150,7 @@ int darshan_log_getheader(darshan_fd fd, struct darshan_header *header)
  */
 int darshan_log_getjob(darshan_fd fd, struct darshan_job *job)
 {
+    char job_buf[CP_JOB_RECORD_SIZE] = {0};
     int ret;
 
     ret = darshan_log_seek(fd, fd->job_map.off);
@@ -173,13 +161,15 @@ int darshan_log_getjob(darshan_fd fd, struct darshan_job *job)
     }
 
     /* read the job data from the log file */
-    ret = darshan_log_read(fd, job, fd->job_map.len);
+    ret = darshan_log_read(fd, job_buf, fd->job_map.len);
     if(ret < fd->job_map.len)
     {
         fprintf(stderr, "Error: invalid darshan log file (failed to read job data).\n");
         return(-1);
     }
 
+    memcpy(job, job_buf, sizeof(*job));
+
     if(fd->swap_flag)
     {
         /* swap bytes if necessary */
@@ -190,6 +180,131 @@ int darshan_log_getjob(darshan_fd fd, struct darshan_job *job)
         DARSHAN_BSWAP64(&job->jobid);
     }
 
+    /* save trailing job data, so exe and mount information can be retrieved later */
+    fd->exe_mnt_data = malloc(CP_EXE_LEN+1);
+    if(!fd->exe_mnt_data)
+        return(-1);
+    memcpy(fd->exe_mnt_data, &job_buf[sizeof(*job)], CP_EXE_LEN+1);
+
+    return(0);
+}
+
+#if 0
+#ifdef HAVE_STRNDUP
+    metadata = strndup(job->metadata, sizeof(job->metadata));
+#else
+    metadata = strdup(job->metadata);
+#endif
+    char *kv;
+    char *key;
+    char *value;
+    char *save;
+
+    for(kv=strtok_r(metadata, "\n", &save);
+        kv != NULL;
+        kv=strtok_r(NULL, "\n", &save))
+    {
+        /* NOTE: we intentionally only split on the first = character.
+         * There may be additional = characters in the value portion
+         * (for example, when storing mpi-io hints).
+         */
+        strcpy(buffer, kv);
+        key = buffer;
+        value = index(buffer, '=');
+        if(!value)
+            continue;
+        /* convert = to a null terminator to split key and value */
+        value[0] = '\0';
+        value++;
+        if (strcmp(key, "prev_ver") == 0)
+        {
+            strncpy(job->version_string, value, sizeof(job->version_string));
+        }
+    }
+    free(metadata);
+#endif
+
+int darshan_log_getexe(darshan_fd fd, char *buf)
+{
+    char *newline;
+
+    /* TODO: try reading log job one more time to set this buffer up */
+    if(!fd->exe_mnt_data)
+        return(-1);
+
+    newline = strchr(fd->exe_mnt_data, '\n');
+
+    /* copy over the exe string */
+    if(newline)
+        memcpy(buf, fd->exe_mnt_data, (newline - fd->exe_mnt_data));
+
+    return (0);
+}
+
+/* darshan_log_getmounts()
+ * 
+ * retrieves mount table information from the log.  Note that devs, mnt_pts,
+ * and fs_types are arrays that will be allocated by the function and must
+ * be freed by the caller.  count will indicate the size of the arrays
+ */
+int darshan_log_getmounts(darshan_fd fd, int64_t** devs, char*** mnt_pts,
+    char*** fs_types, int* count)
+{
+    int ret;
+    char *pos;
+    int array_index = 0;
+
+    /* TODO: try reading log job one more time to set this buffer up */
+    if(!fd->exe_mnt_data)
+        return(-1);
+
+    /* count entries */
+    *count = 0;
+    pos = fd->exe_mnt_data;
+    while((pos = strchr(pos, '\n')) != NULL)
+    {
+        pos++;
+        (*count)++;
+    }
+
+    if(*count == 0)
+    {
+        /* no mount entries present */
+        return(0);
+    }
+
+    /* allocate output arrays */
+    *devs = malloc((*count)*sizeof(int64_t));
+    assert(*devs);
+    *mnt_pts = malloc((*count)*sizeof(char*));
+    assert(*mnt_pts);
+    *fs_types = malloc((*count)*sizeof(char*));
+    assert(*fs_types);
+
+    /* work backwards through the table and parse each line (except for
+     * first, which holds command line information)
+     */
+    while((pos = strrchr(fd->exe_mnt_data, '\n')) != NULL)
+    {
+        /* overestimate string lengths */
+        (*mnt_pts)[array_index] = malloc(CP_EXE_LEN);
+        assert((*mnt_pts)[array_index]);
+        (*fs_types)[array_index] = malloc(CP_EXE_LEN);
+        assert((*fs_types)[array_index]);
+
+        ret = sscanf(++pos, "%" PRId64 "\t%s\t%s", &(*devs)[array_index],
+            (*fs_types)[array_index], (*mnt_pts)[array_index]);
+
+        if(ret != 3)
+        {
+            fprintf(stderr, "Error: poorly formatted mount table in log file.\n");
+            return(-1);
+        }
+        pos--;
+        *pos = '\0';
+        array_index++;
+    }
+
     return(0);
 }
 
@@ -340,35 +455,6 @@ int darshan_log_getfile(darshan_fd fd, struct darshan_posix_file *file)
     return(-1);
 }
 
-#if 0
-int darshan_log_getexe(darshan_fd fd, char *buf)
-{
-    int ret;
-    char* newline;
-
-    ret = darshan_log_seek(fd, fd->job_struct_size);
-    if(ret < 0)
-        return(ret);
-
-    ret = darshan_log_read(fd, buf, (fd->COMPAT_CP_EXE_LEN + 1));
-    if (ret < (fd->COMPAT_CP_EXE_LEN + 1))
-    {
-        perror("darshan_log_read");
-        return(-1);
-    }
-
-    /* this call is only supposed to return the exe string, but starting in
-     * log format 1.23 there could be a table of mount entry information
-     * after the exe.  Look for newline character and truncate there.
-     */
-    newline = strchr(buf, '\n');
-    if(newline)
-        *newline = '\0';
-
-    return (0);
-}
-#endif
-
 /* darshan_log_close()
  *
  * close an open darshan file descriptor
@@ -380,7 +466,9 @@ void darshan_log_close(darshan_fd fd)
     if(fd->pf)
         close(fd->pf);
 
-    free(fd->name);
+    if(fd->exe_mnt_data)
+        free(fd->exe_mnt_data);
+
     free(fd);
 }
 
diff --git a/darshan-util/darshan-logutils.h b/darshan-util/darshan-logutils.h
index ec5842b..05c3e9b 100644
--- a/darshan-util/darshan-logutils.h
+++ b/darshan-util/darshan-logutils.h
@@ -22,17 +22,10 @@ darshan_fd darshan_log_open(const char *name, const char* mode);
 int darshan_log_getheader(darshan_fd file, struct darshan_header *header);
 int darshan_log_getjob(darshan_fd file, struct darshan_job *job);
 int darshan_log_gethash(darshan_fd file, struct darshan_record_ref **hash);
-#if 0
-int darshan_log_getfile(darshan_fd fd, 
-    struct darshan_job* job, 
-    struct darshan_file *file);
+int darshan_log_getfile(darshan_fd fd, struct darshan_posix_file *file);
 int darshan_log_getexe(darshan_fd fd, char *buf);
-int darshan_log_getmounts(darshan_fd fd,
-    int64_t** devs,
-    char*** mnt_pts,
-    char*** fs_types,
-    int* count);
-#endif
+int darshan_log_getmounts(darshan_fd fd, int64_t** devs, char*** mnt_pts,
+    char*** fs_types, int* count);
 void darshan_log_close(darshan_fd file);
 
 /* convenience macros for printing out counters */
diff --git a/darshan-util/darshan-posix-parser.c b/darshan-util/darshan-posix-parser.c
index 9f07d4f..cc3417e 100644
--- a/darshan-util/darshan-posix-parser.c
+++ b/darshan-util/darshan-posix-parser.c
@@ -21,6 +21,7 @@
 int main(int argc, char **argv)
 {
     int ret;
+    int i;
     char *filename;
     char tmp_string[4096];
     darshan_fd file;
@@ -28,8 +29,15 @@ int main(int argc, char **argv)
     struct darshan_job job;
     struct darshan_record_ref *rec_hash = NULL;
     struct darshan_record_ref *ref, *tmp;
+    int mount_count;
+    int64_t* devs;
+    char** mnt_pts;
+    char** fs_types;
     struct darshan_posix_file next_rec;
     time_t tmp_time = 0;
+    char *token;
+    char *save;
+    char buffer[DARSHAN_JOB_METADATA_LEN];
 
     assert(argc == 2);
     filename = argv[1];
@@ -62,6 +70,15 @@ int main(int argc, char **argv)
         return(-1);
     }
 
+    /* get the original command line for this job */
+    ret = darshan_log_getexe(file, tmp_string);
+    if(ret < 0)
+    {
+        fprintf(stderr, "Error: unable to read trailing job information.\n");
+        darshan_log_close(file);
+        return(-1);
+    }
+
     /* print job summary */
     printf("# darshan log version: %s\n", header.version_string);
     printf("# size of POSIX file statistics: %zu bytes\n", sizeof(next_rec));
@@ -78,6 +95,43 @@ int main(int argc, char **argv)
     printf("# end_time_asci: %s", ctime(&tmp_time));
     printf("# nprocs: %" PRId64 "\n", job.nprocs);
     printf("# run time: %" PRId64 "\n", job.end_time - job.start_time + 1);
+    for(token=strtok_r(job.metadata, "\n", &save);
+        token != NULL;
+        token=strtok_r(NULL, "\n", &save))
+    {
+        char *key;
+        char *value;
+        /* NOTE: we intentionally only split on the first = character.
+         * There may be additional = characters in the value portion
+         * (for example, when storing mpi-io hints).
+         */
+        strcpy(buffer, token);
+        key = buffer;
+        value = index(buffer, '=');
+        if(!value)
+            continue;
+        /* convert = to a null terminator to split key and value */
+        value[0] = '\0';
+        value++;
+        printf("# metadata: %s = %s\n", key, value);
+    }
+
+    /* get the mount information for this log */
+    ret = darshan_log_getmounts(file, &devs, &mnt_pts, &fs_types, &mount_count);
+    if(ret < 0)
+    {
+        fprintf(stderr, "darshan_log_getmounts() failed to read mount information.\n");
+        darshan_log_close(file);
+        return(-1);
+    }
+
+    /* print table of mounted file systems */
+    printf("\n# mounted file systems (device, mount point, and fs type)\n");
+    printf("# -------------------------------------------------------\n");
+    for(i=0; i<mount_count; i++)
+    {
+        printf("# mount entry: %" PRId64 "\t%s\t%s\n", devs[i], mnt_pts[i], fs_types[i]);
+    }
 
     /* read hash of darshan records */
     ret = darshan_log_gethash(file, &rec_hash);
@@ -106,7 +160,7 @@ int main(int argc, char **argv)
 
     /* iterate the posix file records stored in the darshan log */
     printf("\n*** FILE RECORD DATA ***\n");
-    int i = 0;
+    i = 0;
     do{
         struct darshan_record_ref *ref;
 


hooks/post-receive
--



More information about the Darshan-commits mailing list