[Darshan-commits] [Darshan] branch, dev-modular, updated. 756b640d97a13630c0e30e6b84b82e975e78d369
Service Account
git at mcs.anl.gov
Mon Feb 16 23:41:07 CST 2015
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Darshan".
The branch, dev-modular has been updated
via 756b640d97a13630c0e30e6b84b82e975e78d369 (commit)
from 018122117ee9f50abaa2b5fd73b7d3133c09373c (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit 756b640d97a13630c0e30e6b84b82e975e78d369
Author: Shane Snyder <ssnyder at mcs.anl.gov>
Date: Mon Feb 16 23:40:20 2015 -0600
updated runtime/util to use exe/mount info
-----------------------------------------------------------------------
Summary of changes:
darshan-log-format.h | 3 +-
darshan-runtime/darshan-core.h | 4 +-
darshan-runtime/doc/darshan-runtime.txt | 1 -
darshan-runtime/lib/darshan-core.c | 352 ++++++++++++++++++++++++------
darshan-util/darshan-logutils.c | 180 ++++++++++++----
darshan-util/darshan-logutils.h | 13 +-
darshan-util/darshan-posix-parser.c | 56 +++++-
7 files changed, 476 insertions(+), 133 deletions(-)
Diff of changes:
diff --git a/darshan-log-format.h b/darshan-log-format.h
index 6f7d63d..ccd4df9 100644
--- a/darshan-log-format.h
+++ b/darshan-log-format.h
@@ -36,7 +36,6 @@ typedef uint64_t darshan_record_id;
/* NOTES: - valid ids range from [0...DARSHAN_MAX_MODS-1]
* - order of ids control module shutdown order (and consequently, order in log file)
*/
-/* TODO: enforce maximum? */
#define DARSHAN_MAX_MODS 16
typedef enum
{
@@ -84,7 +83,7 @@ struct darshan_job
int64_t end_time;
int64_t nprocs;
int64_t jobid;
- char metadata[DARSHAN_JOB_METADATA_LEN]; /* TODO: what is this? */
+ char metadata[DARSHAN_JOB_METADATA_LEN];
};
struct darshan_record
diff --git a/darshan-runtime/darshan-core.h b/darshan-runtime/darshan-core.h
index 91f68b5..0caa468 100644
--- a/darshan-runtime/darshan-core.h
+++ b/darshan-runtime/darshan-core.h
@@ -22,14 +22,14 @@ struct darshan_core_module
};
/* in memory structure to keep up with job level data */
-/* TODO: trailing data ? */
struct darshan_core_runtime
{
struct darshan_job log_job;
char exe[CP_EXE_LEN+1];
- double wtime_offset;
struct darshan_core_record_ref *rec_hash;
struct darshan_core_module* mod_array[DARSHAN_MAX_MODS];
+ double wtime_offset;
+ char *trailing_data;
};
struct darshan_core_record_ref
diff --git a/darshan-runtime/doc/darshan-runtime.txt b/darshan-runtime/doc/darshan-runtime.txt
index ade9171..c4699dd 100644
--- a/darshan-runtime/doc/darshan-runtime.txt
+++ b/darshan-runtime/doc/darshan-runtime.txt
@@ -407,7 +407,6 @@ behavior at runtime:
* DARSHAN_DISABLE: disables Darshan instrumentation
* DARSHAN_INTERNAL_TIMING: enables internal instrumentation that will print the time required to startup and shutdown Darshan to stderr at run time.
* DARSHAN_LOGHINTS: specifies the MPI-IO hints to use when storing the Darshan output file. The format is a semicolon-delimited list of key=value pairs, for example: hint1=value1;hint2=value2
-* DARSHAN_DISABLE_TIMING: disables the subset of Darshan instrumentation that gathers timing information
* DARSHAN_MEMALIGN: specifies a value for memory alignment (CP_MEM_ALIGNMENT)
* DARSHAN_JOBID: specifies the name of the environment variable to use for the job identifier, such as PBS_JOBID
* DARSHAN_DISABLE_SHARED_REDUCTION: disables the step in Darshan aggregation
diff --git a/darshan-runtime/lib/darshan-core.c b/darshan-runtime/lib/darshan-core.c
index ef03e4b..ee76829 100644
--- a/darshan-runtime/lib/darshan-core.c
+++ b/darshan-runtime/lib/darshan-core.c
@@ -29,24 +29,43 @@
extern char* __progname;
/* internal variable delcarations */
-static struct darshan_core_runtime *darshan_core_job = NULL;
+static struct darshan_core_runtime *darshan_core = NULL;
static pthread_mutex_t darshan_core_mutex = PTHREAD_MUTEX_INITIALIZER;
static int my_rank = -1;
static int nprocs = -1;
+/* FS mount information */
+#define DARSHAN_MAX_MNTS 64
+#define DARSHAN_MAX_MNT_PATH 256
+#define DARSHAN_MAX_MNT_TYPE 32
+struct mnt_data
+{
+ int64_t hash;
+ int64_t block_size;
+ char path[DARSHAN_MAX_MNT_PATH];
+ char type[DARSHAN_MAX_MNT_TYPE];
+};
+static struct mnt_data mnt_data_array[DARSHAN_MAX_MNTS];
+static int mnt_data_count = 0;
+
/* prototypes for internal helper functions */
static void darshan_core_initialize(
int *argc, char ***argv);
static void darshan_core_shutdown(
void);
static void darshan_core_cleanup(
- struct darshan_core_runtime* job);
+ struct darshan_core_runtime* core);
static void darshan_get_logfile_name(
char* logfile_name, int jobid, struct tm* start_tm);
static void darshan_log_record_hints_and_ver(
- struct darshan_core_runtime* job);
+ struct darshan_core_runtime* core);
+static void darshan_get_exe_and_mounts_root(
+ struct darshan_core_runtime *core, char* trailing_data,
+ int space_left);
+static char* darshan_get_exe_and_mounts(
+ struct darshan_core_runtime *core);
static void darshan_get_shared_record_ids(
- struct darshan_core_runtime *job, darshan_record_id *shared_recs);
+ struct darshan_core_runtime *core, darshan_record_id *shared_recs);
static int darshan_log_coll_open(
char *logfile_name, MPI_File *log_fh);
static int darshan_log_write_record_hash(
@@ -120,28 +139,30 @@ static void darshan_core_initialize(int *argc, char ***argv)
init_start = DARSHAN_MPI_CALL(PMPI_Wtime)();
/* setup darshan runtime if darshan is enabled and hasn't been initialized already */
- if(!getenv("DARSHAN_DISABLE") && !darshan_core_job)
+ if(!getenv("DARSHAN_DISABLE") && !darshan_core)
{
- /* allocate structure to track darshan_core_job information */
- darshan_core_job = malloc(sizeof(*darshan_core_job));
- if(darshan_core_job)
+ /* TODO: darshan mem alignment code? */
+
+ /* allocate structure to track darshan_core_runtime information */
+ darshan_core = malloc(sizeof(*darshan_core));
+ if(darshan_core)
{
- memset(darshan_core_job, 0, sizeof(*darshan_core_job));
+ memset(darshan_core, 0, sizeof(*darshan_core));
- darshan_core_job->log_job.uid = getuid();
- darshan_core_job->log_job.start_time = time(NULL);
- darshan_core_job->log_job.nprocs = nprocs;
- darshan_core_job->wtime_offset = DARSHAN_MPI_CALL(PMPI_Wtime)();
+ darshan_core->log_job.uid = getuid();
+ darshan_core->log_job.start_time = time(NULL);
+ darshan_core->log_job.nprocs = nprocs;
+ darshan_core->wtime_offset = DARSHAN_MPI_CALL(PMPI_Wtime)();
/* record exe and arguments */
for(i=0; i<(*argc); i++)
{
- chars_left = CP_EXE_LEN-strlen(darshan_core_job->exe);
- strncat(darshan_core_job->exe, *(argv[i]), chars_left);
+ chars_left = CP_EXE_LEN-strlen(darshan_core->exe);
+ strncat(darshan_core->exe, (*argv)[i], chars_left);
if(i < ((*argc)-1))
{
- chars_left = CP_EXE_LEN-strlen(darshan_core_job->exe);
- strncat(darshan_core_job->exe, " ", chars_left);
+ chars_left = CP_EXE_LEN-strlen(darshan_core->exe);
+ strncat(darshan_core->exe, " ", chars_left);
}
}
@@ -150,19 +171,22 @@ static void darshan_core_initialize(int *argc, char ***argv)
*/
if(argc == 0)
{
- chars_left = CP_EXE_LEN-strlen(darshan_core_job->exe);
- strncat(darshan_core_job->exe, __progname, chars_left);
- chars_left = CP_EXE_LEN-strlen(darshan_core_job->exe);
- strncat(darshan_core_job->exe, " <unknown args>", chars_left);
+ chars_left = CP_EXE_LEN-strlen(darshan_core->exe);
+ strncat(darshan_core->exe, __progname, chars_left);
+ chars_left = CP_EXE_LEN-strlen(darshan_core->exe);
+ strncat(darshan_core->exe, " <unknown args>", chars_left);
}
if(chars_left == 0)
{
/* we ran out of room; mark that string was truncated */
truncate_offset = CP_EXE_LEN - strlen(truncate_string);
- sprintf(&darshan_core_job->exe[truncate_offset], "%s",
+ sprintf(&darshan_core->exe[truncate_offset], "%s",
truncate_string);
}
+
+ /* collect information about command line and mounted file systems */
+ darshan_core->trailing_data = darshan_get_exe_and_mounts(darshan_core);
}
}
@@ -185,7 +209,7 @@ static void darshan_core_shutdown()
{
int i;
char *logfile_name;
- struct darshan_core_runtime *final_job;
+ struct darshan_core_runtime *final_core;
int internal_timing_flag = 0;
char *envjobid;
char *jobid_str;
@@ -210,7 +234,7 @@ static void darshan_core_shutdown()
internal_timing_flag = 1;
DARSHAN_CORE_LOCK();
- if(!darshan_core_job)
+ if(!darshan_core)
{
DARSHAN_CORE_UNLOCK();
return;
@@ -218,8 +242,8 @@ static void darshan_core_shutdown()
/* disable further tracing while hanging onto the data so that we can
* write it out
*/
- final_job = darshan_core_job;
- darshan_core_job = NULL;
+ final_core = darshan_core;
+ darshan_core = NULL;
DARSHAN_CORE_UNLOCK();
start_log_time = DARSHAN_MPI_CALL(PMPI_Wtime)();
@@ -227,7 +251,7 @@ static void darshan_core_shutdown()
logfile_name = malloc(PATH_MAX);
if(!logfile_name)
{
- darshan_core_cleanup(final_job);
+ darshan_core_cleanup(final_core);
return;
}
@@ -254,18 +278,15 @@ static void darshan_core_shutdown()
jobid = getpid();
}
- final_job->log_job.jobid = (int64_t)jobid;
+ final_core->log_job.jobid = (int64_t)jobid;
-/* TODO */
-#if 0
/* if we are using any hints to write the log file, then record those
* hints with the darshan job information
*/
- darshan_log_record_hints_and_ver(final_job);
-#endif
+ darshan_log_record_hints_and_ver(final_core);
/* use human readable start time format in log filename */
- start_time_tmp = final_job->log_job.start_time;
+ start_time_tmp = final_core->log_job.start_time;
start_tm = localtime(&start_time_tmp);
/* construct log file name */
@@ -280,27 +301,27 @@ static void darshan_core_shutdown()
{
/* failed to generate log file name */
free(logfile_name);
- darshan_core_cleanup(final_job);
+ darshan_core_cleanup(final_core);
return;
}
- final_job->log_job.end_time = time(NULL);
+ final_core->log_job.end_time = time(NULL);
/* reduce to report first start time and last end time across all ranks
* at rank 0
*/
- DARSHAN_MPI_CALL(PMPI_Reduce)(&final_job->log_job.start_time, &first_start_time, 1, MPI_LONG_LONG, MPI_MIN, 0, MPI_COMM_WORLD);
- DARSHAN_MPI_CALL(PMPI_Reduce)(&final_job->log_job.end_time, &last_end_time, 1, MPI_LONG_LONG, MPI_MAX, 0, MPI_COMM_WORLD);
+ DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job.start_time, &first_start_time, 1, MPI_LONG_LONG, MPI_MIN, 0, MPI_COMM_WORLD);
+ DARSHAN_MPI_CALL(PMPI_Reduce)(&final_core->log_job.end_time, &last_end_time, 1, MPI_LONG_LONG, MPI_MAX, 0, MPI_COMM_WORLD);
if(my_rank == 0)
{
- final_job->log_job.start_time = first_start_time;
- final_job->log_job.end_time = last_end_time;
+ final_core->log_job.start_time = first_start_time;
+ final_core->log_job.end_time = last_end_time;
}
/* set which local modules were actually used */
for(i = 0; i < DARSHAN_MAX_MODS; i++)
{
- if(final_job->mod_array[i])
+ if(final_core->mod_array[i])
local_mod_use[i] = 1;
}
@@ -308,7 +329,7 @@ static void darshan_core_shutdown()
DARSHAN_MPI_CALL(PMPI_Allreduce)(local_mod_use, global_mod_use_count, DARSHAN_MAX_MODS, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
/* get a list of records which are shared across all processes */
- darshan_get_shared_record_ids(final_job, shared_recs);
+ darshan_get_shared_record_ids(final_core, shared_recs);
/* collectively open the darshan log file */
ret = darshan_log_coll_open(logfile_name, &log_fh);
@@ -325,16 +346,25 @@ static void darshan_core_shutdown()
unlink(logfile_name);
}
free(logfile_name);
- darshan_core_cleanup(final_job);
+ darshan_core_cleanup(final_core);
return;
}
/* rank 0 is responsible for writing the darshan job information */
if(my_rank == 0)
{
+ unsigned char tmp_buf[CP_JOB_RECORD_SIZE];
+ unsigned char *tmp_ptr;
+
+ /* pack the job info and exe/mount info into a buffer for writing */
+ tmp_ptr = tmp_buf;
+ memcpy(tmp_ptr, &final_core->log_job, sizeof(struct darshan_job));
+ tmp_ptr += sizeof(struct darshan_job);
+ memcpy(tmp_ptr, final_core->trailing_data, CP_EXE_LEN+1);
+
/* write the job information, making sure to prealloc space for the log header */
all_ret = DARSHAN_MPI_CALL(PMPI_File_write_at)(log_fh, sizeof(struct darshan_header),
- &final_job->log_job, sizeof(struct darshan_job), MPI_BYTE, &status);
+ tmp_buf, CP_JOB_RECORD_SIZE, MPI_BYTE, &status);
if(all_ret != MPI_SUCCESS)
{
fprintf(stderr, "darshan library warning: unable to write job data to log file %s\n",
@@ -343,7 +373,7 @@ static void darshan_core_shutdown()
}
/* TODO: after compression is added, this should be fixed */
- log_header.rec_map.off = sizeof(struct darshan_header) + sizeof(struct darshan_job);
+ log_header.rec_map.off = sizeof(struct darshan_header) + CP_JOB_RECORD_SIZE;
}
/* error out if unable to write job information */
@@ -351,12 +381,12 @@ static void darshan_core_shutdown()
if(all_ret != 0)
{
free(logfile_name);
- darshan_core_cleanup(final_job);
+ darshan_core_cleanup(final_core);
return;
}
/* write the record name->id hash to the log file */
- ret = darshan_log_write_record_hash(log_fh, final_job->rec_hash,
+ ret = darshan_log_write_record_hash(log_fh, final_core->rec_hash,
shared_recs, &log_header.rec_map);
/* error out if unable to write record hash */
@@ -371,7 +401,7 @@ static void darshan_core_shutdown()
unlink(logfile_name);
}
free(logfile_name);
- darshan_core_cleanup(final_job);
+ darshan_core_cleanup(final_core);
return;
}
@@ -384,7 +414,7 @@ static void darshan_core_shutdown()
*/
for(i = 0; i < DARSHAN_MAX_MODS; i++)
{
- struct darshan_core_module* this_mod = final_job->mod_array[i];
+ struct darshan_core_module* this_mod = final_core->mod_array[i];
MPI_Comm mod_comm;
void* mod_buf = NULL;
int mod_buf_size = 0;
@@ -436,7 +466,7 @@ static void darshan_core_shutdown()
unlink(logfile_name);
}
free(logfile_name);
- darshan_core_cleanup(final_job);
+ darshan_core_cleanup(final_core);
return;
}
@@ -475,7 +505,7 @@ static void darshan_core_shutdown()
if(all_ret != 0)
{
free(logfile_name);
- darshan_core_cleanup(final_job);
+ darshan_core_cleanup(final_core);
return;
}
@@ -512,7 +542,7 @@ static void darshan_core_shutdown()
}
free(logfile_name);
- darshan_core_cleanup(final_job);
+ darshan_core_cleanup(final_core);
if(internal_timing_flag)
{
@@ -523,20 +553,20 @@ static void darshan_core_shutdown()
}
/* free darshan core data structures to shutdown */
-static void darshan_core_cleanup(struct darshan_core_runtime* job)
+static void darshan_core_cleanup(struct darshan_core_runtime* core)
{
int i;
for(i = 0; i < DARSHAN_MAX_MODS; i++)
{
- if(job->mod_array[i])
+ if(core->mod_array[i])
{
- free(job->mod_array[i]);
- job->mod_array[i] = NULL;
+ free(core->mod_array[i]);
+ core->mod_array[i] = NULL;
}
}
- free(job);
+ free(core);
return;
}
@@ -677,7 +707,7 @@ static void darshan_get_logfile_name(char* logfile_name, int jobid, struct tm* s
}
/* record any hints used to write the darshan log in the log header */
-static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* job)
+static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* core)
{
char* hints;
char* header_hints;
@@ -701,15 +731,15 @@ static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* job)
return;
meta_remain = DARSHAN_JOB_METADATA_LEN -
- strlen(job->log_job.metadata) - 1;
+ strlen(core->log_job.metadata) - 1;
if(meta_remain >= (strlen(PACKAGE_VERSION) + 9))
{
- sprintf(job->log_job.metadata, "lib_ver=%s\n", PACKAGE_VERSION);
+ sprintf(core->log_job.metadata, "lib_ver=%s\n", PACKAGE_VERSION);
meta_remain -= (strlen(PACKAGE_VERSION) + 9);
}
if(meta_remain >= (3 + strlen(header_hints)))
{
- m = job->log_job.metadata + strlen(job->log_job.metadata);
+ m = core->log_job.metadata + strlen(core->log_job.metadata);
/* We have room to store the hints in the metadata portion of
* the job header. We just prepend an h= to the hints list. The
* metadata parser will ignore = characters that appear in the value
@@ -722,7 +752,187 @@ static void darshan_log_record_hints_and_ver(struct darshan_core_runtime* job)
return;
}
-static void darshan_get_shared_record_ids(struct darshan_core_runtime *job,
+static int mnt_data_cmp(const void* a, const void* b)
+{
+ const struct mnt_data *d_a = (const struct mnt_data*)a;
+ const struct mnt_data *d_b = (const struct mnt_data*)b;
+
+ if(strlen(d_a->path) > strlen(d_b->path))
+ return(-1);
+ else if(strlen(d_a->path) < strlen(d_b->path))
+ return(1);
+ else
+ return(0);
+}
+
+/* adds an entry to table of mounted file systems */
+static void add_entry(char* trailing_data, int* space_left, struct mntent *entry)
+{
+ int ret;
+ char tmp_mnt[256];
+ struct statfs statfsbuf;
+
+ strncpy(mnt_data_array[mnt_data_count].path, entry->mnt_dir,
+ DARSHAN_MAX_MNT_PATH-1);
+ strncpy(mnt_data_array[mnt_data_count].type, entry->mnt_type,
+ DARSHAN_MAX_MNT_TYPE-1);
+ mnt_data_array[mnt_data_count].hash =
+ darshan_hash((void*)mnt_data_array[mnt_data_count].path,
+ strlen(mnt_data_array[mnt_data_count].path), 0);
+ /* NOTE: we now try to detect the preferred block size for each file
+ * system using fstatfs(). On Lustre we assume a size of 1 MiB
+ * because fstatfs() reports 4 KiB.
+ */
+#ifndef LL_SUPER_MAGIC
+#define LL_SUPER_MAGIC 0x0BD00BD0
+#endif
+ ret = statfs(entry->mnt_dir, &statfsbuf);
+ if(ret == 0 && statfsbuf.f_type != LL_SUPER_MAGIC)
+ mnt_data_array[mnt_data_count].block_size = statfsbuf.f_bsize;
+ else if(ret == 0 && statfsbuf.f_type == LL_SUPER_MAGIC)
+ mnt_data_array[mnt_data_count].block_size = 1024*1024;
+ else
+ mnt_data_array[mnt_data_count].block_size = 4096;
+
+ /* store mount information for use in header of darshan log */
+ ret = snprintf(tmp_mnt, 256, "\n%" PRId64 "\t%s\t%s",
+ mnt_data_array[mnt_data_count].hash,
+ entry->mnt_type, entry->mnt_dir);
+ if(ret < 256 && strlen(tmp_mnt) <= (*space_left))
+ {
+ strcat(trailing_data, tmp_mnt);
+ (*space_left) -= strlen(tmp_mnt);
+ }
+
+ mnt_data_count++;
+ return;
+}
+
+/* darshan_get_exe_and_mounts_root()
+ *
+ * collects command line and list of mounted file systems into a string that
+ * will be stored with the job header
+ */
+static void darshan_get_exe_and_mounts_root(struct darshan_core_runtime *core,
+ char* trailing_data, int space_left)
+{
+ FILE* tab;
+ struct mntent *entry;
+ char* exclude;
+ int tmp_index = 0;
+ int skip = 0;
+
+ /* skip these fs types */
+ static char* fs_exclusions[] = {
+ "tmpfs",
+ "proc",
+ "sysfs",
+ "devpts",
+ "binfmt_misc",
+ "fusectl",
+ "debugfs",
+ "securityfs",
+ "nfsd",
+ "none",
+ "rpc_pipefs",
+ "hugetlbfs",
+ "cgroup",
+ NULL
+ };
+
+ /* length of exe has already been safety checked in darshan-posix.c */
+ strcat(trailing_data, core->exe);
+ space_left = CP_EXE_LEN - strlen(trailing_data);
+
+ /* we make two passes through mounted file systems; in the first pass we
+ * grab any non-nfs mount points, then on the second pass we grab nfs
+ * mount points
+ */
+
+ tab = setmntent("/etc/mtab", "r");
+ if(!tab)
+ return;
+ /* loop through list of mounted file systems */
+ while(mnt_data_count<DARSHAN_MAX_MNTS && (entry = getmntent(tab)) != NULL)
+ {
+ /* filter out excluded fs types */
+ tmp_index = 0;
+ skip = 0;
+ while((exclude = fs_exclusions[tmp_index]))
+ {
+ if(!(strcmp(exclude, entry->mnt_type)))
+ {
+ skip =1;
+ break;
+ }
+ tmp_index++;
+ }
+
+ if(skip || (strcmp(entry->mnt_type, "nfs") == 0))
+ continue;
+
+ add_entry(trailing_data, &space_left, entry);
+ }
+ endmntent(tab);
+
+ tab = setmntent("/etc/mtab", "r");
+ if(!tab)
+ return;
+ /* loop through list of mounted file systems */
+ while(mnt_data_count<DARSHAN_MAX_MNTS && (entry = getmntent(tab)) != NULL)
+ {
+ if(strcmp(entry->mnt_type, "nfs") != 0)
+ continue;
+
+ add_entry(trailing_data, &space_left, entry);
+ }
+ endmntent(tab);
+
+ /* Sort mount points in order of longest path to shortest path. This is
+ * necessary so that if we try to match file paths to mount points later
+ * we don't match on "/" every time.
+ */
+ qsort(mnt_data_array, mnt_data_count, sizeof(mnt_data_array[0]), mnt_data_cmp);
+ return;
+}
+
+/* darshan_get_exe_and_mounts()
+ *
+ * collects command line and list of mounted file systems into a string that
+ * will be stored with the job header
+ */
+static char* darshan_get_exe_and_mounts(struct darshan_core_runtime *core)
+{
+ char* trailing_data;
+ int space_left;
+
+ space_left = CP_EXE_LEN + 1;
+ trailing_data = malloc(space_left);
+ if(!trailing_data)
+ {
+ return(NULL);
+ }
+ memset(trailing_data, 0, space_left);
+
+ if(my_rank == 0)
+ {
+ darshan_get_exe_and_mounts_root(core, trailing_data, space_left);
+ }
+
+ /* broadcast trailing data to all nodes */
+ DARSHAN_MPI_CALL(PMPI_Bcast)(trailing_data, space_left, MPI_CHAR, 0,
+ MPI_COMM_WORLD);
+ /* broadcast mount count to all nodes */
+ DARSHAN_MPI_CALL(PMPI_Bcast)(&mnt_data_count, 1, MPI_INT, 0,
+ MPI_COMM_WORLD);
+ /* broadcast mount data to all nodes */
+ DARSHAN_MPI_CALL(PMPI_Bcast)(mnt_data_array,
+ mnt_data_count*sizeof(mnt_data_array[0]), MPI_BYTE, 0, MPI_COMM_WORLD);
+
+ return(trailing_data);
+}
+
+static void darshan_get_shared_record_ids(struct darshan_core_runtime *core,
darshan_record_id *shared_recs)
{
int i;
@@ -736,7 +946,7 @@ static void darshan_get_shared_record_ids(struct darshan_core_runtime *job,
if(my_rank == 0)
{
ndx = 0;
- HASH_ITER(hlink, job->rec_hash, ref, tmp)
+ HASH_ITER(hlink, core->rec_hash, ref, tmp)
{
id_array[ndx++] = ref->rec.id;
}
@@ -750,7 +960,7 @@ static void darshan_get_shared_record_ids(struct darshan_core_runtime *job,
/* everyone looks to see if they opened the same records as root */
for(i=0; (i<DARSHAN_CORE_MAX_RECORDS && id_array[i] != 0); i++)
{
- HASH_ITER(hlink, job->rec_hash, ref, tmp)
+ HASH_ITER(hlink, core->rec_hash, ref, tmp)
{
if(id_array[i] == ref->rec.id)
{
@@ -1007,14 +1217,14 @@ void darshan_core_register_module(
DARSHAN_CORE_LOCK();
*runtime_mem_limit = 0;
- if(!darshan_core_job || (id >= DARSHAN_MAX_MODS))
+ if(!darshan_core || (id >= DARSHAN_MAX_MODS))
{
DARSHAN_CORE_UNLOCK();
return;
}
/* see if this module is already registered */
- if(darshan_core_job->mod_array[id])
+ if(darshan_core->mod_array[id])
{
/* if module is already registered just return */
/* NOTE: we do not recalculate memory limit here, just set to 0 */
@@ -1035,7 +1245,7 @@ void darshan_core_register_module(
mod->mod_funcs = *funcs;
/* register module with darshan */
- darshan_core_job->mod_array[id] = mod;
+ darshan_core->mod_array[id] = mod;
/* TODO: something smarter than just 2 MiB per module */
*runtime_mem_limit = 2 * 1024 * 1024;
@@ -1054,7 +1264,7 @@ void darshan_core_lookup_record_id(
darshan_record_id tmp_id;
struct darshan_core_record_ref* ref;
- if(!darshan_core_job || !name)
+ if(!darshan_core || !name)
return;
/* TODO: what do you do with printable flag? */
@@ -1065,7 +1275,7 @@ void darshan_core_lookup_record_id(
DARSHAN_CORE_LOCK();
/* check to see if we've already stored the id->name mapping for this record */
- HASH_FIND(hlink, darshan_core_job->rec_hash, &tmp_id, sizeof(darshan_record_id), ref);
+ HASH_FIND(hlink, darshan_core->rec_hash, &tmp_id, sizeof(darshan_record_id), ref);
if(!ref)
{
/* if not, add this record to the hash */
@@ -1077,7 +1287,7 @@ void darshan_core_lookup_record_id(
if(ref->rec.name)
strcpy(ref->rec.name, name);
- HASH_ADD(hlink, darshan_core_job->rec_hash, rec.id, sizeof(darshan_record_id), ref);
+ HASH_ADD(hlink, darshan_core->rec_hash, rec.id, sizeof(darshan_record_id), ref);
}
}
@@ -1091,12 +1301,12 @@ void darshan_core_lookup_record_id(
double darshan_core_wtime()
{
- if(!darshan_core_job)
+ if(!darshan_core)
{
return(0);
}
- return(DARSHAN_MPI_CALL(PMPI_Wtime)() - darshan_core_job->wtime_offset);
+ return(DARSHAN_MPI_CALL(PMPI_Wtime)() - darshan_core->wtime_offset);
}
/*
diff --git a/darshan-util/darshan-logutils.c b/darshan-util/darshan-logutils.c
index 7633049..ab1b82f 100644
--- a/darshan-util/darshan-logutils.c
+++ b/darshan-util/darshan-logutils.c
@@ -27,10 +27,9 @@ struct darshan_fd_s
{
int pf;
int64_t pos;
- char mode[2];
- int swap_flag;
char version[8];
- char* name;
+ int swap_flag;
+ char *exe_mnt_data;
struct darshan_log_map job_map;
struct darshan_log_map rec_map;
struct darshan_log_map mod_map[DARSHAN_MAX_MODS];
@@ -64,21 +63,9 @@ darshan_fd darshan_log_open(const char *name, const char *mode)
return(NULL);
memset(tmp_fd, 0, sizeof(*tmp_fd));
- /* TODO: why is mode needed??? */
- /* TODO: why is name needed??? */
- tmp_fd->mode[0] = mode[0];
- tmp_fd->mode[1] = mode[1];
- tmp_fd->name = strdup(name);
- if(!tmp_fd->name)
- {
- free(tmp_fd);
- return(NULL);
- }
-
tmp_fd->pf = open(name, o_flags);
if(tmp_fd->pf < 0)
{
- free(tmp_fd->name);
free(tmp_fd);
return(NULL);
}
@@ -163,6 +150,7 @@ int darshan_log_getheader(darshan_fd fd, struct darshan_header *header)
*/
int darshan_log_getjob(darshan_fd fd, struct darshan_job *job)
{
+ char job_buf[CP_JOB_RECORD_SIZE] = {0};
int ret;
ret = darshan_log_seek(fd, fd->job_map.off);
@@ -173,13 +161,15 @@ int darshan_log_getjob(darshan_fd fd, struct darshan_job *job)
}
/* read the job data from the log file */
- ret = darshan_log_read(fd, job, fd->job_map.len);
+ ret = darshan_log_read(fd, job_buf, fd->job_map.len);
if(ret < fd->job_map.len)
{
fprintf(stderr, "Error: invalid darshan log file (failed to read job data).\n");
return(-1);
}
+ memcpy(job, job_buf, sizeof(*job));
+
if(fd->swap_flag)
{
/* swap bytes if necessary */
@@ -190,6 +180,131 @@ int darshan_log_getjob(darshan_fd fd, struct darshan_job *job)
DARSHAN_BSWAP64(&job->jobid);
}
+ /* save trailing job data, so exe and mount information can be retrieved later */
+ fd->exe_mnt_data = malloc(CP_EXE_LEN+1);
+ if(!fd->exe_mnt_data)
+ return(-1);
+ memcpy(fd->exe_mnt_data, &job_buf[sizeof(*job)], CP_EXE_LEN+1);
+
+ return(0);
+}
+
+#if 0
+#ifdef HAVE_STRNDUP
+ metadata = strndup(job->metadata, sizeof(job->metadata));
+#else
+ metadata = strdup(job->metadata);
+#endif
+ char *kv;
+ char *key;
+ char *value;
+ char *save;
+
+ for(kv=strtok_r(metadata, "\n", &save);
+ kv != NULL;
+ kv=strtok_r(NULL, "\n", &save))
+ {
+ /* NOTE: we intentionally only split on the first = character.
+ * There may be additional = characters in the value portion
+ * (for example, when storing mpi-io hints).
+ */
+ strcpy(buffer, kv);
+ key = buffer;
+ value = index(buffer, '=');
+ if(!value)
+ continue;
+ /* convert = to a null terminator to split key and value */
+ value[0] = '\0';
+ value++;
+ if (strcmp(key, "prev_ver") == 0)
+ {
+ strncpy(job->version_string, value, sizeof(job->version_string));
+ }
+ }
+ free(metadata);
+#endif
+
+int darshan_log_getexe(darshan_fd fd, char *buf)
+{
+ char *newline;
+
+ /* TODO: try reading log job one more time to set this buffer up */
+ if(!fd->exe_mnt_data)
+ return(-1);
+
+ newline = strchr(fd->exe_mnt_data, '\n');
+
+ /* copy over the exe string */
+ if(newline)
+ memcpy(buf, fd->exe_mnt_data, (newline - fd->exe_mnt_data));
+
+ return (0);
+}
+
+/* darshan_log_getmounts()
+ *
+ * retrieves mount table information from the log. Note that devs, mnt_pts,
+ * and fs_types are arrays that will be allocated by the function and must
+ * be freed by the caller. count will indicate the size of the arrays
+ */
+int darshan_log_getmounts(darshan_fd fd, int64_t** devs, char*** mnt_pts,
+ char*** fs_types, int* count)
+{
+ int ret;
+ char *pos;
+ int array_index = 0;
+
+ /* TODO: try reading log job one more time to set this buffer up */
+ if(!fd->exe_mnt_data)
+ return(-1);
+
+ /* count entries */
+ *count = 0;
+ pos = fd->exe_mnt_data;
+ while((pos = strchr(pos, '\n')) != NULL)
+ {
+ pos++;
+ (*count)++;
+ }
+
+ if(*count == 0)
+ {
+ /* no mount entries present */
+ return(0);
+ }
+
+ /* allocate output arrays */
+ *devs = malloc((*count)*sizeof(int64_t));
+ assert(*devs);
+ *mnt_pts = malloc((*count)*sizeof(char*));
+ assert(*mnt_pts);
+ *fs_types = malloc((*count)*sizeof(char*));
+ assert(*fs_types);
+
+ /* work backwards through the table and parse each line (except for
+ * first, which holds command line information)
+ */
+ while((pos = strrchr(fd->exe_mnt_data, '\n')) != NULL)
+ {
+ /* overestimate string lengths */
+ (*mnt_pts)[array_index] = malloc(CP_EXE_LEN);
+ assert((*mnt_pts)[array_index]);
+ (*fs_types)[array_index] = malloc(CP_EXE_LEN);
+ assert((*fs_types)[array_index]);
+
+ ret = sscanf(++pos, "%" PRId64 "\t%s\t%s", &(*devs)[array_index],
+ (*fs_types)[array_index], (*mnt_pts)[array_index]);
+
+ if(ret != 3)
+ {
+ fprintf(stderr, "Error: poorly formatted mount table in log file.\n");
+ return(-1);
+ }
+ pos--;
+ *pos = '\0';
+ array_index++;
+ }
+
return(0);
}
@@ -340,35 +455,6 @@ int darshan_log_getfile(darshan_fd fd, struct darshan_posix_file *file)
return(-1);
}
-#if 0
-int darshan_log_getexe(darshan_fd fd, char *buf)
-{
- int ret;
- char* newline;
-
- ret = darshan_log_seek(fd, fd->job_struct_size);
- if(ret < 0)
- return(ret);
-
- ret = darshan_log_read(fd, buf, (fd->COMPAT_CP_EXE_LEN + 1));
- if (ret < (fd->COMPAT_CP_EXE_LEN + 1))
- {
- perror("darshan_log_read");
- return(-1);
- }
-
- /* this call is only supposed to return the exe string, but starting in
- * log format 1.23 there could be a table of mount entry information
- * after the exe. Look for newline character and truncate there.
- */
- newline = strchr(buf, '\n');
- if(newline)
- *newline = '\0';
-
- return (0);
-}
-#endif
-
/* darshan_log_close()
*
* close an open darshan file descriptor
@@ -380,7 +466,9 @@ void darshan_log_close(darshan_fd fd)
if(fd->pf)
close(fd->pf);
- free(fd->name);
+ if(fd->exe_mnt_data)
+ free(fd->exe_mnt_data);
+
free(fd);
}
diff --git a/darshan-util/darshan-logutils.h b/darshan-util/darshan-logutils.h
index ec5842b..05c3e9b 100644
--- a/darshan-util/darshan-logutils.h
+++ b/darshan-util/darshan-logutils.h
@@ -22,17 +22,10 @@ darshan_fd darshan_log_open(const char *name, const char* mode);
int darshan_log_getheader(darshan_fd file, struct darshan_header *header);
int darshan_log_getjob(darshan_fd file, struct darshan_job *job);
int darshan_log_gethash(darshan_fd file, struct darshan_record_ref **hash);
-#if 0
-int darshan_log_getfile(darshan_fd fd,
- struct darshan_job* job,
- struct darshan_file *file);
+int darshan_log_getfile(darshan_fd fd, struct darshan_posix_file *file);
int darshan_log_getexe(darshan_fd fd, char *buf);
-int darshan_log_getmounts(darshan_fd fd,
- int64_t** devs,
- char*** mnt_pts,
- char*** fs_types,
- int* count);
-#endif
+int darshan_log_getmounts(darshan_fd fd, int64_t** devs, char*** mnt_pts,
+ char*** fs_types, int* count);
void darshan_log_close(darshan_fd file);
/* convenience macros for printing out counters */
diff --git a/darshan-util/darshan-posix-parser.c b/darshan-util/darshan-posix-parser.c
index 9f07d4f..cc3417e 100644
--- a/darshan-util/darshan-posix-parser.c
+++ b/darshan-util/darshan-posix-parser.c
@@ -21,6 +21,7 @@
int main(int argc, char **argv)
{
int ret;
+ int i;
char *filename;
char tmp_string[4096];
darshan_fd file;
@@ -28,8 +29,15 @@ int main(int argc, char **argv)
struct darshan_job job;
struct darshan_record_ref *rec_hash = NULL;
struct darshan_record_ref *ref, *tmp;
+ int mount_count;
+ int64_t* devs;
+ char** mnt_pts;
+ char** fs_types;
struct darshan_posix_file next_rec;
time_t tmp_time = 0;
+ char *token;
+ char *save;
+ char buffer[DARSHAN_JOB_METADATA_LEN];
assert(argc == 2);
filename = argv[1];
@@ -62,6 +70,15 @@ int main(int argc, char **argv)
return(-1);
}
+ /* get the original command line for this job */
+ ret = darshan_log_getexe(file, tmp_string);
+ if(ret < 0)
+ {
+ fprintf(stderr, "Error: unable to read trailing job information.\n");
+ darshan_log_close(file);
+ return(-1);
+ }
+
/* print job summary */
printf("# darshan log version: %s\n", header.version_string);
printf("# size of POSIX file statistics: %zu bytes\n", sizeof(next_rec));
@@ -78,6 +95,43 @@ int main(int argc, char **argv)
printf("# end_time_asci: %s", ctime(&tmp_time));
printf("# nprocs: %" PRId64 "\n", job.nprocs);
printf("# run time: %" PRId64 "\n", job.end_time - job.start_time + 1);
+ for(token=strtok_r(job.metadata, "\n", &save);
+ token != NULL;
+ token=strtok_r(NULL, "\n", &save))
+ {
+ char *key;
+ char *value;
+ /* NOTE: we intentionally only split on the first = character.
+ * There may be additional = characters in the value portion
+ * (for example, when storing mpi-io hints).
+ */
+ strcpy(buffer, token);
+ key = buffer;
+ value = index(buffer, '=');
+ if(!value)
+ continue;
+ /* convert = to a null terminator to split key and value */
+ value[0] = '\0';
+ value++;
+ printf("# metadata: %s = %s\n", key, value);
+ }
+
+ /* get the mount information for this log */
+ ret = darshan_log_getmounts(file, &devs, &mnt_pts, &fs_types, &mount_count);
+ if(ret < 0)
+ {
+ fprintf(stderr, "darshan_log_getmounts() failed to read mount information.\n");
+ darshan_log_close(file);
+ return(-1);
+ }
+
+ /* print table of mounted file systems */
+ printf("\n# mounted file systems (device, mount point, and fs type)\n");
+ printf("# -------------------------------------------------------\n");
+ for(i=0; i<mount_count; i++)
+ {
+ printf("# mount entry: %" PRId64 "\t%s\t%s\n", devs[i], mnt_pts[i], fs_types[i]);
+ }
/* read hash of darshan records */
ret = darshan_log_gethash(file, &rec_hash);
@@ -106,7 +160,7 @@ int main(int argc, char **argv)
/* iterate the posix file records stored in the darshan log */
printf("\n*** FILE RECORD DATA ***\n");
- int i = 0;
+ i = 0;
do{
struct darshan_record_ref *ref;
hooks/post-receive
--
More information about the Darshan-commits
mailing list