[Darshan-commits] [Git][darshan/darshan][dev-stdio-utils] 8 commits: prepend file system mount point to hashed file name to retain file system info for obfuscated files
Philip Carns
xgitlab at cels.anl.gov
Mon Sep 12 15:32:12 CDT 2016
Philip Carns pushed to branch dev-stdio-utils at darshan / darshan
Commits:
0d1f6e80 by Glenn K. Lockwood at 2016-08-16T13:26:33-07:00
prepend file system mount point to hashed file name to retain file system info for obfuscated files
- - - - -
6e75dade by Glenn K. Lockwood at 2016-08-24T09:57:37-07:00
Merge branch 'master' into gkl/obfuscate-fs-fix to pull in fix from d4dc84f
- - - - -
eadbecfd by Glenn K. Lockwood at 2016-08-24T10:00:07-07:00
fix wrong struct type (inconsequential as it may be in the given context)
- - - - -
0ed720e8 by Shane Snyder at 2016-08-24T21:30:03-05:00
filter stderr/stdin/stdout for regression tests
- - - - -
df34ba2b by Shane Snyder at 2016-09-08T15:16:31-07:00
add unique id to mmap log file names
This is needed to distinguish between log files generated with
the same job id (i.e., for jobs that have multiple steps)
- - - - -
7f05cf73 by Shane Snyder at 2016-09-09T16:39:48-05:00
Merge branch 'gkl/obfuscate-fs-fix' into 'master'
gkl/obfuscate fs fix: prepend file path to hashed file name to retain fs info
Prepends the file system path to the hashed file name generated by `darshan-convert --obfuscate` so that darshan-parser can still link obfuscated file records to the mounts table.
See merge request !1
- - - - -
c877a1c1 by Shane Snyder at 2016-09-12T13:28:37-05:00
remove POSIX fstream references from job-summary
- - - - -
4addc19b by Phil Carns at 2016-09-12T16:28:02-04:00
Merge branch 'dev-stdio-utils'
- - - - -
5 changed files:
- darshan-runtime/lib/darshan-core.c
- darshan-test/regression/test-cases/fprintf-fscanf-test.sh
- darshan-test/regression/test-cases/stdio-test.sh
- darshan-util/darshan-convert.c
- darshan-util/darshan-logutils.c
Changes:
=====================================
darshan-runtime/lib/darshan-core.c
=====================================
--- a/darshan-runtime/lib/darshan-core.c
+++ b/darshan-runtime/lib/darshan-core.c
@@ -786,6 +786,9 @@ static void *darshan_init_mmap_log(struct darshan_core_runtime* core, int jobid)
int mmap_size;
int sys_page_size;
char cuser[L_cuserid] = {0};
+ uint64_t hlevel;
+ char hname[HOST_NAME_MAX];
+ uint64_t logmod;
char *envstr;
char *mmap_log_path;
void *mmap_p;
@@ -806,12 +809,25 @@ static void *darshan_init_mmap_log(struct darshan_core_runtime* core, int jobid)
darshan_get_user_name(cuser);
+ /* generate a random number to help differentiate the temporary log */
+ /* NOTE: job id is not sufficient for constructing a unique log file name,
+ * since a job could be composed of multiple application runs, so we also
+ * add a random number component to the log name
+ */
+ if(my_rank == 0)
+ {
+ hlevel=DARSHAN_MPI_CALL(PMPI_Wtime)() * 1000000;
+ (void)gethostname(hname, sizeof(hname));
+ logmod = darshan_hash((void*)hname,strlen(hname),hlevel);
+ }
+ DARSHAN_MPI_CALL(PMPI_Bcast)(&logmod, 1, MPI_UINT64_T, 0, MPI_COMM_WORLD);
+
/* construct a unique temporary log file name for this process
* to write mmap log data to
*/
snprintf(core->mmap_log_name, PATH_MAX,
- "/%s/%s_%s_id%d_mmap-log-%d.darshan",
- mmap_log_path, cuser, __progname, jobid, my_rank);
+ "/%s/%s_%s_id%d_mmap-log-%" PRIu64 "-%d.darshan",
+ mmap_log_path, cuser, __progname, jobid, logmod, my_rank);
/* create the temporary mmapped darshan log */
mmap_fd = open(core->mmap_log_name, O_CREAT|O_RDWR|O_EXCL , 0644);
=====================================
darshan-test/regression/test-cases/fprintf-fscanf-test.sh
=====================================
--- a/darshan-test/regression/test-cases/fprintf-fscanf-test.sh
+++ b/darshan-test/regression/test-cases/fprintf-fscanf-test.sh
@@ -29,21 +29,21 @@ fi
# check results
-STDIO_OPENS=`grep STDIO_OPENS $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_OPENS=`grep STDIO_OPENS $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
if [ ! "$STDIO_OPENS" -eq 4 ]; then
echo "Error: STDIO open count of $STDIO_OPENS is incorrect" 1>&2
exit 1
fi
# this will check fprintf counting
-STDIO_BYTES_WRITTEN=`grep STDIO_BYTES_WRITTEN $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_BYTES_WRITTEN=`grep STDIO_BYTES_WRITTEN $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
if [ ! "$STDIO_BYTES_WRITTEN" -eq 15 ]; then
echo "Error: STDIO bytes written count of $STDIO_BYTES_WRITTEN is incorrect" 1>&2
exit 1
fi
# this will check fscanf counting
-STDIO_BYTES_READ=`grep STDIO_BYTES_READ $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_BYTES_READ=`grep STDIO_BYTES_READ $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
if [ ! "$STDIO_BYTES_READ" -eq 15 ]; then
echo "Error: STDIO bytes read count of $STDIO_BYTES_READ is incorrect" 1>&2
exit 1
=====================================
darshan-test/regression/test-cases/stdio-test.sh
=====================================
--- a/darshan-test/regression/test-cases/stdio-test.sh
+++ b/darshan-test/regression/test-cases/stdio-test.sh
@@ -31,22 +31,22 @@ fi
# check at least one counter from each general open/read/write/seek category
-STDIO_OPENS=`grep STDIO_OPENS $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_OPENS=`grep STDIO_OPENS $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
if [ ! "$STDIO_OPENS" -gt 0 ]; then
echo "Error: STDIO open count of $STDIO_OPENS is incorrect" 1>&2
exit 1
fi
-STDIO_SEEKS=`grep STDIO_SEEKS $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_SEEKS=`grep STDIO_SEEKS $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
if [ ! "$STDIO_SEEKS" -gt 0 ]; then
echo "Error: STDIO open count of $STDIO_SEEKS is incorrect" 1>&2
exit 1
fi
-STDIO_BYTES_WRITTEN=`grep STDIO_BYTES_WRITTEN $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_BYTES_WRITTEN=`grep STDIO_BYTES_WRITTEN $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
if [ ! "$STDIO_BYTES_WRITTEN" -eq 6 ]; then
echo "Error: STDIO open count of $STDIO_BYTES_WRITTEN is incorrect" 1>&2
exit 1
fi
-STDIO_BYTES_READ=`grep STDIO_BYTES_READ $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_BYTES_READ=`grep STDIO_BYTES_READ $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
if [ ! "$STDIO_BYTES_READ" -eq 6 ]; then
echo "Error: STDIO open count of $STDIO_BYTES_READ is incorrect" 1>&2
exit 1
@@ -55,32 +55,32 @@ fi
# make sure that some of the floating point counters are valid
# use bc for floating point comparison
-STDIO_F_OPEN_START_TIMESTAMP=`grep STDIO_F_OPEN_START_TIMESTAMP $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_F_OPEN_START_TIMESTAMP=`grep STDIO_F_OPEN_START_TIMESTAMP $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
if [ ! $(echo "$STDIO_F_OPEN_START_TIMESTAMP > 0" | bc -l) ]; then
echo "Error: counter is incorrect" 1>&2
exit 1
fi
-STDIO_F_OPEN_END_TIMESTAMP=`grep STDIO_F_OPEN_END_TIMESTAMP $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_F_OPEN_END_TIMESTAMP=`grep STDIO_F_OPEN_END_TIMESTAMP $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
if [ ! $(echo "$STDIO_F_OPEN_END_TIMESTAMP > 0" | bc -l) ]; then
echo "Error: counter is incorrect" 1>&2
exit 1
fi
-STDIO_F_META_TIME=`grep STDIO_F_META_TIME $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_F_META_TIME=`grep STDIO_F_META_TIME $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
if [ ! $(echo "$STDIO_F_META_TIME > 0" | bc -l) ]; then
echo "Error: counter is incorrect" 1>&2
exit 1
fi
-STDIO_F_WRITE_TIME=`grep STDIO_F_WRITE_TIME $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_F_WRITE_TIME=`grep STDIO_F_WRITE_TIME $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
if [ ! $(echo "$STDIO_F_WRITE_TIME > 0" | bc -l) ]; then
echo "Error: counter is incorrect" 1>&2
exit 1
fi
-STDIO_F_CLOSE_START_TIMESTAMP=`grep STDIO_F_CLOSE_START_TIMESTAMP $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_F_CLOSE_START_TIMESTAMP=`grep STDIO_F_CLOSE_START_TIMESTAMP $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
if [ ! $(echo "$STDIO_F_CLOSE_START_TIMESTAMP > 0" | bc -l) ]; then
echo "Error: counter is incorrect" 1>&2
exit 1
fi
-STDIO_F_CLOSE_END_TIMESTAMP=`grep STDIO_F_CLOSE_END_TIMESTAMP $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_F_CLOSE_END_TIMESTAMP=`grep STDIO_F_CLOSE_END_TIMESTAMP $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
if [ ! $(echo "$STDIO_F_CLOSE_END_TIMESTAMP > 0" | bc -l) ]; then
echo "Error: counter is incorrect" 1>&2
exit 1
=====================================
darshan-util/darshan-convert.c
=====================================
--- a/darshan-util/darshan-convert.c
+++ b/darshan-util/darshan-convert.c
@@ -138,19 +138,41 @@ void obfuscate_exe(int key, char *exe)
return;
}
-void obfuscate_filenames(int key, struct darshan_name_record_ref *name_hash)
+void obfuscate_filenames(int key, struct darshan_name_record_ref *name_hash, struct darshan_mnt_info *mnt_data_array, int mount_count )
{
struct darshan_name_record_ref *ref, *tmp;
uint32_t hashed;
- char tmp_string[128] = {0};
+ char tmp_string[PATH_MAX+128] = {0};
darshan_record_id tmp_id;
HASH_ITER(hlink, name_hash, ref, tmp)
{
+ /* find file system */
+ int j;
+ char *mnt_pt = NULL;
+
+ /* get mount point and fs type associated with this record */
+ for(j=0; j<mount_count; j++)
+ {
+ if(strncmp(mnt_data_array[j].mnt_path, ref->name_record->name,
+ strlen(mnt_data_array[j].mnt_path)) == 0)
+ {
+ mnt_pt = mnt_data_array[j].mnt_path;
+ break;
+ }
+ }
+
tmp_id = ref->name_record->id;
hashed = darshan_hashlittle(ref->name_record->name,
strlen(ref->name_record->name), key);
- sprintf(tmp_string, "%u", hashed);
+ if ( mnt_pt != NULL )
+ {
+ sprintf(tmp_string, "%s/%u", mnt_pt, hashed);
+ }
+ else
+ {
+ sprintf(tmp_string, "%u", hashed);
+ }
free(ref->name_record);
ref->name_record = malloc(sizeof(struct darshan_name_record) +
strlen(tmp_string));
@@ -330,7 +352,7 @@ int main(int argc, char **argv)
/* NOTE: obfuscating filepaths breaks the ability to map files
* to the corresponding FS & mount info maintained by darshan
*/
- if(obfuscate) obfuscate_filenames(key, name_hash);
+ if(obfuscate) obfuscate_filenames(key, name_hash, mnt_data_array, mount_count );
if(hash) remove_hash_recs(&name_hash, hash);
ret = darshan_log_put_namehash(outfile, name_hash);
=====================================
darshan-util/darshan-logutils.c
=====================================
--- a/darshan-util/darshan-logutils.c
+++ b/darshan-util/darshan-logutils.c
@@ -586,7 +586,7 @@ int darshan_log_put_namehash(darshan_fd fd, struct darshan_name_record_ref *hash
{
struct darshan_fd_int_state *state = fd->state;
struct darshan_name_record_ref *ref, *tmp;
- struct darshan_name_record_ref *name_rec;
+ struct darshan_name_record *name_rec;
int name_rec_len;
int wrote;
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/compare/e95a46ed1961d61efb351d89fbc536e2e1c672d3...4addc19b7b8ce1944b98884dc83cc085f361cf5e
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20160912/69fcefba/attachment-0001.html>
More information about the Darshan-commits mailing list