[Darshan-commits] [Git][darshan/darshan][dev-stdio-utils] 8 commits: prepend file system mount point to hashed file name to retain file system info for obfuscated files

Philip Carns xgitlab at cels.anl.gov
Mon Sep 12 15:32:12 CDT 2016


Philip Carns pushed to branch dev-stdio-utils at darshan / darshan


Commits:
0d1f6e80 by Glenn K. Lockwood at 2016-08-16T13:26:33-07:00
prepend file system mount point to hashed file name to retain file system info for obfuscated files

- - - - -
6e75dade by Glenn K. Lockwood at 2016-08-24T09:57:37-07:00
Merge branch 'master' into gkl/obfuscate-fs-fix to pull in fix from d4dc84f

- - - - -
eadbecfd by Glenn K. Lockwood at 2016-08-24T10:00:07-07:00
fix wrong struct type (inconsequential as it may be in the given context)

- - - - -
0ed720e8 by Shane Snyder at 2016-08-24T21:30:03-05:00
filter stderr/stdin/stdout for regression tests

- - - - -
df34ba2b by Shane Snyder at 2016-09-08T15:16:31-07:00
add unique id to mmap log file names

This is needed to distinguish between log files generated with
the same job id (i.e., for jobs that have multiple steps)

- - - - -
7f05cf73 by Shane Snyder at 2016-09-09T16:39:48-05:00
Merge branch 'gkl/obfuscate-fs-fix' into 'master'

gkl/obfuscate fs fix: prepend file path to hashed file name to retain fs info

Prepends the file system path to the hashed file name generated by `darshan-convert --obfuscate` so that darshan-parser can still link obfuscated file records to the mounts table.

See merge request !1

- - - - -
c877a1c1 by Shane Snyder at 2016-09-12T13:28:37-05:00
remove POSIX fstream references from job-summary

- - - - -
4addc19b by Phil Carns at 2016-09-12T16:28:02-04:00
Merge branch 'dev-stdio-utils'

- - - - -


5 changed files:

- darshan-runtime/lib/darshan-core.c
- darshan-test/regression/test-cases/fprintf-fscanf-test.sh
- darshan-test/regression/test-cases/stdio-test.sh
- darshan-util/darshan-convert.c
- darshan-util/darshan-logutils.c


Changes:

=====================================
darshan-runtime/lib/darshan-core.c
=====================================
--- a/darshan-runtime/lib/darshan-core.c
+++ b/darshan-runtime/lib/darshan-core.c
@@ -786,6 +786,9 @@ static void *darshan_init_mmap_log(struct darshan_core_runtime* core, int jobid)
     int mmap_size;
     int sys_page_size;
     char cuser[L_cuserid] = {0};
+    uint64_t hlevel;
+    char hname[HOST_NAME_MAX];
+    uint64_t logmod;
     char *envstr;
     char *mmap_log_path;
     void *mmap_p;
@@ -806,12 +809,25 @@ static void *darshan_init_mmap_log(struct darshan_core_runtime* core, int jobid)
 
     darshan_get_user_name(cuser);
 
+    /* generate a random number to help differentiate the temporary log */
+    /* NOTE: job id is not sufficient for constructing a unique log file name,
+     * since a job could be composed of multiple application runs, so we also
+     * add a random number component to the log name
+     */
+    if(my_rank == 0)
+    {
+        hlevel=DARSHAN_MPI_CALL(PMPI_Wtime)() * 1000000;
+        (void)gethostname(hname, sizeof(hname));
+        logmod = darshan_hash((void*)hname,strlen(hname),hlevel);
+    }
+    DARSHAN_MPI_CALL(PMPI_Bcast)(&logmod, 1, MPI_UINT64_T, 0, MPI_COMM_WORLD);
+
     /* construct a unique temporary log file name for this process
      * to write mmap log data to
      */
     snprintf(core->mmap_log_name, PATH_MAX,
-        "/%s/%s_%s_id%d_mmap-log-%d.darshan",
-        mmap_log_path, cuser, __progname, jobid, my_rank);
+        "/%s/%s_%s_id%d_mmap-log-%" PRIu64 "-%d.darshan",
+        mmap_log_path, cuser, __progname, jobid, logmod, my_rank);
 
     /* create the temporary mmapped darshan log */
     mmap_fd = open(core->mmap_log_name, O_CREAT|O_RDWR|O_EXCL , 0644);


=====================================
darshan-test/regression/test-cases/fprintf-fscanf-test.sh
=====================================
--- a/darshan-test/regression/test-cases/fprintf-fscanf-test.sh
+++ b/darshan-test/regression/test-cases/fprintf-fscanf-test.sh
@@ -29,21 +29,21 @@ fi
 
 # check results
 
-STDIO_OPENS=`grep STDIO_OPENS $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_OPENS=`grep STDIO_OPENS $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
 if [ ! "$STDIO_OPENS" -eq 4 ]; then
     echo "Error: STDIO open count of $STDIO_OPENS is incorrect" 1>&2
     exit 1
 fi
 
 # this will check fprintf counting
-STDIO_BYTES_WRITTEN=`grep STDIO_BYTES_WRITTEN $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_BYTES_WRITTEN=`grep STDIO_BYTES_WRITTEN $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
 if [ ! "$STDIO_BYTES_WRITTEN" -eq 15 ]; then
     echo "Error: STDIO bytes written count of $STDIO_BYTES_WRITTEN is incorrect" 1>&2
     exit 1
 fi
 
 # this will check fscanf counting
-STDIO_BYTES_READ=`grep STDIO_BYTES_READ $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_BYTES_READ=`grep STDIO_BYTES_READ $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
 if [ ! "$STDIO_BYTES_READ" -eq 15 ]; then
     echo "Error: STDIO bytes read count of $STDIO_BYTES_READ is incorrect" 1>&2
     exit 1


=====================================
darshan-test/regression/test-cases/stdio-test.sh
=====================================
--- a/darshan-test/regression/test-cases/stdio-test.sh
+++ b/darshan-test/regression/test-cases/stdio-test.sh
@@ -31,22 +31,22 @@ fi
 
 # check at least one counter from each general open/read/write/seek category
 
-STDIO_OPENS=`grep STDIO_OPENS $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_OPENS=`grep STDIO_OPENS $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
 if [ ! "$STDIO_OPENS" -gt 0 ]; then
     echo "Error: STDIO open count of $STDIO_OPENS is incorrect" 1>&2
     exit 1
 fi
-STDIO_SEEKS=`grep STDIO_SEEKS $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_SEEKS=`grep STDIO_SEEKS $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
 if [ ! "$STDIO_SEEKS" -gt 0 ]; then
     echo "Error: STDIO open count of $STDIO_SEEKS is incorrect" 1>&2
     exit 1
 fi
-STDIO_BYTES_WRITTEN=`grep STDIO_BYTES_WRITTEN $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_BYTES_WRITTEN=`grep STDIO_BYTES_WRITTEN $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
 if [ ! "$STDIO_BYTES_WRITTEN" -eq 6 ]; then
     echo "Error: STDIO open count of $STDIO_BYTES_WRITTEN is incorrect" 1>&2
     exit 1
 fi
-STDIO_BYTES_READ=`grep STDIO_BYTES_READ $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_BYTES_READ=`grep STDIO_BYTES_READ $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
 if [ ! "$STDIO_BYTES_READ" -eq 6 ]; then
     echo "Error: STDIO open count of $STDIO_BYTES_READ is incorrect" 1>&2
     exit 1
@@ -55,32 +55,32 @@ fi
 
 # make sure that some of the floating point counters are valid
 # use bc for floating point comparison
-STDIO_F_OPEN_START_TIMESTAMP=`grep STDIO_F_OPEN_START_TIMESTAMP $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_F_OPEN_START_TIMESTAMP=`grep STDIO_F_OPEN_START_TIMESTAMP $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
 if [ ! $(echo "$STDIO_F_OPEN_START_TIMESTAMP > 0" | bc -l) ]; then
     echo "Error: counter is incorrect" 1>&2
     exit 1
 fi
-STDIO_F_OPEN_END_TIMESTAMP=`grep STDIO_F_OPEN_END_TIMESTAMP $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_F_OPEN_END_TIMESTAMP=`grep STDIO_F_OPEN_END_TIMESTAMP $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
 if [ ! $(echo "$STDIO_F_OPEN_END_TIMESTAMP > 0" | bc -l) ]; then
     echo "Error: counter is incorrect" 1>&2
     exit 1
 fi
-STDIO_F_META_TIME=`grep STDIO_F_META_TIME $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_F_META_TIME=`grep STDIO_F_META_TIME $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
 if [ ! $(echo "$STDIO_F_META_TIME > 0" | bc -l) ]; then
     echo "Error: counter is incorrect" 1>&2
     exit 1
 fi
-STDIO_F_WRITE_TIME=`grep STDIO_F_WRITE_TIME $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_F_WRITE_TIME=`grep STDIO_F_WRITE_TIME $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
 if [ ! $(echo "$STDIO_F_WRITE_TIME > 0" | bc -l) ]; then
     echo "Error: counter is incorrect" 1>&2
     exit 1
 fi
-STDIO_F_CLOSE_START_TIMESTAMP=`grep STDIO_F_CLOSE_START_TIMESTAMP $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_F_CLOSE_START_TIMESTAMP=`grep STDIO_F_CLOSE_START_TIMESTAMP $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
 if [ ! $(echo "$STDIO_F_CLOSE_START_TIMESTAMP > 0" | bc -l) ]; then
     echo "Error: counter is incorrect" 1>&2
     exit 1
 fi
-STDIO_F_CLOSE_END_TIMESTAMP=`grep STDIO_F_CLOSE_END_TIMESTAMP $DARSHAN_TMP/${PROG}.darshan.txt | cut -f 5`
+STDIO_F_CLOSE_END_TIMESTAMP=`grep STDIO_F_CLOSE_END_TIMESTAMP $DARSHAN_TMP/${PROG}.darshan.txt | grep -vE "STDIN|STDOUT|STDERR" | cut -f 5`
 if [ ! $(echo "$STDIO_F_CLOSE_END_TIMESTAMP > 0" | bc -l) ]; then
     echo "Error: counter is incorrect" 1>&2
     exit 1


=====================================
darshan-util/darshan-convert.c
=====================================
--- a/darshan-util/darshan-convert.c
+++ b/darshan-util/darshan-convert.c
@@ -138,19 +138,41 @@ void obfuscate_exe(int key, char *exe)
     return;
 }
 
-void obfuscate_filenames(int key, struct darshan_name_record_ref *name_hash)
+void obfuscate_filenames(int key, struct darshan_name_record_ref *name_hash, struct darshan_mnt_info *mnt_data_array, int mount_count )
 {
     struct darshan_name_record_ref *ref, *tmp;
     uint32_t hashed;
-    char tmp_string[128] = {0};
+    char tmp_string[PATH_MAX+128] = {0};
     darshan_record_id tmp_id;
 
     HASH_ITER(hlink, name_hash, ref, tmp)
     {
+        /* find file system */
+        int j;
+        char *mnt_pt = NULL;
+
+        /* get mount point and fs type associated with this record */
+        for(j=0; j<mount_count; j++)
+        {
+            if(strncmp(mnt_data_array[j].mnt_path, ref->name_record->name,
+                strlen(mnt_data_array[j].mnt_path)) == 0)
+            {
+                mnt_pt = mnt_data_array[j].mnt_path;
+                break;
+            }
+        }
+
         tmp_id = ref->name_record->id;
         hashed = darshan_hashlittle(ref->name_record->name,
             strlen(ref->name_record->name), key);
-        sprintf(tmp_string, "%u", hashed);
+        if ( mnt_pt != NULL ) 
+        {
+            sprintf(tmp_string, "%s/%u", mnt_pt, hashed);
+        }
+        else 
+        {
+            sprintf(tmp_string, "%u", hashed);
+        }
         free(ref->name_record);
         ref->name_record = malloc(sizeof(struct darshan_name_record) +
             strlen(tmp_string));
@@ -330,7 +352,7 @@ int main(int argc, char **argv)
     /* NOTE: obfuscating filepaths breaks the ability to map files
      * to the corresponding FS & mount info maintained by darshan
      */
-    if(obfuscate) obfuscate_filenames(key, name_hash);
+    if(obfuscate) obfuscate_filenames(key, name_hash, mnt_data_array, mount_count );
     if(hash) remove_hash_recs(&name_hash, hash);
 
     ret = darshan_log_put_namehash(outfile, name_hash);


=====================================
darshan-util/darshan-logutils.c
=====================================
--- a/darshan-util/darshan-logutils.c
+++ b/darshan-util/darshan-logutils.c
@@ -586,7 +586,7 @@ int darshan_log_put_namehash(darshan_fd fd, struct darshan_name_record_ref *hash
 {
     struct darshan_fd_int_state *state = fd->state;
     struct darshan_name_record_ref *ref, *tmp;
-    struct darshan_name_record_ref *name_rec;
+    struct darshan_name_record *name_rec;
     int name_rec_len;
     int wrote;
 



View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/compare/e95a46ed1961d61efb351d89fbc536e2e1c672d3...4addc19b7b8ce1944b98884dc83cc085f361cf5e
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20160912/69fcefba/attachment-0001.html>


More information about the Darshan-commits mailing list