[Darshan-commits] [Git][darshan/darshan][master] bug fix in gathering of lustre fs data

Shane Snyder xgitlab at cels.anl.gov
Wed Jul 6 10:24:53 CDT 2016


Shane Snyder pushed to branch master at darshan / darshan


Commits:
2770f997 by Shane Snyder at 2016-07-06T08:21:19-07:00
bug fix in gathering of lustre fs data

There are cases where Lustre stripe info is not available the
first time a given file is opened (e.g., when using the
llapi_file_create call). So we now call into the Lustre module
every time a file is opened, to give the module a chance to gather
the data.

- - - - -


2 changed files:

- darshan-runtime/lib/darshan-lustre.c
- darshan-runtime/lib/darshan-posix.c


Changes:

=====================================
darshan-runtime/lib/darshan-lustre.c
=====================================
--- a/darshan-runtime/lib/darshan-lustre.c
+++ b/darshan-runtime/lib/darshan-lustre.c
@@ -73,34 +73,34 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
         return;
     }
 
-    /* if we can't issue ioctl, we have no counter data at all */
-    if ( (lum = calloc(1, lumsize)) == NULL )
-    {
-        LUSTRE_UNLOCK();
-        return;
-    }
-
-    /* find out the OST count of this file so we can allocate memory */
-    lum->lmm_magic = LOV_USER_MAGIC;
-    lum->lmm_stripe_count = LOV_MAX_STRIPE_COUNT;
-
-    /* -1 means ioctl failed, likely because file isn't on Lustre */
-    if ( ioctl( fd, LL_IOC_LOV_GETSTRIPE, (void *)lum ) == -1 )
-    {
-        free(lum);
-        LUSTRE_UNLOCK();
-        return;
-    }
-
-    rec_id = darshan_core_gen_record_id(filepath);
-    rec_size = LUSTRE_RECORD_SIZE( lum->lmm_stripe_count );
-
     /* search the hash table for this file record, and initialize if not found */
+    rec_id = darshan_core_gen_record_id(filepath);
     rec_ref = darshan_lookup_record_ref(lustre_runtime->record_id_hash,
         &rec_id, sizeof(darshan_record_id));
     if(!rec_ref)
     {
-        /* not found, allocate and add a new record reference */
+        /* first issue LUSTRE ioctl to see if we can get stripe data */
+
+        /* if we can't issue ioctl, we have no counter data at all */
+        if ( (lum = calloc(1, lumsize)) == NULL )
+        {
+            LUSTRE_UNLOCK();
+            return;
+        }
+
+        /* find out the OST count of this file so we can allocate memory */
+        lum->lmm_magic = LOV_USER_MAGIC;
+        lum->lmm_stripe_count = LOV_MAX_STRIPE_COUNT;
+
+        /* -1 means ioctl failed, likely because file isn't on Lustre */
+        if ( ioctl( fd, LL_IOC_LOV_GETSTRIPE, (void *)lum ) == -1 )
+        {
+            free(lum);
+            LUSTRE_UNLOCK();
+            return;
+        }
+
+        /* allocate and add a new record reference */
         rec_ref = malloc(sizeof(*rec_ref));
         if(!rec_ref)
         {
@@ -119,6 +119,8 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
             return;
         }
 
+        rec_size = LUSTRE_RECORD_SIZE( lum->lmm_stripe_count );
+
         /* register a Lustre file record with Darshan */
         fs_info.fs_type = -1;
         rec = darshan_core_register_record(


=====================================
darshan-runtime/lib/darshan-posix.c
=====================================
--- a/darshan-runtime/lib/darshan-posix.c
+++ b/darshan-runtime/lib/darshan-posix.c
@@ -37,8 +37,8 @@ typedef int64_t off64_t;
 #define aiocb64 aiocb
 #endif
 
-#ifndef LL_SUPER_MAGIC
-#define LL_SUPER_MAGIC 0x0BD00BD0
+#ifdef DARSHAN_LUSTRE
+#include <lustre/lustre_user.h>
 #endif
 
 DARSHAN_FORWARD_DECL(open, int, (const char *path, int flags, ...));
@@ -122,6 +122,7 @@ struct posix_file_record_ref
     void *stride_root;
     int stride_count;
     struct posix_aio_tracker* aio_list;
+    int fs_type; /* same as darshan_fs_info->fs_type */
 };
 
 /* The posix_runtime structure maintains necessary state for storing
@@ -146,7 +147,9 @@ struct posix_aio_tracker
 static void posix_runtime_initialize(
     void);
 static struct posix_file_record_ref *posix_track_new_file_record(
-    darshan_record_id rec_id, const char *path, int fd);
+    darshan_record_id rec_id, const char *path);
+static void posix_instrument_fs_data(
+    int fs_type, const char *path, int fd);
 static void posix_aio_tracker_add(
     int fd, void *aiocbp);
 static struct posix_aio_tracker* posix_aio_tracker_del(
@@ -207,7 +210,7 @@ static int darshan_mem_alignment = 1;
     } \
     rec_id = darshan_core_gen_record_id(newpath); \
     rec_ref = darshan_lookup_record_ref(posix_runtime->rec_id_hash, &rec_id, sizeof(darshan_record_id)); \
-    if(!rec_ref) rec_ref = posix_track_new_file_record(rec_id, newpath, __ret); \
+    if(!rec_ref) rec_ref = posix_track_new_file_record(rec_id, newpath); \
     if(!rec_ref) { \
         if(newpath != __path) free(newpath); \
         break; \
@@ -227,6 +230,7 @@ static int darshan_mem_alignment = 1;
     DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[POSIX_F_META_TIME], \
         __tm1, __tm2, rec_ref->last_meta_end); \
     darshan_add_record_ref(&(posix_runtime->fd_hash), &__ret, sizeof(int), rec_ref); \
+    posix_instrument_fs_data(rec_ref->fs_type, newpath, __ret); \
     if(newpath != __path) free(newpath); \
 } while(0)
 
@@ -355,7 +359,7 @@ static int darshan_mem_alignment = 1;
     } \
     rec_id = darshan_core_gen_record_id(newpath); \
     rec_ref = darshan_lookup_record_ref(posix_runtime->rec_id_hash, &rec_id, sizeof(darshan_record_id)); \
-    if(!rec_ref) rec_ref = posix_track_new_file_record(rec_id, newpath, -1); \
+    if(!rec_ref) rec_ref = posix_track_new_file_record(rec_id, newpath); \
     if(newpath != __path) free(newpath); \
     if(rec_ref) { \
         POSIX_RECORD_STAT(rec_ref, __statbuf, __tm1, __tm2); \
@@ -1451,7 +1455,7 @@ static void posix_runtime_initialize()
 }
 
 static struct posix_file_record_ref *posix_track_new_file_record(
-    darshan_record_id rec_id, const char *path, int fd)
+    darshan_record_id rec_id, const char *path)
 {
     struct darshan_posix_file *file_rec = NULL;
     struct posix_file_record_ref *rec_ref = NULL;
@@ -1496,18 +1500,23 @@ static struct posix_file_record_ref *posix_track_new_file_record(
     file_rec->counters[POSIX_MEM_ALIGNMENT] = darshan_mem_alignment;
     file_rec->counters[POSIX_FILE_ALIGNMENT] = fs_info.block_size;
     rec_ref->file_rec = file_rec;
+    rec_ref->fs_type = fs_info.fs_type;
     posix_runtime->file_rec_count++;
 
-    if(fd >= 0)
-    {
+    return(rec_ref);
+}
+
+static void posix_instrument_fs_data(int fs_type, const char *path, int fd)
+{
 #ifdef DARSHAN_LUSTRE
-        /* allow lustre to generate a record if we configured with lustre support */
-        if(fs_info.fs_type == LL_SUPER_MAGIC)
-            darshan_instrument_lustre_file(path, fd);
-#endif
+    /* allow lustre to generate a record if we configured with lustre support */
+    if(fs_type == LL_SUPER_MAGIC)
+    {
+        darshan_instrument_lustre_file(path, fd);
+        return;
     }
-
-    return(rec_ref);
+#endif
+    return;
 }
 
 /* finds the tracker structure for a given aio operation, removes it from



View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/commit/2770f997eeb71e87401465da6348bc87b5d3c440
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20160706/b6ba7d81/attachment-0001.html>


More information about the Darshan-commits mailing list