[Darshan-commits] [Git][darshan/darshan][master] 2 commits: switch lustre ioctls to fgetxattr

Shane Snyder xgitlab at cels.anl.gov
Thu Mar 18 00:47:07 CDT 2021



Shane Snyder pushed to branch master at darshan / darshan


Commits:
332f4ebb by Shane Snyder at 2021-03-18T00:47:00-05:00
switch lustre ioctls to fgetxattr

Some configure cleanup, as well as modification of Darshan's
linker flags when the Lustre module is enabled.

Fixes #270

- - - - -
e245da9d by Shane Snyder at 2021-03-18T00:47:00-05:00
Merge branch 'dev-270-lustre-ioctl-crash' into 'master'

modify Darshan's Lustre module to use fgetxattr for getting stripe info

See merge request darshan/darshan!89
- - - - -


5 changed files:

- darshan-runtime/configure
- darshan-runtime/configure.in
- darshan-runtime/darshan-config.in
- darshan-runtime/lib/darshan-lustre.c
- darshan-runtime/lib/pkgconfig/darshan-runtime.pc.in


Changes:

=====================================
darshan-runtime/configure
=====================================
@@ -623,6 +623,7 @@ ac_subst_vars='LTLIBOBJS
 LIBOBJS
 DARSHAN_MDHIM_LD_OPTS
 BUILD_MDHIM_MODULE
+DARSHAN_LUSTRE_LD_FLAGS
 BUILD_LUSTRE_MODULE
 BUILD_BGQ_MODULE
 DARSHAN_HDF5_LD_FLAGS
@@ -684,7 +685,6 @@ infodir
 docdir
 oldincludedir
 includedir
-runstatedir
 localstatedir
 sharedstatedir
 sysconfdir
@@ -779,7 +779,6 @@ datadir='${datarootdir}'
 sysconfdir='${prefix}/etc'
 sharedstatedir='${prefix}/com'
 localstatedir='${prefix}/var'
-runstatedir='${localstatedir}/run'
 includedir='${prefix}/include'
 oldincludedir='/usr/include'
 docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
@@ -1032,15 +1031,6 @@ do
   | -silent | --silent | --silen | --sile | --sil)
     silent=yes ;;
 
-  -runstatedir | --runstatedir | --runstatedi | --runstated \
-  | --runstate | --runstat | --runsta | --runst | --runs \
-  | --run | --ru | --r)
-    ac_prev=runstatedir ;;
-  -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
-  | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
-  | --run=* | --ru=* | --r=*)
-    runstatedir=$ac_optarg ;;
-
   -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
     ac_prev=sbindir ;;
   -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
@@ -1178,7 +1168,7 @@ fi
 for ac_var in	exec_prefix prefix bindir sbindir libexecdir datarootdir \
 		datadir sysconfdir sharedstatedir localstatedir includedir \
 		oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
-		libdir localedir mandir runstatedir
+		libdir localedir mandir
 do
   eval ac_val=\$$ac_var
   # Remove trailing slashes.
@@ -1331,7 +1321,6 @@ Fine tuning of the installation directories:
   --sysconfdir=DIR        read-only single-machine data [PREFIX/etc]
   --sharedstatedir=DIR    modifiable architecture-independent data [PREFIX/com]
   --localstatedir=DIR     modifiable single-machine data [PREFIX/var]
-  --runstatedir=DIR       modifiable per-process data [LOCALSTATEDIR/run]
   --libdir=DIR            object code libraries [EPREFIX/lib]
   --includedir=DIR        C header files [PREFIX/include]
   --oldincludedir=DIR     C header files for non-gcc [/usr/include]
@@ -4338,9 +4327,53 @@ fi
 
 # if lustre module not disabled, check for needed Lustre module header
 if test x$enable_lustre_mod != xno; then
-    ac_fn_c_check_header_mongrel "$LINENO" "lustre/lustre_user.h" "ac_cv_header_lustre_lustre_user_h" "$ac_includes_default"
-if test "x$ac_cv_header_lustre_lustre_user_h" = xyes; then :
+    ac_fn_c_check_header_mongrel "$LINENO" "lustre/lustreapi.h" "ac_cv_header_lustre_lustreapi_h" "$ac_includes_default"
+if test "x$ac_cv_header_lustre_lustreapi_h" = xyes; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for llapi_layout_get_by_xattr in -llustreapi" >&5
+$as_echo_n "checking for llapi_layout_get_by_xattr in -llustreapi... " >&6; }
+if ${ac_cv_lib_lustreapi_llapi_layout_get_by_xattr+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-llustreapi  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char llapi_layout_get_by_xattr ();
+int
+main ()
+{
+return llapi_layout_get_by_xattr ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_lustreapi_llapi_layout_get_by_xattr=yes
+else
+  ac_cv_lib_lustreapi_llapi_layout_get_by_xattr=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_lustreapi_llapi_layout_get_by_xattr" >&5
+$as_echo "$ac_cv_lib_lustreapi_llapi_layout_get_by_xattr" >&6; }
+if test "x$ac_cv_lib_lustreapi_llapi_layout_get_by_xattr" = xyes; then :
   BUILD_LUSTRE_MODULE=1
+	     DARSHAN_LUSTRE_LD_FLAGS="-llustreapi"
+else
+  if test "x$enable_lustre_mod" = xyes; then :
+  as_fn_error $? "Cannot find required llapi_layout_get_by_xattr function for the Lustre module" "$LINENO" 5
+fi
+fi
+
 else
   if test "x$enable_lustre_mod" = xyes; then :
   as_fn_error $? "Cannot find required headers for the Lustre module" "$LINENO" 5
@@ -5067,6 +5100,7 @@ DARSHAN_VERSION="3.2.1"
 
 
 
+
 
 
 ac_config_files="$ac_config_files Makefile darshan-mk-log-dirs.pl darshan-gen-cc.pl darshan-gen-cxx.pl darshan-gen-fortran.pl darshan-config share/craype-1.x/darshan-module share/craype-2.x/darshan-module lib/pkgconfig/darshan-runtime.pc share/mpi-profile/darshan-cc.conf share/mpi-profile/darshan-cxx.conf share/mpi-profile/darshan-f.conf share/mpi-profile/darshan-cc-static.conf share/mpi-profile/darshan-cxx-static.conf share/mpi-profile/darshan-f-static.conf share/ld-opts/darshan-base-ld-opts share/ld-opts/darshan-stdio-ld-opts share/ld-opts/darshan-hdf5-ld-opts"


=====================================
darshan-runtime/configure.in
=====================================
@@ -291,8 +291,13 @@ AC_ARG_ENABLE(lustre-mod,
               [enable_lustre_mod=check])
 # if lustre module not disabled, check for needed Lustre module header
 if test x$enable_lustre_mod != xno; then
-    AC_CHECK_HEADER([lustre/lustre_user.h],
-        BUILD_LUSTRE_MODULE=1,
+    AC_CHECK_HEADER([lustre/lustreapi.h],
+        [AC_CHECK_LIB(lustreapi, llapi_layout_get_by_xattr,
+            [BUILD_LUSTRE_MODULE=1
+	     DARSHAN_LUSTRE_LD_FLAGS="-llustreapi"],
+	    AS_IF([test "x$enable_lustre_mod" = xyes],
+                AC_MSG_ERROR(Cannot find required llapi_layout_get_by_xattr function for the Lustre module),
+                []))],
         AS_IF([test "x$enable_lustre_mod" = xyes],
             AC_MSG_ERROR(Cannot find required headers for the Lustre module),
             []))
@@ -530,6 +535,7 @@ AC_SUBST(DARSHAN_HDF5_ADD_DFLUSH_LD_OPTS)
 AC_SUBST(DARSHAN_HDF5_LD_FLAGS)
 AC_SUBST(BUILD_BGQ_MODULE)
 AC_SUBST(BUILD_LUSTRE_MODULE)
+AC_SUBST(DARSHAN_LUSTRE_LD_FLAGS)
 AC_SUBST(BUILD_MDHIM_MODULE)
 AC_SUBST(DARSHAN_MDHIM_LD_OPTS)
 AC_OUTPUT(Makefile


=====================================
darshan-runtime/darshan-config.in
=====================================
@@ -14,13 +14,13 @@ DARSHAN_LOG_ENV="@__DARSHAN_LOG_ENV@"
 #   app used a library which in turn used one of those HLLs).
 
 PRE_LD_FLAGS="-L$DARSHAN_LIB_PATH $DARSHAN_LD_FLAGS -ldarshan -lz -Wl,@$DARSHAN_SHARE_PATH/ld-opts/darshan-base-ld-opts"
-POST_LD_FLAGS="-L$DARSHAN_LIB_PATH -ldarshan -lz -lrt -lpthread"
+POST_LD_FLAGS="-L$DARSHAN_LIB_PATH -ldarshan @DARSHAN_LUSTRE_LD_FLAGS@ -lz -lrt -lpthread"
 
 # NOTE:
 # - when dynamic linking there is no need for wrapping options, we simply
 #   need to get the darshan symbol definitions early enough in the link
 #   order.  We also set no-as-needed for linkers that may not identify
-DYN_LD_FLAGS="-L$DARSHAN_LIB_PATH $DARSHAN_LD_FLAGS -Wl,-rpath=$DARSHAN_LIB_PATH -Wl,-no-as-needed -ldarshan @DARSHAN_HDF5_LD_FLAGS@"
+DYN_LD_FLAGS="-L$DARSHAN_LIB_PATH $DARSHAN_LD_FLAGS -Wl,-rpath=$DARSHAN_LIB_PATH -Wl,-no-as-needed -ldarshan @DARSHAN_LUSTRE_LD_FLAGS@ @DARSHAN_HDF5_LD_FLAGS@"
 
 # NOTE:
 # - construct complete list of log path options, separated by commas.


=====================================
darshan-runtime/lib/darshan-lustre.c
=====================================
@@ -16,9 +16,10 @@
 #include <stdlib.h>
 #include <assert.h>
 #include <pthread.h>
-#include <sys/ioctl.h>
+#include <limits.h>
+#include <sys/xattr.h>
 
-#include <lustre/lustre_user.h>
+#include <lustre/lustreapi.h>
 
 #include "darshan.h"
 #include "darshan-dynamic.h"
@@ -50,10 +51,6 @@ static int my_rank = -1;
 #define LUSTRE_LOCK() pthread_mutex_lock(&lustre_runtime_mutex)
 #define LUSTRE_UNLOCK() pthread_mutex_unlock(&lustre_runtime_mutex)
 
-#ifndef LOV_MAX_STRIPE_COUNT /* for Lustre < 2.4 */
-    #define LOV_MAX_STRIPE_COUNT 2000
-#endif
-
 void darshan_instrument_lustre_file(const char* filepath, int fd)
 {
     struct lustre_record_ref *rec_ref;
@@ -61,9 +58,12 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
     struct darshan_fs_info fs_info;
     darshan_record_id rec_id;
     int i;
-    struct lov_user_md *lum;
-    size_t lumsize = sizeof(struct lov_user_md) +
-        LOV_MAX_STRIPE_COUNT * sizeof(struct lov_user_ost_data);
+    void *lustre_xattr_val;
+    size_t lustre_xattr_size = XATTR_SIZE_MAX;
+    struct llapi_layout *lustre_layout;
+    uint64_t stripe_size;
+    uint64_t stripe_count;
+    uint64_t tmp_ost;
     size_t rec_size;
     int ret;
 
@@ -85,23 +85,40 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
         &rec_id, sizeof(darshan_record_id));
     if(!rec_ref)
     {
-        /* first issue LUSTRE ioctl to see if we can get stripe data */
-
-        /* if we can't issue ioctl, we have no counter data at all */
-        if ( (lum = calloc(1, lumsize)) == NULL )
+        if ( (lustre_xattr_val = calloc(1, lustre_xattr_size)) == NULL )
         {
             LUSTRE_UNLOCK();
             return;
         }
 
-        /* find out the OST count of this file so we can allocate memory */
-        lum->lmm_magic = LOV_USER_MAGIC;
-        lum->lmm_stripe_count = LOV_MAX_STRIPE_COUNT;
+        /* -1 means fgetxattr failed, likely because file isn't on Lustre, but maybe because
+         * the Lustre version doesn't support this method of obtaining striping info
+         */
+        if ( (lustre_xattr_size = fgetxattr( fd, "lustre.lov", lustre_xattr_val, lustre_xattr_size)) == -1 )
+        {
+            free(lustre_xattr_val);
+            LUSTRE_UNLOCK();
+            return;
+        }
 
-        /* -1 means ioctl failed, likely because file isn't on Lustre */
-        if ( ioctl( fd, LL_IOC_LOV_GETSTRIPE, (void *)lum ) == -1 )
+        /* get corresponding Lustre file layout, then extract stripe params */
+        if ( (lustre_layout = llapi_layout_get_by_xattr(lustre_xattr_val, lustre_xattr_size, 0)) == NULL)
         {
-            free(lum);
+            free(lustre_xattr_val);
+            LUSTRE_UNLOCK();
+            return;
+        }
+        if (llapi_layout_stripe_size_get(lustre_layout, &stripe_size) == -1)
+        {
+            llapi_layout_free(lustre_layout);
+            free(lustre_xattr_val);
+            LUSTRE_UNLOCK();
+            return;
+        }
+        if (llapi_layout_stripe_count_get(lustre_layout, &stripe_count) == -1)
+        {
+            llapi_layout_free(lustre_layout);
+            free(lustre_xattr_val);
             LUSTRE_UNLOCK();
             return;
         }
@@ -110,7 +127,8 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
         rec_ref = malloc(sizeof(*rec_ref));
         if(!rec_ref)
         {
-            free(lum);
+            llapi_layout_free(lustre_layout);
+            free(lustre_xattr_val);
             LUSTRE_UNLOCK();
             return;
         }
@@ -120,12 +138,13 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
         if(ret == 0)
         {
             free(rec_ref);
-            free(lum);
+            llapi_layout_free(lustre_layout);
+            free(lustre_xattr_val);
             LUSTRE_UNLOCK();
             return;
         }
 
-        rec_size = LUSTRE_RECORD_SIZE( lum->lmm_stripe_count );
+        rec_size = LUSTRE_RECORD_SIZE( stripe_count );
 
         /* register a Lustre file record with Darshan */
         fs_info.fs_type = -1;
@@ -142,7 +161,8 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
             darshan_delete_record_ref(&(lustre_runtime->record_id_hash),
                 &rec_id, sizeof(darshan_record_id));
             free(rec_ref);
-            free(lum);
+            llapi_layout_free(lustre_layout);
+            free(lustre_xattr_val);
             LUSTRE_UNLOCK();
             return;
         }
@@ -161,12 +181,25 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
             rec->counters[LUSTRE_MDTS] = -1;
         }
 
-        rec->counters[LUSTRE_STRIPE_SIZE] = lum->lmm_stripe_size;
-        rec->counters[LUSTRE_STRIPE_WIDTH] = lum->lmm_stripe_count;
-        rec->counters[LUSTRE_STRIPE_OFFSET] = lum->lmm_stripe_offset;
-        for ( i = 0; i < lum->lmm_stripe_count; i++ )
-            rec->ost_ids[i] = lum->lmm_objects[i].l_ost_idx;
-        free(lum);
+        rec->counters[LUSTRE_STRIPE_SIZE] = stripe_size;
+        rec->counters[LUSTRE_STRIPE_WIDTH] = stripe_count;
+        rec->counters[LUSTRE_STRIPE_OFFSET] = -1; // no longer captured
+        for ( i = 0; i < stripe_count; i++ )
+        {
+            if (llapi_layout_ost_index_get(lustre_layout, i, &tmp_ost) == -1)
+            {
+                darshan_delete_record_ref(&(lustre_runtime->record_id_hash),
+                    &rec_id, sizeof(darshan_record_id));
+                free(rec_ref);
+                llapi_layout_free(lustre_layout);
+                free(lustre_xattr_val);
+                LUSTRE_UNLOCK();
+                return;
+            }
+            rec->ost_ids[i] = (int64_t)tmp_ost;
+        }
+        free(lustre_xattr_val);
+        llapi_layout_free(lustre_layout);
 
         rec->base_rec.id = rec_id;
         rec->base_rec.rank = my_rank;


=====================================
darshan-runtime/lib/pkgconfig/darshan-runtime.pc.in
=====================================
@@ -15,5 +15,5 @@ darshan_libdir= -L${darshan_prefix}/lib
 darshan_linkopts="-Wl,@${darshan_share}/ld-opts/darshan-base-ld-opts"
 
 Cflags:
-Libs: ${darshan_libdir} -Wl,-rpath=${darshan_prefix}/lib -Wl,-no-as-needed -ldarshan @DARSHAN_HDF5_LD_FLAGS@
-Libs.private: ${darshan_linkopts} ${darshan_libdir} -lfmpich -lmpichcxx -ldarshan
+Libs: ${darshan_libdir} -Wl,-rpath=${darshan_prefix}/lib -Wl,-no-as-needed -ldarshan @DARSHAN_LUSTRE_LD_FLAGS@ @DARSHAN_HDF5_LD_FLAGS@
+Libs.private: ${darshan_linkopts} ${darshan_libdir} -lfmpich -lmpichcxx -ldarshan @DARSHAN_LUSTRE_LD_FLAGS@ -lz -lrt -lpthread



View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/-/compare/7d4a80abaa2f2bfcc767f4f988fbf9dafff95926...e245da9d371d38def7625f7b9a1c53d8afa91f45

-- 
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/-/compare/7d4a80abaa2f2bfcc767f4f988fbf9dafff95926...e245da9d371d38def7625f7b9a1c53d8afa91f45
You're receiving this email because of your account on xgitlab.cels.anl.gov.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20210318/327fd511/attachment-0001.html>


More information about the Darshan-commits mailing list