[Darshan-commits] [Git][darshan/darshan][master] 2 commits: switch lustre ioctls to fgetxattr
Shane Snyder
xgitlab at cels.anl.gov
Thu Mar 18 00:47:07 CDT 2021
Shane Snyder pushed to branch master at darshan / darshan
Commits:
332f4ebb by Shane Snyder at 2021-03-18T00:47:00-05:00
switch lustre ioctls to fgetxattr
some configure cleanup as well as modification of darshan
linker flags when lustre module is enabled
Fixes #270
- - - - -
e245da9d by Shane Snyder at 2021-03-18T00:47:00-05:00
Merge branch 'dev-270-lustre-ioctl-crash' into 'master'
modify Darshan's Lustre module to use fgetxattr for getting stripe info
See merge request darshan/darshan!89
- - - - -
5 changed files:
- darshan-runtime/configure
- darshan-runtime/configure.in
- darshan-runtime/darshan-config.in
- darshan-runtime/lib/darshan-lustre.c
- darshan-runtime/lib/pkgconfig/darshan-runtime.pc.in
Changes:
=====================================
darshan-runtime/configure
=====================================
@@ -623,6 +623,7 @@ ac_subst_vars='LTLIBOBJS
LIBOBJS
DARSHAN_MDHIM_LD_OPTS
BUILD_MDHIM_MODULE
+DARSHAN_LUSTRE_LD_FLAGS
BUILD_LUSTRE_MODULE
BUILD_BGQ_MODULE
DARSHAN_HDF5_LD_FLAGS
@@ -684,7 +685,6 @@ infodir
docdir
oldincludedir
includedir
-runstatedir
localstatedir
sharedstatedir
sysconfdir
@@ -779,7 +779,6 @@ datadir='${datarootdir}'
sysconfdir='${prefix}/etc'
sharedstatedir='${prefix}/com'
localstatedir='${prefix}/var'
-runstatedir='${localstatedir}/run'
includedir='${prefix}/include'
oldincludedir='/usr/include'
docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
@@ -1032,15 +1031,6 @@ do
| -silent | --silent | --silen | --sile | --sil)
silent=yes ;;
- -runstatedir | --runstatedir | --runstatedi | --runstated \
- | --runstate | --runstat | --runsta | --runst | --runs \
- | --run | --ru | --r)
- ac_prev=runstatedir ;;
- -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
- | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
- | --run=* | --ru=* | --r=*)
- runstatedir=$ac_optarg ;;
-
-sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
ac_prev=sbindir ;;
-sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
@@ -1178,7 +1168,7 @@ fi
for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
datadir sysconfdir sharedstatedir localstatedir includedir \
oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
- libdir localedir mandir runstatedir
+ libdir localedir mandir
do
eval ac_val=\$$ac_var
# Remove trailing slashes.
@@ -1331,7 +1321,6 @@ Fine tuning of the installation directories:
--sysconfdir=DIR read-only single-machine data [PREFIX/etc]
--sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
--localstatedir=DIR modifiable single-machine data [PREFIX/var]
- --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run]
--libdir=DIR object code libraries [EPREFIX/lib]
--includedir=DIR C header files [PREFIX/include]
--oldincludedir=DIR C header files for non-gcc [/usr/include]
@@ -4338,9 +4327,53 @@ fi
# if lustre module not disabled, check for needed Lustre module header
if test x$enable_lustre_mod != xno; then
- ac_fn_c_check_header_mongrel "$LINENO" "lustre/lustre_user.h" "ac_cv_header_lustre_lustre_user_h" "$ac_includes_default"
-if test "x$ac_cv_header_lustre_lustre_user_h" = xyes; then :
+ ac_fn_c_check_header_mongrel "$LINENO" "lustre/lustreapi.h" "ac_cv_header_lustre_lustreapi_h" "$ac_includes_default"
+if test "x$ac_cv_header_lustre_lustreapi_h" = xyes; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for llapi_layout_get_by_xattr in -llustreapi" >&5
+$as_echo_n "checking for llapi_layout_get_by_xattr in -llustreapi... " >&6; }
+if ${ac_cv_lib_lustreapi_llapi_layout_get_by_xattr+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-llustreapi $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char llapi_layout_get_by_xattr ();
+int
+main ()
+{
+return llapi_layout_get_by_xattr ();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_lib_lustreapi_llapi_layout_get_by_xattr=yes
+else
+ ac_cv_lib_lustreapi_llapi_layout_get_by_xattr=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_lustreapi_llapi_layout_get_by_xattr" >&5
+$as_echo "$ac_cv_lib_lustreapi_llapi_layout_get_by_xattr" >&6; }
+if test "x$ac_cv_lib_lustreapi_llapi_layout_get_by_xattr" = xyes; then :
BUILD_LUSTRE_MODULE=1
+ DARSHAN_LUSTRE_LD_FLAGS="-llustreapi"
+else
+ if test "x$enable_lustre_mod" = xyes; then :
+ as_fn_error $? "Cannot find required llapi_layout_get_by_xattr function for the Lustre module" "$LINENO" 5
+fi
+fi
+
else
if test "x$enable_lustre_mod" = xyes; then :
as_fn_error $? "Cannot find required headers for the Lustre module" "$LINENO" 5
@@ -5067,6 +5100,7 @@ DARSHAN_VERSION="3.2.1"
+
ac_config_files="$ac_config_files Makefile darshan-mk-log-dirs.pl darshan-gen-cc.pl darshan-gen-cxx.pl darshan-gen-fortran.pl darshan-config share/craype-1.x/darshan-module share/craype-2.x/darshan-module lib/pkgconfig/darshan-runtime.pc share/mpi-profile/darshan-cc.conf share/mpi-profile/darshan-cxx.conf share/mpi-profile/darshan-f.conf share/mpi-profile/darshan-cc-static.conf share/mpi-profile/darshan-cxx-static.conf share/mpi-profile/darshan-f-static.conf share/ld-opts/darshan-base-ld-opts share/ld-opts/darshan-stdio-ld-opts share/ld-opts/darshan-hdf5-ld-opts"
=====================================
darshan-runtime/configure.in
=====================================
@@ -291,8 +291,13 @@ AC_ARG_ENABLE(lustre-mod,
[enable_lustre_mod=check])
# if lustre module not disabled, check for needed Lustre module header
if test x$enable_lustre_mod != xno; then
- AC_CHECK_HEADER([lustre/lustre_user.h],
- BUILD_LUSTRE_MODULE=1,
+ AC_CHECK_HEADER([lustre/lustreapi.h],
+ [AC_CHECK_LIB(lustreapi, llapi_layout_get_by_xattr,
+ [BUILD_LUSTRE_MODULE=1
+ DARSHAN_LUSTRE_LD_FLAGS="-llustreapi"],
+ AS_IF([test "x$enable_lustre_mod" = xyes],
+ AC_MSG_ERROR(Cannot find required llapi_layout_get_by_xattr function for the Lustre module),
+ []))],
AS_IF([test "x$enable_lustre_mod" = xyes],
AC_MSG_ERROR(Cannot find required headers for the Lustre module),
[]))
@@ -530,6 +535,7 @@ AC_SUBST(DARSHAN_HDF5_ADD_DFLUSH_LD_OPTS)
AC_SUBST(DARSHAN_HDF5_LD_FLAGS)
AC_SUBST(BUILD_BGQ_MODULE)
AC_SUBST(BUILD_LUSTRE_MODULE)
+AC_SUBST(DARSHAN_LUSTRE_LD_FLAGS)
AC_SUBST(BUILD_MDHIM_MODULE)
AC_SUBST(DARSHAN_MDHIM_LD_OPTS)
AC_OUTPUT(Makefile
=====================================
darshan-runtime/darshan-config.in
=====================================
@@ -14,13 +14,13 @@ DARSHAN_LOG_ENV="@__DARSHAN_LOG_ENV@"
# app used a library which in turn used one of those HLLs).
PRE_LD_FLAGS="-L$DARSHAN_LIB_PATH $DARSHAN_LD_FLAGS -ldarshan -lz -Wl,@$DARSHAN_SHARE_PATH/ld-opts/darshan-base-ld-opts"
-POST_LD_FLAGS="-L$DARSHAN_LIB_PATH -ldarshan -lz -lrt -lpthread"
+POST_LD_FLAGS="-L$DARSHAN_LIB_PATH -ldarshan @DARSHAN_LUSTRE_LD_FLAGS@ -lz -lrt -lpthread"
# NOTE:
# - when dynamic linking there is no need for wrapping options, we simply
# need to get the darshan symbol definitions early enough in the link
# order. We also set no-as-needed for linkers that may not identify
-DYN_LD_FLAGS="-L$DARSHAN_LIB_PATH $DARSHAN_LD_FLAGS -Wl,-rpath=$DARSHAN_LIB_PATH -Wl,-no-as-needed -ldarshan @DARSHAN_HDF5_LD_FLAGS@"
+DYN_LD_FLAGS="-L$DARSHAN_LIB_PATH $DARSHAN_LD_FLAGS -Wl,-rpath=$DARSHAN_LIB_PATH -Wl,-no-as-needed -ldarshan @DARSHAN_LUSTRE_LD_FLAGS@ @DARSHAN_HDF5_LD_FLAGS@"
# NOTE:
# - construct complete list of log path options, separated by commas.
=====================================
darshan-runtime/lib/darshan-lustre.c
=====================================
@@ -16,9 +16,10 @@
#include <stdlib.h>
#include <assert.h>
#include <pthread.h>
-#include <sys/ioctl.h>
+#include <limits.h>
+#include <sys/xattr.h>
-#include <lustre/lustre_user.h>
+#include <lustre/lustreapi.h>
#include "darshan.h"
#include "darshan-dynamic.h"
@@ -50,10 +51,6 @@ static int my_rank = -1;
#define LUSTRE_LOCK() pthread_mutex_lock(&lustre_runtime_mutex)
#define LUSTRE_UNLOCK() pthread_mutex_unlock(&lustre_runtime_mutex)
-#ifndef LOV_MAX_STRIPE_COUNT /* for Lustre < 2.4 */
- #define LOV_MAX_STRIPE_COUNT 2000
-#endif
-
void darshan_instrument_lustre_file(const char* filepath, int fd)
{
struct lustre_record_ref *rec_ref;
@@ -61,9 +58,12 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
struct darshan_fs_info fs_info;
darshan_record_id rec_id;
int i;
- struct lov_user_md *lum;
- size_t lumsize = sizeof(struct lov_user_md) +
- LOV_MAX_STRIPE_COUNT * sizeof(struct lov_user_ost_data);
+ void *lustre_xattr_val;
+ size_t lustre_xattr_size = XATTR_SIZE_MAX;
+ struct llapi_layout *lustre_layout;
+ uint64_t stripe_size;
+ uint64_t stripe_count;
+ uint64_t tmp_ost;
size_t rec_size;
int ret;
@@ -85,23 +85,40 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
&rec_id, sizeof(darshan_record_id));
if(!rec_ref)
{
- /* first issue LUSTRE ioctl to see if we can get stripe data */
-
- /* if we can't issue ioctl, we have no counter data at all */
- if ( (lum = calloc(1, lumsize)) == NULL )
+ if ( (lustre_xattr_val = calloc(1, lustre_xattr_size)) == NULL )
{
LUSTRE_UNLOCK();
return;
}
- /* find out the OST count of this file so we can allocate memory */
- lum->lmm_magic = LOV_USER_MAGIC;
- lum->lmm_stripe_count = LOV_MAX_STRIPE_COUNT;
+ /* -1 means fgetxattr failed, likely because file isn't on Lustre, but maybe because
+ * the Lustre version doesn't support this method of obtaining striping info
+ */
+ if ( (lustre_xattr_size = fgetxattr( fd, "lustre.lov", lustre_xattr_val, lustre_xattr_size)) == -1 )
+ {
+ free(lustre_xattr_val);
+ LUSTRE_UNLOCK();
+ return;
+ }
- /* -1 means ioctl failed, likely because file isn't on Lustre */
- if ( ioctl( fd, LL_IOC_LOV_GETSTRIPE, (void *)lum ) == -1 )
+ /* get corresponding Lustre file layout, then extract stripe params */
+ if ( (lustre_layout = llapi_layout_get_by_xattr(lustre_xattr_val, lustre_xattr_size, 0)) == NULL)
{
- free(lum);
+ free(lustre_xattr_val);
+ LUSTRE_UNLOCK();
+ return;
+ }
+ if (llapi_layout_stripe_size_get(lustre_layout, &stripe_size) == -1)
+ {
+ llapi_layout_free(lustre_layout);
+ free(lustre_xattr_val);
+ LUSTRE_UNLOCK();
+ return;
+ }
+ if (llapi_layout_stripe_count_get(lustre_layout, &stripe_count) == -1)
+ {
+ llapi_layout_free(lustre_layout);
+ free(lustre_xattr_val);
LUSTRE_UNLOCK();
return;
}
@@ -110,7 +127,8 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
rec_ref = malloc(sizeof(*rec_ref));
if(!rec_ref)
{
- free(lum);
+ llapi_layout_free(lustre_layout);
+ free(lustre_xattr_val);
LUSTRE_UNLOCK();
return;
}
@@ -120,12 +138,13 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
if(ret == 0)
{
free(rec_ref);
- free(lum);
+ llapi_layout_free(lustre_layout);
+ free(lustre_xattr_val);
LUSTRE_UNLOCK();
return;
}
- rec_size = LUSTRE_RECORD_SIZE( lum->lmm_stripe_count );
+ rec_size = LUSTRE_RECORD_SIZE( stripe_count );
/* register a Lustre file record with Darshan */
fs_info.fs_type = -1;
@@ -142,7 +161,8 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
darshan_delete_record_ref(&(lustre_runtime->record_id_hash),
&rec_id, sizeof(darshan_record_id));
free(rec_ref);
- free(lum);
+ llapi_layout_free(lustre_layout);
+ free(lustre_xattr_val);
LUSTRE_UNLOCK();
return;
}
@@ -161,12 +181,25 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
rec->counters[LUSTRE_MDTS] = -1;
}
- rec->counters[LUSTRE_STRIPE_SIZE] = lum->lmm_stripe_size;
- rec->counters[LUSTRE_STRIPE_WIDTH] = lum->lmm_stripe_count;
- rec->counters[LUSTRE_STRIPE_OFFSET] = lum->lmm_stripe_offset;
- for ( i = 0; i < lum->lmm_stripe_count; i++ )
- rec->ost_ids[i] = lum->lmm_objects[i].l_ost_idx;
- free(lum);
+ rec->counters[LUSTRE_STRIPE_SIZE] = stripe_size;
+ rec->counters[LUSTRE_STRIPE_WIDTH] = stripe_count;
+ rec->counters[LUSTRE_STRIPE_OFFSET] = -1; // no longer captured
+ for ( i = 0; i < stripe_count; i++ )
+ {
+ if (llapi_layout_ost_index_get(lustre_layout, i, &tmp_ost) == -1)
+ {
+ darshan_delete_record_ref(&(lustre_runtime->record_id_hash),
+ &rec_id, sizeof(darshan_record_id));
+ free(rec_ref);
+ llapi_layout_free(lustre_layout);
+ free(lustre_xattr_val);
+ LUSTRE_UNLOCK();
+ return;
+ }
+ rec->ost_ids[i] = (int64_t)tmp_ost;
+ }
+ free(lustre_xattr_val);
+ llapi_layout_free(lustre_layout);
rec->base_rec.id = rec_id;
rec->base_rec.rank = my_rank;
=====================================
darshan-runtime/lib/pkgconfig/darshan-runtime.pc.in
=====================================
@@ -15,5 +15,5 @@ darshan_libdir= -L${darshan_prefix}/lib
darshan_linkopts="-Wl,@${darshan_share}/ld-opts/darshan-base-ld-opts"
Cflags:
-Libs: ${darshan_libdir} -Wl,-rpath=${darshan_prefix}/lib -Wl,-no-as-needed -ldarshan @DARSHAN_HDF5_LD_FLAGS@
-Libs.private: ${darshan_linkopts} ${darshan_libdir} -lfmpich -lmpichcxx -ldarshan
+Libs: ${darshan_libdir} -Wl,-rpath=${darshan_prefix}/lib -Wl,-no-as-needed -ldarshan @DARSHAN_LUSTRE_LD_FLAGS@ @DARSHAN_HDF5_LD_FLAGS@
+Libs.private: ${darshan_linkopts} ${darshan_libdir} -lfmpich -lmpichcxx -ldarshan @DARSHAN_LUSTRE_LD_FLAGS@ -lz -lrt -lpthread
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/-/compare/7d4a80abaa2f2bfcc767f4f988fbf9dafff95926...e245da9d371d38def7625f7b9a1c53d8afa91f45
--
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/-/compare/7d4a80abaa2f2bfcc767f4f988fbf9dafff95926...e245da9d371d38def7625f7b9a1c53d8afa91f45
You're receiving this email because of your account on xgitlab.cels.anl.gov.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20210318/327fd511/attachment-0001.html>
More information about the Darshan-commits mailing list