[Darshan-commits] [Git][darshan/darshan][master] 2 commits: this probably shouldn't have been checked in
Glenn K. Lockwood
xgitlab at cels.anl.gov
Tue Jan 24 20:38:23 CST 2017
Glenn K. Lockwood pushed to branch master at darshan / darshan
Commits:
2b1649df by Glenn K. Lockwood at 2017-01-24T18:29:43-08:00
this probably shouldn't have been checked in
- - - - -
c95a5d88 by Glenn K. Lockwood at 2017-01-24T18:36:18-08:00
fixes #214 (LOV_MAX_STRIPE_COUNT not exposed in lustre_user.h for Lustre < 2.4)
- - - - -
2 changed files:
- darshan-runtime/lib/darshan-lustre.c
- − darshan-runtime/lib/darshan-lustre_old.c
Changes:
=====================================
darshan-runtime/lib/darshan-lustre.c
=====================================
--- a/darshan-runtime/lib/darshan-lustre.c
+++ b/darshan-runtime/lib/darshan-lustre.c
@@ -47,6 +47,9 @@ static int my_rank = -1;
#define LUSTRE_LOCK() pthread_mutex_lock(&lustre_runtime_mutex)
#define LUSTRE_UNLOCK() pthread_mutex_unlock(&lustre_runtime_mutex)
+#ifndef LOV_MAX_STRIPE_COUNT /* for Lustre < 2.4 */
+ #define LOV_MAX_STRIPE_COUNT 2000
+#endif
void darshan_instrument_lustre_file(const char* filepath, int fd)
{
struct lustre_record_ref *rec_ref;
=====================================
darshan-runtime/lib/darshan-lustre_old.c deleted
=====================================
--- a/darshan-runtime/lib/darshan-lustre_old.c
+++ /dev/null
@@ -1,543 +0,0 @@
-/*
- * Copyright (C) 2015 University of Chicago.
- * See COPYRIGHT notice in top-level directory.
- *
- */
-
-#define _XOPEN_SOURCE 500
-#define _GNU_SOURCE
-
-#include "darshan-runtime-config.h"
-#include <stdio.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <time.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <pthread.h>
-#include <sys/ioctl.h>
-
-/* XXX stick this into autoconf .h */
-#include <lustre/lustre_user.h>
-
-#include "uthash.h"
-
-#include "darshan.h"
-#include "darshan-dynamic.h"
-#include "darshan-lustre.h"
-
-struct lustre_runtime *lustre_runtime = NULL;
-static pthread_mutex_t lustre_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
-static int instrumentation_disabled = 0;
-static int my_rank = -1;
-
-static void lustre_runtime_initialize(void);
-
-static void lustre_begin_shutdown(void);
-static void lustre_get_output_data(MPI_Comm mod_comm, darshan_record_id *shared_recs,
- int shared_rec_count, void **lustre_buf, int *lustre_buf_sz);
-static void lustre_shutdown(void);
-static int lustre_record_compare(const void* a_p, const void* b_p);
-static void lustre_record_reduction_op(void* infile_v, void* inoutfile_v,
- int *len, MPI_Datatype *datatype);
-
-#define LUSTRE_LOCK() pthread_mutex_lock(&lustre_runtime_mutex)
-#define LUSTRE_UNLOCK() pthread_mutex_unlock(&lustre_runtime_mutex)
-#define LUSTRE_RECORD_SIZE( osts ) ( sizeof(struct darshan_lustre_record) + sizeof(int64_t) * (osts - 1) )
-
-void darshan_instrument_lustre_file(const char* filepath, int fd)
-{
- struct lustre_record_runtime *rec_rt;
- struct darshan_lustre_record *rec;
- struct darshan_fs_info fs_info;
- darshan_record_id rec_id;
- int limit_flag;
- int i;
- struct lov_user_md *lum;
- size_t lumsize = sizeof(struct lov_user_md) +
- LOV_MAX_STRIPE_COUNT * sizeof(struct lov_user_ost_data);
- size_t rec_size;
-
- LUSTRE_LOCK();
- /* make sure the lustre module is already initialized */
- lustre_runtime_initialize();
-
- /* if we can't issue ioctl, we have no counter data at all */
- if ( (lum = calloc(1, lumsize)) == NULL )
- return;
-
- /* find out the OST count of this file so we can allocate memory */
- lum->lmm_magic = LOV_USER_MAGIC;
- lum->lmm_stripe_count = LOV_MAX_STRIPE_COUNT;
-
- /* -1 means ioctl failed, likely because file isn't on Lustre */
- if ( ioctl( fd, LL_IOC_LOV_GETSTRIPE, (void *)lum ) == -1 )
- {
- free(lum);
- return;
- }
-
- rec_size = LUSTRE_RECORD_SIZE( lum->lmm_stripe_count );
-
- {
- /* broken out for clarity */
- void *end_of_new_record = (char*)lustre_runtime->next_free_record + rec_size;
- void *end_of_rec_buffer = (char*)lustre_runtime->record_buffer + lustre_runtime->record_buffer_max;
- limit_flag = ( end_of_new_record > end_of_rec_buffer );
- }
-
- /* register a Lustre file record with Darshan */
- fs_info.fs_type = -1;
- darshan_core_register_record(
- (void *)filepath,
- strlen(filepath),
- DARSHAN_LUSTRE_MOD,
- 1,
- limit_flag,
- &rec_id,
- &fs_info);
-
- /* if record id is 0, darshan has no more memory for instrumenting */
- if(rec_id == 0)
- {
- free(lum);
- LUSTRE_UNLOCK();
- return;
- }
-
- /* search the hash table for this file record, and initialize if not found */
- HASH_FIND(hlink, lustre_runtime->record_runtime_hash, &rec_id, sizeof(darshan_record_id), rec_rt );
- if ( !rec_rt ) {
- /* allocate a new lustre record and append it to the array */
- rec_rt = &(lustre_runtime->record_runtime_array[lustre_runtime->record_count]);
- rec_rt->record = lustre_runtime->next_free_record;
- rec_rt->record_size = rec_size;
- lustre_runtime->next_free_record = (char*)(lustre_runtime->next_free_record) + rec_size;
- lustre_runtime->record_buffer_used += rec_size;
- rec = rec_rt->record;
- rec->rec_id = rec_id;
- rec->rank = my_rank;
-
- /* implicit assumption here that none of these counters will change
- * after the first time a file is opened. This may not always be
- * true in the future */
- if ( fs_info.fs_type != -1 )
- {
- rec->counters[LUSTRE_OSTS] = fs_info.ost_count;
- rec->counters[LUSTRE_MDTS] = fs_info.mdt_count;
- }
- else
- {
- rec->counters[LUSTRE_OSTS] = -1;
- rec->counters[LUSTRE_MDTS] = -1;
- }
-
- rec->counters[LUSTRE_STRIPE_SIZE] = lum->lmm_stripe_size;
- rec->counters[LUSTRE_STRIPE_WIDTH] = lum->lmm_stripe_count;
- rec->counters[LUSTRE_STRIPE_OFFSET] = lum->lmm_stripe_offset;
- for ( i = 0; i < lum->lmm_stripe_count; i++ )
- rec->ost_ids[i] = lum->lmm_objects[i].l_ost_idx;
- free(lum);
-
- HASH_ADD(hlink, lustre_runtime->record_runtime_hash, record->rec_id, sizeof(darshan_record_id), rec_rt);
-
- lustre_runtime->record_count++;
- }
-
- LUSTRE_UNLOCK();
- return;
-}
-
-static void lustre_runtime_initialize()
-{
- int mem_limit;
- int max_records;
- struct darshan_module_funcs lustre_mod_fns =
- {
- .begin_shutdown = &lustre_begin_shutdown,
- .get_output_data = &lustre_get_output_data,
- .shutdown = &lustre_shutdown
- };
-
- /* don't do anything if already initialized or instrumenation is disabled */
- if(lustre_runtime || instrumentation_disabled)
- return;
-
- /* register the lustre module with darshan-core */
- darshan_core_register_module(
- DARSHAN_LUSTRE_MOD,
- &lustre_mod_fns,
- &my_rank,
- &mem_limit,
- NULL);
-
- /* return if no memory assigned by darshan core */
- if(mem_limit == 0)
- return;
-
- lustre_runtime = malloc(sizeof(*lustre_runtime));
- if(!lustre_runtime)
- return;
- memset(lustre_runtime, 0, sizeof(*lustre_runtime));
-
- /* allocate the full size of the memory limit we are given */
- lustre_runtime->record_buffer= malloc(mem_limit);
- if(!lustre_runtime->record_buffer)
- {
- lustre_runtime->record_buffer_max = 0;
- return;
- }
- lustre_runtime->record_buffer_max = mem_limit;
- lustre_runtime->next_free_record = lustre_runtime->record_buffer;
- memset(lustre_runtime->record_buffer, 0, lustre_runtime->record_buffer_max);
-
- /* Allocate array of Lustre runtime data. We calculate the maximum possible
- * number of records that will fit into mem_limit by assuming that each
- * record has the minimum possible OST count, then allocate that many
- * runtime records. record_buffer will always run out of memory before
- * we overflow record_runtime_array.
- */
- max_records = mem_limit / sizeof(struct darshan_lustre_record);
- lustre_runtime->record_runtime_array =
- malloc( max_records * sizeof(struct lustre_record_runtime));
- if(!lustre_runtime->record_runtime_array)
- {
- lustre_runtime->record_buffer_max = 0;
- free( lustre_runtime->record_buffer );
- return;
- }
- memset(lustre_runtime->record_runtime_array, 0,
- max_records * sizeof(struct lustre_record_runtime));
-
- return;
-}
-
-/**************************************************************************
- * Functions exported by Lustre module for coordinating with darshan-core *
- **************************************************************************/
-
-static void lustre_begin_shutdown(void)
-{
- assert(lustre_runtime);
-
- LUSTRE_LOCK();
- /* disable further instrumentation while Darshan shuts down */
- instrumentation_disabled = 1;
- LUSTRE_UNLOCK();
-
- return;
-}
-
-static void lustre_get_output_data(
- MPI_Comm mod_comm,
- darshan_record_id *shared_recs,
- int shared_rec_count,
- void **lustre_buf,
- int *lustre_buf_sz)
-{
- struct lustre_record_runtime *file;
- int i, ishared;
- int *rec_lengths;
- size_t shared_rec_size;
- struct darshan_lustre_record *red_send_buf = NULL;
- struct darshan_lustre_record *red_recv_buf = NULL;
- MPI_Datatype red_type;
- MPI_Aint *rec_offsets;
- MPI_Op red_op;
-
- assert(lustre_runtime);
-
- /* if there are globally shared files, do a shared file reduction */
- /* NOTE: the shared file reduction is also skipped if the
- * DARSHAN_DISABLE_SHARED_REDUCTION environment variable is set.
- */
- if (shared_rec_count && !getenv("DARSHAN_DISABLE_SHARED_REDUCTION"))
- {
- /* necessary initialization of shared records */
- for(i = 0; i < shared_rec_count; i++)
- {
- HASH_FIND(hlink, lustre_runtime->record_runtime_hash, &shared_recs[i],
- sizeof(darshan_record_id), file);
- assert(file);
-
- file->record->rank = -1;
- }
-
- /* sort the array of files descending by rank so that we get all of the
- * shared files (marked by rank -1) in a contiguous portion at end
- * of the array
- */
- sort_lustre_records();
-
- /* make red_send_buf point to the first shared-file record */
- ishared = lustre_runtime->record_count - shared_rec_count;
- red_send_buf =
- (lustre_runtime->record_runtime_array[ishared]).record;
-
- /* allocate memory for the reduction output on rank 0 */
- if (my_rank == 0)
- {
- shared_rec_size = lustre_runtime->record_buffer_used - ((char*)red_send_buf - (char*)lustre_runtime->record_buffer);
- red_recv_buf = malloc(shared_rec_size);
- if (!red_recv_buf)
- return;
- }
-
- /* need to build rec_lengths (array of ints) and rec_offsets (array of ints) */
- rec_lengths = malloc(sizeof(*rec_lengths) * shared_rec_count);
- rec_offsets = malloc(sizeof(*rec_offsets) * shared_rec_count);
- for ( i = ishared; i < shared_rec_count; i ++ )
- {
- rec_lengths[i] = (lustre_runtime->record_runtime_array[i]).record_size;
- rec_offsets[i] = (char*)((lustre_runtime->record_runtime_array[i]).record) -
- (char*)((lustre_runtime->record_runtime_array[ishared]).record);
- }
-
- /* ... */
- DARSHAN_MPI_CALL(PMPI_Type_hindexed)(
- shared_rec_count,
- rec_lengths,
- rec_offsets,
- MPI_BYTE,
- &red_type
- );
- DARSHAN_MPI_CALL(PMPI_Type_commit)(&red_type);
- DARSHAN_MPI_CALL(PMPI_Op_create)(lustre_record_reduction_op, 1, &red_op);
- DARSHAN_MPI_CALL(PMPI_Reduce)(red_send_buf, red_recv_buf,
- shared_rec_count, red_type, red_op, 0, mod_comm);
-
- /* clean up reduction state */
- if (my_rank == 0)
- {
- memcpy(&(lustre_runtime->record_buffer[ishared]), red_recv_buf,
- shared_rec_size);
- free(red_recv_buf);
- }
- else
- {
- lustre_runtime->record_count -= shared_rec_count;
- }
- free(rec_lengths);
- free(rec_offsets);
- DARSHAN_MPI_CALL(PMPI_Type_free)(&red_type);
- DARSHAN_MPI_CALL(PMPI_Op_free)(&red_op);
- }
-
- *lustre_buf = (void *)(lustre_runtime->record_buffer);
- *lustre_buf_sz = lustre_runtime->record_buffer_used;
-
- return;
-}
-
-static void lustre_shutdown(void)
-{
- assert(lustre_runtime);
-
- HASH_CLEAR(hlink, lustre_runtime->record_runtime_hash);
- free(lustre_runtime->record_runtime_array);
- free(lustre_runtime->record_buffer);
- free(lustre_runtime);
- lustre_runtime = NULL;
-
- return;
-}
-
-/* compare function for sorting file records by descending rank */
-static int lustre_record_compare(const void* a_p, const void* b_p)
-{
- const struct lustre_record_runtime* a = a_p;
- const struct lustre_record_runtime* b = b_p;
-
- if (a->record->rank < b->record->rank)
- return 1;
- if (a->record->rank > b->record->rank)
- return -1;
-
- /* if ( a->record->rank == b->record->rank ) we MUST do a secondary
- * sort so that the order of qsort is fully deterministic and consistent
- * across all MPI ranks. Without a secondary sort, the sort order can
- * be affected by rank-specific variations (e.g., the order in which
- * files are first opened).
- */
-
- return 0;
-}
-
-/*
- * Sort the record_runtimes and records by MPI rank to facilitate shared redux.
- * This requires craftiness and additional heap utilization because the records
- * (but not record_runtimes) have variable size. Currently has to temporarily
- * duplicate the entire record_buffer; there is room for more memory-efficient
- * optimization if this becomes a scalability issue.
- */
-int sort_lustre_records()
-{
- int i;
- struct darshan_lustre_record *rec;
- struct lustre_record_runtime *rec_rt, *tmp_rec_rt;
- char *new_buf, *p;
-
- /* Create a new buffer to store an entire replica of record_buffer. Since
- * we know the exact size of record_buffer's useful data at this point, we
- * can allocate the exact amount we need instead of record_buffer_max */
- new_buf = malloc(lustre_runtime->record_buffer_used);
- p = new_buf;
- if ( !new_buf )
- return 1;
-
- /* qsort breaks the hash table, so delete it now to free its memory buffers
- * and prevent later confusion */
- HASH_ITER( hlink, lustre_runtime->record_runtime_hash, rec_rt, tmp_rec_rt )
- HASH_DELETE( hlink, lustre_runtime->record_runtime_hash, rec_rt );
-
- /* sort the runtime records, which is has fixed-length elements */
- qsort(
- lustre_runtime->record_runtime_array,
- lustre_runtime->record_count,
- sizeof(struct lustre_record_runtime),
- lustre_record_compare
- );
-
- /* rebuild the hash and array with the qsorted runtime records */
- for ( i = 0; i < lustre_runtime->record_count; i++ )
- {
- rec_rt = &(lustre_runtime->record_runtime_array[i]);
- HASH_ADD(hlink, lustre_runtime->record_runtime_hash, record->rec_id, sizeof(darshan_record_id), rec_rt );
- }
-
- /* create reordered record buffer, then copy it back in place */
- for ( i = 0; i < lustre_runtime->record_count; i++ )
- {
- rec_rt = &(lustre_runtime->record_runtime_array[i]);
- memcpy( p, rec_rt->record, rec_rt->record_size );
- /* fix record pointers within each runtime record too - pre-emptively
- * point them at where they will live in record_buffer after we memcpy
- * below */
- rec_rt->record = (struct darshan_lustre_record *)((char*)(lustre_runtime->record_buffer) + (p - new_buf));
-
- p += rec_rt->record_size;
- }
- memcpy(
- lustre_runtime->record_buffer,
- new_buf,
- lustre_runtime->record_buffer_used );
-
- free(new_buf);
- return 0;
-}
-
-/* this is just boilerplate reduction code that isn't currently used */
-static void lustre_record_reduction_op(void* infile_v, void* inoutfile_v,
- int *len, MPI_Datatype *datatype)
-{
- struct darshan_lustre_record tmp_record;
- struct darshan_lustre_record *infile = infile_v;
- struct darshan_lustre_record *inoutfile = inoutfile_v;
- int i, j;
-
- assert(lustre_runtime);
-
- for( i=0; i<*len; i++ )
- {
- memset(&tmp_record, 0, sizeof(struct darshan_lustre_record));
- tmp_record.rec_id = infile->rec_id;
- tmp_record.rank = -1;
-
- /* preserve only rank 0's value */
- for( j = LUSTRE_OSTS; j < LUSTRE_NUM_INDICES; j++)
- {
- if ( my_rank == 0 )
- {
- tmp_record.counters[j] = infile->counters[j];
- }
- else
- {
- tmp_record.counters[j] = inoutfile->counters[j];
- }
- }
-
- /* update pointers */
- *inoutfile = tmp_record;
- inoutfile++;
- infile++;
- }
-
- return;
-}
-
-/*
- * Dump the memory structure of our records and runtime records
- */
-void print_lustre_runtime( void )
-{
- int i, j;
- struct darshan_lustre_record *rec;
-
- /* print what we just loaded */
- for ( i = 0; i < lustre_runtime->record_count; i++ )
- {
- rec = (lustre_runtime->record_runtime_array[i]).record;
- printf( "File %2d\n", i );
- for ( j = 0; j < LUSTRE_NUM_INDICES; j++ )
- {
- printf( " Counter %-2d: %10ld, addr %ld\n",
- j,
- rec->counters[j],
- (char*)(&(rec->counters[j])) - (char*)(lustre_runtime->record_buffer) );
- }
- for ( j = 0; j < rec->counters[LUSTRE_STRIPE_WIDTH]; j++ )
- {
- if ( j > 0 && j % 2 == 0 ) printf("\n");
- printf( " Stripe %-2d: %10ld, addr %-9d",
- j,
- rec->ost_ids[j],
- (char*)(&(rec->ost_ids[j])) - (char*)(lustre_runtime->record_buffer) );
- }
- printf( "\n" );
- }
- return;
-}
-
-/*
- * Dump the order in which records appear in memory
- */
-void print_array( void )
-{
- int i;
- struct lustre_record_runtime *rec_rt;
- printf("*** DUMPING RECORD LIST BY ARRAY SEQUENCE\n");
- for ( i = 0; i < lustre_runtime->record_count; i++ )
- {
- rec_rt = &(lustre_runtime->record_runtime_array[i]);
- printf( "*** record %d rank %d osts %d\n",
- rec_rt->record->rec_id,
- rec_rt->record->rank,
- rec_rt->record->counters[LUSTRE_STRIPE_WIDTH]);
- }
-}
-void print_hash( void )
-{
- struct lustre_record_runtime *rec_rt, *tmp_rec_rt;
- printf("*** DUMPING RECORD LIST BY HASH SEQUENCE\n");
- HASH_ITER( hlink, lustre_runtime->record_runtime_hash, rec_rt, tmp_rec_rt )
- {
- printf( "*** record %d rank %d osts %d\n",
- rec_rt->record->rec_id,
- rec_rt->record->rank,
- rec_rt->record->counters[LUSTRE_STRIPE_WIDTH]);
- }
- return;
-}
-
-
-
-
-/*
- * Local variables:
- * c-indent-level: 4
- * c-basic-offset: 4
- * End:
- *
- * vim: ts=8 sts=4 sw=4 expandtab
- */
View it on GitLab: https://xgitlab.cels.anl.gov/darshan/darshan/compare/1561ab231dbda237c064146d05019213dc74c7f8...c95a5d88b871c72b6c092f58985235321d52f747
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/darshan-commits/attachments/20170124/ee9c56f4/attachment-0001.html>
More information about the Darshan-commits
mailing list