[mpich2-dev] [PATCH 1/1] Issue 4120: Fix mpid_type_debug alignment problem.
Rob Ross
rross at mcs.anl.gov
Wed Mar 19 13:01:32 CDT 2008
Ah, looking at mpid_datatype_contents.c I see what is going on.
Here's what happened. I naively wrote the code that stores the
contents w/out doing padding years ago. Then on some system or another
that blew up -- we needed the padding to correctly access types.
When I updated the code, I appear to have missed the use of these
pointers in mpid_type_debug.c, but where I was testing it must have
been fine.
I think we can just fix the debug routines to use the same logic as in
mpid_datatype_contig WRT padding? That would avoid us allocating and
freeing space in the debug routines.
Rob
On Mar 19, 2008, at 10:55 AM, Bob Cernohous wrote:
>
> I was trying to dump out some datatypes with mpid_type_debug
> routines and got garbage.
>
> It looks like it can't just calculate pointer offsets like it was
> doing. There's 8 byte padding
> on the datatype structure and the arrays of types/ints/aints.
> That's not something
> I added or changed. I just fixed debug by calling
> MPIDI_Datatype_get_contents_xxxx
> which calculates the correct offsets (with alignment padding) using
> code like:
>
> int align_sz = 8, epsilon;
>
> struct_sz = sizeof(MPID_Datatype_contents);
> types_sz = cp->nr_types * sizeof(MPI_Datatype);
>
> /* pad the struct, types, and ints before we allocate.
> *
> * note: it's not necessary that we pad the aints,
> * because they are last in the region.
> */
> if ((epsilon = struct_sz % align_sz)) {
> struct_sz += align_sz - epsilon;
> }
> if ((epsilon = types_sz % align_sz)) {
> types_sz += align_sz - epsilon;
> }
>
> ptr = ((char *) cp) + struct_sz + types_sz;
>
> rather than simply doing:
>
> > > - ints = (int *) (((char *) types) +
> > > - cp->nr_types * sizeof(MPI_Datatype));
>
> owner-mpich2-dev at mcs.anl.gov wrote on 03/19/2008 09:23:16 AM:
>
> > Hi Bob,
> >
> > You're saying that we can't just point at the values because they
> > aren't aligned, that they would need to be aligned if we were
> going to
> > use them in this way?
> >
> > Thanks,
> >
> > Rob
> >
> > On Mar 19, 2008, at 9:11 AM, Bob Cernohous wrote:
> >
> > > Can't just point to ints/aints in the datatype. Have to figure
> out
> > > the
> > > alignment. Easiest way is to call
> MPIDI_Datatype_get_contents_xxxx().
> > >
> > > Signed-off-by: Bob Cernohous <bobc at us.ibm.com>
> > > ---
> > > .../src/mpid/common/datatype/mpid_type_debug.c | 46 +++++++
> ++++
> > > ++------
> > > 1 files changed, 31 insertions(+), 15 deletions(-)
> > >
> > > diff --git a/lib/mpi/mpich2/src/mpid/common/datatype/
> > > mpid_type_debug.c b/lib/mpi/mpich2/src/mpid/common/datatype/
> > > mpid_type_debug.c
> > > index 1e3c761..71fafb7 100644
> > > --- a/lib/mpi/mpich2/src/mpid/common/datatype/mpid_type_debug.c
> > > +++ b/lib/mpi/mpich2/src/mpid/common/datatype/mpid_type_debug.c
> > > @@ -494,6 +494,12 @@ static char *MPIDI_Datatype_depth_spacing(int
> > > depth)
> > > default: return d5;
> > > }
> > > }
> > > +
> > > +#define __mpidi_datatype_free_and_return { \
> > > + if (cp->nr_ints > 0) MPIU_Free(ints ); \
> > > + if (cp->nr_aints > 0) MPIU_Free(aints); \
> > > + if (cp->nr_types > 0) MPIU_Free(types); \
> > > + return; }
> > >
> > > void MPIDI_Datatype_contents_printf(MPI_Datatype type,
> > > int depth,
> > > @@ -522,12 +528,22 @@ void
> > > MPIDI_Datatype_contents_printf(MPI_Datatype type,
> > > return;
> > > }
> > >
> > > - types = (MPI_Datatype *) (((char *) cp) +
> > > - sizeof(MPID_Datatype_contents));
> > > - ints = (int *) (((char *) types) +
> > > - cp->nr_types * sizeof(MPI_Datatype));
> > > - aints = (MPI_Aint *) (((char *) ints) +
> > > - cp->nr_ints * sizeof(int));
> > > + if (cp->nr_ints > 0)
> > > + {
> > > + ints = (int*) MPIU_Malloc(cp->nr_ints * sizeof(int));
> > > + MPIDI_Datatype_get_contents_ints(cp, ints);
> > > + }
> > > +
> > > + if (cp->nr_aints > 0) {
> > > + aints = (MPI_Aint*) MPIU_Malloc(cp->nr_aints *
> > > sizeof(MPI_Aint));
> > > + MPIDI_Datatype_get_contents_aints(cp, aints);
> > > + }
> > > +
> > > + if (cp->nr_types > 0) {
> > > + types = (MPI_Datatype*) MPIU_Malloc(cp->nr_types *
> > > sizeof(MPI_Datatype));
> > > + MPIDI_Datatype_get_contents_types(cp, types);
> > > + }
> > > +
> > >
> > > MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %scombiner: %s",
> > > MPIDI_Datatype_depth_spacing(depth),
> > > @@ -536,10 +552,10 @@ void
> > > MPIDI_Datatype_contents_printf(MPI_Datatype type,
> > > switch (cp->combiner) {
> > > case MPI_COMBINER_NAMED:
> > > case MPI_COMBINER_DUP:
> > > - return;
> > > + __mpidi_datatype_free_and_return;
> > > case MPI_COMBINER_RESIZED:
> > > /* not done */
> > > - return;
> > > + __mpidi_datatype_free_and_return;
> > > case MPI_COMBINER_CONTIGUOUS:
> > > MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %scontig ct
> = %d\n",
> > > MPIDI_Datatype_depth_spacing(depth),
> > > @@ -547,7 +563,7 @@ void
> MPIDI_Datatype_contents_printf(MPI_Datatype
> > > type,
> > > MPIDI_Datatype_contents_printf(*types,
> > > depth + 1,
> > > acount);
> > > - return;
> > > + __mpidi_datatype_free_and_return;
> > > case MPI_COMBINER_VECTOR:
> > > MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,
> > > "# %svector ct = %d, blk = %d, str = %d\n",
> > > @@ -558,7 +574,7 @@ void
> MPIDI_Datatype_contents_printf(MPI_Datatype
> > > type,
> > > MPIDI_Datatype_contents_printf(*types,
> > > depth + 1,
> > > acount);
> > > - return;
> > > + __mpidi_datatype_free_and_return;
> > > case MPI_COMBINER_HVECTOR:
> > > MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,
> > > "# %shvector ct = %d, blk = %d, str = "
> > > MPI_AINT_FMT_DEC_SPEC "\n",
> > > @@ -569,7 +585,7 @@ void
> MPIDI_Datatype_contents_printf(MPI_Datatype
> > > type,
> > > MPIDI_Datatype_contents_printf(*types,
> > > depth + 1,
> > > acount);
> > > - return;
> > > + __mpidi_datatype_free_and_return;
> > > case MPI_COMBINER_INDEXED:
> > > MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %sindexed ct
> = %d:",
> > > MPIDI_Datatype_depth_spacing(depth),
> > > @@ -585,7 +601,7 @@ void
> MPIDI_Datatype_contents_printf(MPI_Datatype
> > > type,
> > > depth + 1,
> > > acount);
> > > }
> > > - return;
> > > + __mpidi_datatype_free_and_return;
> > > case MPI_COMBINER_HINDEXED:
> > > MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %shindexed
> ct =
> > > %d:",
> > > MPIDI_Datatype_depth_spacing(depth),
> > > @@ -601,7 +617,7 @@ void
> MPIDI_Datatype_contents_printf(MPI_Datatype
> > > type,
> > > depth + 1,
> > > acount);
> > > }
> > > - return;
> > > + __mpidi_datatype_free_and_return;
> > > case MPI_COMBINER_STRUCT:
> > > MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %sstruct ct
> = %d:",
> > > MPIDI_Datatype_depth_spacing(depth),
> > > @@ -617,11 +633,11 @@ void
> > > MPIDI_Datatype_contents_printf(MPI_Datatype type,
> > > depth + 1,
> > > acount);
> > > }
> > > - return;
> > > + __mpidi_datatype_free_and_return;
> > > default:
> > > MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %sunhandled
> > > combiner",
> > > MPIDI_Datatype_depth_spacing(depth)));
> > > - return;
> > > + __mpidi_datatype_free_and_return;
> > > }
> > > }
> > > /* --END ERROR HANDLING-- */
> > > --
> > > 1.5.3.7
> > >
> >
More information about the mpich2-dev
mailing list