[mpich2-dev] [PATCH 1/1] Issue 4120: Fix mpid_type_debug alignment problem.
Bob Cernohous
bobc at us.ibm.com
Wed Mar 19 13:32:29 CDT 2008
> I think we can just fix the debug routines to use the same logic as in
> mpid_datatype_contig WRT padding? That would avoid us allocating and
> freeing space in the debug routines.
That's fine. I considered it but since it's debug code I wasn't really
worried about alloc/free.
Bob Cernohous: (T/L 553) 507-253-6093
BobC at us.ibm.com
IBM Rochester, Building 030-2(C335), Department 61L
3605 Hwy 52 North, Rochester, MN 55901-7829
> Chaos reigns within.
> Reflect, repent, and reboot.
> Order shall return.
owner-mpich2-dev at mcs.anl.gov wrote on 03/19/2008 01:01:32 PM:
> Ah, looking at mpid_datatype_contents.c I see what is going on.
>
> Here's what happened. I naively wrote the code that stores the
> contents w/out doing padding years ago. Then on some system or another
> that blew up -- we needed the padding to correctly access types.
>
> When I updated the code, I appear to have missed the use of these
> pointers in mpid_type_debug.c, but where I was testing it must have
> been fine.
>
> I think we can just fix the debug routines to use the same logic as in
> mpid_datatype_contig WRT padding? That would avoid us allocating and
> freeing space in the debug routines.
>
> Rob
>
> On Mar 19, 2008, at 10:55 AM, Bob Cernohous wrote:
>
> >
> > I was trying to dump out some datatypes with mpid_type_debug
> > routines and got garbage.
> >
> > It looks like it can't just calculate pointer offsets like it was
> > doing. There's 8 byte padding
> > on the datatype structure and the arrays of types/ints/aints.
> > That's not something
> > I added or changed. I just fixed debug by calling
> > MPIDI_Datatype_get_contents_xxxx
> > which calculates the correct offsets (with alignment padding) using
> > code like:
> >
> > int align_sz = 8, epsilon;
> >
> > struct_sz = sizeof(MPID_Datatype_contents);
> > types_sz = cp->nr_types * sizeof(MPI_Datatype);
> >
> > /* pad the struct, types, and ints before we allocate.
> > *
> > * note: it's not necessary that we pad the aints,
> > * because they are last in the region.
> > */
> > if ((epsilon = struct_sz % align_sz)) {
> > struct_sz += align_sz - epsilon;
> > }
> > if ((epsilon = types_sz % align_sz)) {
> > types_sz += align_sz - epsilon;
> > }
> >
> > ptr = ((char *) cp) + struct_sz + types_sz;
> >
> > rather than simply doing:
> >
> > > > - ints = (int *) (((char *) types) +
> > > > - cp->nr_types * sizeof(MPI_Datatype));
> >
> > owner-mpich2-dev at mcs.anl.gov wrote on 03/19/2008 09:23:16 AM:
> >
> > > Hi Bob,
> > >
> > > You're saying that we can't just point at the values because they
> > > aren't aligned, that they would need to be aligned if we were
> > going to
> > > use them in this way?
> > >
> > > Thanks,
> > >
> > > Rob
> > >
> > > On Mar 19, 2008, at 9:11 AM, Bob Cernohous wrote:
> > >
> > > > Can't just point to ints/aints in the datatype. Have to figure
> > out
> > > > the
> > > > alignment. Easiest way is to call
> > MPIDI_Datatype_get_contents_xxxx().
> > > >
> > > > Signed-off-by: Bob Cernohous <bobc at us.ibm.com>
> > > > ---
> > > > .../src/mpid/common/datatype/mpid_type_debug.c | 46 +++++++
> > ++++
> > > > ++------
> > > > 1 files changed, 31 insertions(+), 15 deletions(-)
> > > >
> > > > diff --git a/lib/mpi/mpich2/src/mpid/common/datatype/
> > > > mpid_type_debug.c b/lib/mpi/mpich2/src/mpid/common/datatype/
> > > > mpid_type_debug.c
> > > > index 1e3c761..71fafb7 100644
> > > > --- a/lib/mpi/mpich2/src/mpid/common/datatype/mpid_type_debug.c
> > > > +++ b/lib/mpi/mpich2/src/mpid/common/datatype/mpid_type_debug.c
> > > > @@ -494,6 +494,12 @@ static char *MPIDI_Datatype_depth_spacing(int
> > > > depth)
> > > > default: return d5;
> > > > }
> > > > }
> > > > +
> > > > +#define __mpidi_datatype_free_and_return { \
> > > > + if (cp->nr_ints > 0) MPIU_Free(ints ); \
> > > > + if (cp->nr_aints > 0) MPIU_Free(aints); \
> > > > + if (cp->nr_types > 0) MPIU_Free(types); \
> > > > + return; }
> > > >
> > > > void MPIDI_Datatype_contents_printf(MPI_Datatype type,
> > > > int depth,
> > > > @@ -522,12 +528,22 @@ void
> > > > MPIDI_Datatype_contents_printf(MPI_Datatype type,
> > > > return;
> > > > }
> > > >
> > > > - types = (MPI_Datatype *) (((char *) cp) +
> > > > - sizeof(MPID_Datatype_contents));
> > > > - ints = (int *) (((char *) types) +
> > > > - cp->nr_types * sizeof(MPI_Datatype));
> > > > - aints = (MPI_Aint *) (((char *) ints) +
> > > > - cp->nr_ints * sizeof(int));
> > > > + if (cp->nr_ints > 0)
> > > > + {
> > > > + ints = (int*) MPIU_Malloc(cp->nr_ints * sizeof(int));
> > > > + MPIDI_Datatype_get_contents_ints(cp, ints);
> > > > + }
> > > > +
> > > > + if (cp->nr_aints > 0) {
> > > > + aints = (MPI_Aint*) MPIU_Malloc(cp->nr_aints *
> > > > sizeof(MPI_Aint));
> > > > + MPIDI_Datatype_get_contents_aints(cp, aints);
> > > > + }
> > > > +
> > > > + if (cp->nr_types > 0) {
> > > > + types = (MPI_Datatype*) MPIU_Malloc(cp->nr_types *
> > > > sizeof(MPI_Datatype));
> > > > + MPIDI_Datatype_get_contents_types(cp, types);
> > > > + }
> > > > +
> > > >
> > > > MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %scombiner: %s",
> > > > MPIDI_Datatype_depth_spacing(depth),
> > > > @@ -536,10 +552,10 @@ void
> > > > MPIDI_Datatype_contents_printf(MPI_Datatype type,
> > > > switch (cp->combiner) {
> > > > case MPI_COMBINER_NAMED:
> > > > case MPI_COMBINER_DUP:
> > > > - return;
> > > > + __mpidi_datatype_free_and_return;
> > > > case MPI_COMBINER_RESIZED:
> > > > /* not done */
> > > > - return;
> > > > + __mpidi_datatype_free_and_return;
> > > > case MPI_COMBINER_CONTIGUOUS:
> > > > MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %scontig ct
> > = %d\n",
> > > > MPIDI_Datatype_depth_spacing(depth),
> > > > @@ -547,7 +563,7 @@ void
> > MPIDI_Datatype_contents_printf(MPI_Datatype
> > > > type,
> > > > MPIDI_Datatype_contents_printf(*types,
> > > > depth + 1,
> > > > acount);
> > > > - return;
> > > > + __mpidi_datatype_free_and_return;
> > > > case MPI_COMBINER_VECTOR:
> > > > MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,
> > > > "# %svector ct = %d, blk = %d, str = %d\n",
> > > > @@ -558,7 +574,7 @@ void
> > MPIDI_Datatype_contents_printf(MPI_Datatype
> > > > type,
> > > > MPIDI_Datatype_contents_printf(*types,
> > > > depth + 1,
> > > > acount);
> > > > - return;
> > > > + __mpidi_datatype_free_and_return;
> > > > case MPI_COMBINER_HVECTOR:
> > > > MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,
> > > > "# %shvector ct = %d, blk = %d, str = "
> > > > MPI_AINT_FMT_DEC_SPEC "\n",
> > > > @@ -569,7 +585,7 @@ void
> > MPIDI_Datatype_contents_printf(MPI_Datatype
> > > > type,
> > > > MPIDI_Datatype_contents_printf(*types,
> > > > depth + 1,
> > > > acount);
> > > > - return;
> > > > + __mpidi_datatype_free_and_return;
> > > > case MPI_COMBINER_INDEXED:
> > > > MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %sindexed ct
> > = %d:",
> > > > MPIDI_Datatype_depth_spacing(depth),
> > > > @@ -585,7 +601,7 @@ void
> > MPIDI_Datatype_contents_printf(MPI_Datatype
> > > > type,
> > > > depth + 1,
> > > > acount);
> > > > }
> > > > - return;
> > > > + __mpidi_datatype_free_and_return;
> > > > case MPI_COMBINER_HINDEXED:
> > > > MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %shindexed
> > ct =
> > > > %d:",
> > > > MPIDI_Datatype_depth_spacing(depth),
> > > > @@ -601,7 +617,7 @@ void
> > MPIDI_Datatype_contents_printf(MPI_Datatype
> > > > type,
> > > > depth + 1,
> > > > acount);
> > > > }
> > > > - return;
> > > > + __mpidi_datatype_free_and_return;
> > > > case MPI_COMBINER_STRUCT:
> > > > MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %sstruct ct
> > = %d:",
> > > > MPIDI_Datatype_depth_spacing(depth),
> > > > @@ -617,11 +633,11 @@ void
> > > > MPIDI_Datatype_contents_printf(MPI_Datatype type,
> > > > depth + 1,
> > > > acount);
> > > > }
> > > > - return;
> > > > + __mpidi_datatype_free_and_return;
> > > > default:
> > > > MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %sunhandled
> > > > combiner",
> > > > MPIDI_Datatype_depth_spacing(depth)));
> > > > - return;
> > > > + __mpidi_datatype_free_and_return;
> > > > }
> > > > }
> > > > /* --END ERROR HANDLING-- */
> > > > --
> > > > 1.5.3.7
> > > >
> > >
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.mcs.anl.gov/mailman/private/mpich2-dev/attachments/20080319/4ead0331/attachment.htm>
More information about the mpich2-dev
mailing list