[mpich2-dev] [PATCH 1/1] Issue 4120: Fix mpid_type_debug alignment problem.

Rob Ross rross at mcs.anl.gov
Wed Mar 19 14:31:25 CDT 2008


fair enough. -- rob

On Mar 19, 2008, at 1:32 PM, Bob Cernohous wrote:

>
> > I think we can just fix the debug routines to use the same logic  
> as in
> > mpid_datatype_contig WRT padding? That would avoid us allocating and
> > freeing space in the debug routines.
>
> That's fine.  I considered it but since it's debug code I wasn't  
> really worried about alloc/free.
>
> Bob Cernohous:  (T/L 553) 507-253-6093
>
> BobC at us.ibm.com
> IBM Rochester, Building 030-2(C335), Department 61L
> 3605 Hwy 52 North, Rochester,  MN 55901-7829
>
> > Chaos reigns within.
> > Reflect, repent, and reboot.
> > Order shall return.
>
>
> owner-mpich2-dev at mcs.anl.gov wrote on 03/19/2008 01:01:32 PM:
>
> > Ah, looking at mpid_datatype_contents.c I see what is going on.
> >
> > Here's what happened. I naively wrote the code that stores the
> > contents w/out doing padding years ago. Then on some system or  
> another
> > that blew up -- we needed the padding to correctly access types.
> >
> > When I updated the code, I appear to have missed the use of these
> > pointers in mpid_type_debug.c, but where I was testing it must have
> > been fine.
> >
> > I think we can just fix the debug routines to use the same logic  
> as in
> > mpid_datatype_contig WRT padding? That would avoid us allocating and
> > freeing space in the debug routines.
> >
> > Rob
> >
> > On Mar 19, 2008, at 10:55 AM, Bob Cernohous wrote:
> >
> > >
> > > I was trying to dump out some datatypes with mpid_type_debug
> > > routines and got garbage.
> > >
> > > It looks like it can't just calculate pointer offsets like it was
> > > doing.  There's 8 byte padding
> > > on the datatype structure and the arrays of types/ints/aints.
> > > That's not something
> > > I added or changed.  I just fixed debug by calling
> > > MPIDI_Datatype_get_contents_xxxx
> > > which calculates the correct offsets (with alignment padding)  
> using
> > > code like:
> > >
> > >   int align_sz = 8, epsilon;
> > >
> > >     struct_sz = sizeof(MPID_Datatype_contents);
> > >     types_sz  = cp->nr_types * sizeof(MPI_Datatype);
> > >
> > >     /* pad the struct, types, and ints before we allocate.
> > >      *
> > >      * note: it's not necessary that we pad the aints,
> > >      *       because they are last in the region.
> > >      */
> > >     if ((epsilon = struct_sz % align_sz)) {
> > >         struct_sz += align_sz - epsilon;
> > >     }
> > >     if ((epsilon = types_sz % align_sz)) {
> > >         types_sz += align_sz - epsilon;
> > >     }
> > >
> > >     ptr = ((char *) cp) + struct_sz + types_sz;
> > >
> > > rather than simply doing:
> > >
> > > > > -    ints  = (int *) (((char *) types) +
> > > > > -           cp->nr_types * sizeof(MPI_Datatype));
> > >
> > > owner-mpich2-dev at mcs.anl.gov wrote on 03/19/2008 09:23:16 AM:
> > >
> > > > Hi Bob,
> > > >
> > > > You're saying that we can't just point at the values because  
> they
> > > > aren't aligned, that they would need to be aligned if we were
> > > going to
> > > > use them in this way?
> > > >
> > > > Thanks,
> > > >
> > > > Rob
> > > >
> > > > On Mar 19, 2008, at 9:11 AM, Bob Cernohous wrote:
> > > >
> > > > > Can't just point to ints/aints in the datatype.  Have to  
> figure
> > > out
> > > > > the
> > > > > alignment.  Easiest way is to call
> > > MPIDI_Datatype_get_contents_xxxx().
> > > > >
> > > > > Signed-off-by: Bob Cernohous <bobc at us.ibm.com>
> > > > > ---
> > > > > .../src/mpid/common/datatype/mpid_type_debug.c     |   46 +++ 
> ++++
> > > ++++
> > > > > ++------
> > > > > 1 files changed, 31 insertions(+), 15 deletions(-)
> > > > >
> > > > > diff --git a/lib/mpi/mpich2/src/mpid/common/datatype/
> > > > > mpid_type_debug.c b/lib/mpi/mpich2/src/mpid/common/datatype/
> > > > > mpid_type_debug.c
> > > > > index 1e3c761..71fafb7 100644
> > > > > --- a/lib/mpi/mpich2/src/mpid/common/datatype/ 
> mpid_type_debug.c
> > > > > +++ b/lib/mpi/mpich2/src/mpid/common/datatype/ 
> mpid_type_debug.c
> > > > > @@ -494,6 +494,12 @@ static char  
> *MPIDI_Datatype_depth_spacing(int
> > > > > depth)
> > > > >    default: return d5;
> > > > >     }
> > > > > }
> > > > > +
> > > > > +#define __mpidi_datatype_free_and_return { \
> > > > > + if (cp->nr_ints  > 0) MPIU_Free(ints );   \
> > > > > + if (cp->nr_aints > 0) MPIU_Free(aints);   \
> > > > > + if (cp->nr_types > 0) MPIU_Free(types);   \
> > > > > + return;                                 }
> > > > >
> > > > > void MPIDI_Datatype_contents_printf(MPI_Datatype type,
> > > > >                 int depth,
> > > > > @@ -522,12 +528,22 @@ void
> > > > > MPIDI_Datatype_contents_printf(MPI_Datatype type,
> > > > >    return;
> > > > >     }
> > > > >
> > > > > -    types = (MPI_Datatype *) (((char *) cp) +
> > > > > -               sizeof(MPID_Datatype_contents));
> > > > > -    ints  = (int *) (((char *) types) +
> > > > > -           cp->nr_types * sizeof(MPI_Datatype));
> > > > > -    aints = (MPI_Aint *) (((char *) ints) +
> > > > > -           cp->nr_ints * sizeof(int));
> > > > > +    if (cp->nr_ints > 0)
> > > > > +    {
> > > > > +      ints = (int*) MPIU_Malloc(cp->nr_ints * sizeof(int));
> > > > > +      MPIDI_Datatype_get_contents_ints(cp, ints);
> > > > > +    }
> > > > > +
> > > > > +    if (cp->nr_aints > 0) {
> > > > > +      aints = (MPI_Aint*) MPIU_Malloc(cp->nr_aints *
> > > > > sizeof(MPI_Aint));
> > > > > +      MPIDI_Datatype_get_contents_aints(cp, aints);
> > > > > +    }
> > > > > +
> > > > > +    if (cp->nr_types > 0) {
> > > > > +      types = (MPI_Datatype*) MPIU_Malloc(cp->nr_types *
> > > > > sizeof(MPI_Datatype));
> > > > > +      MPIDI_Datatype_get_contents_types(cp, types);
> > > > > +    }
> > > > > +
> > > > >
> > > > >     MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %scombiner:  
> %s",
> > > > >           MPIDI_Datatype_depth_spacing(depth),
> > > > > @@ -536,10 +552,10 @@ void
> > > > > MPIDI_Datatype_contents_printf(MPI_Datatype type,
> > > > >     switch (cp->combiner) {
> > > > >    case MPI_COMBINER_NAMED:
> > > > >    case MPI_COMBINER_DUP:
> > > > > -       return;
> > > > > +       __mpidi_datatype_free_and_return;
> > > > >    case MPI_COMBINER_RESIZED:
> > > > >        /* not done */
> > > > > -       return;
> > > > > +       __mpidi_datatype_free_and_return;
> > > > >    case MPI_COMBINER_CONTIGUOUS:
> > > > >        MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %scontig  
> ct
> > > = %d\n",
> > > > >              MPIDI_Datatype_depth_spacing(depth),
> > > > > @@ -547,7 +563,7 @@ void
> > > MPIDI_Datatype_contents_printf(MPI_Datatype
> > > > > type,
> > > > >        MPIDI_Datatype_contents_printf(*types,
> > > > >                   depth + 1,
> > > > >                   acount);
> > > > > -       return;
> > > > > +       __mpidi_datatype_free_and_return;
> > > > >    case MPI_COMBINER_VECTOR:
> > > > >        MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,
> > > > >                    "# %svector ct = %d, blk = %d, str = %d\n",
> > > > > @@ -558,7 +574,7 @@ void
> > > MPIDI_Datatype_contents_printf(MPI_Datatype
> > > > > type,
> > > > >        MPIDI_Datatype_contents_printf(*types,
> > > > >                   depth + 1,
> > > > >                   acount);
> > > > > -       return;
> > > > > +       __mpidi_datatype_free_and_return;
> > > > >         case MPI_COMBINER_HVECTOR:
> > > > >        MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,
> > > > >                      "# %shvector ct = %d, blk = %d, str = "
> > > > > MPI_AINT_FMT_DEC_SPEC "\n",
> > > > > @@ -569,7 +585,7 @@ void
> > > MPIDI_Datatype_contents_printf(MPI_Datatype
> > > > > type,
> > > > >        MPIDI_Datatype_contents_printf(*types,
> > > > >                   depth + 1,
> > > > >                   acount);
> > > > > -       return;
> > > > > +       __mpidi_datatype_free_and_return;
> > > > >    case MPI_COMBINER_INDEXED:
> > > > >        MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"#  
> %sindexed ct
> > > = %d:",
> > > > >              MPIDI_Datatype_depth_spacing(depth),
> > > > > @@ -585,7 +601,7 @@ void
> > > MPIDI_Datatype_contents_printf(MPI_Datatype
> > > > > type,
> > > > >                       depth + 1,
> > > > >                       acount);
> > > > >        }
> > > > > -       return;
> > > > > +       __mpidi_datatype_free_and_return;
> > > > >    case MPI_COMBINER_HINDEXED:
> > > > >        MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %shindexed
> > > ct =
> > > > > %d:",
> > > > >              MPIDI_Datatype_depth_spacing(depth),
> > > > > @@ -601,7 +617,7 @@ void
> > > MPIDI_Datatype_contents_printf(MPI_Datatype
> > > > > type,
> > > > >                       depth + 1,
> > > > >                       acount);
> > > > >        }
> > > > > -       return;
> > > > > +       __mpidi_datatype_free_and_return;
> > > > >    case MPI_COMBINER_STRUCT:
> > > > >        MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %sstruct  
> ct
> > > = %d:",
> > > > >              MPIDI_Datatype_depth_spacing(depth),
> > > > > @@ -617,11 +633,11 @@ void
> > > > > MPIDI_Datatype_contents_printf(MPI_Datatype type,
> > > > >                       depth + 1,
> > > > >                       acount);
> > > > >        }
> > > > > -       return;
> > > > > +       __mpidi_datatype_free_and_return;
> > > > >    default:
> > > > >        MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"#  
> %sunhandled
> > > > > combiner",
> > > > >          MPIDI_Datatype_depth_spacing(depth)));
> > > > > -       return;
> > > > > +       __mpidi_datatype_free_and_return;
> > > > >     }
> > > > > }
> > > > > /* --END ERROR HANDLING-- */
> > > > > --
> > > > > 1.5.3.7
> > > > >
> > > >
> >




More information about the mpich2-dev mailing list