[mpich2-dev] [PATCH 1/1] Issue 4120: Fix mpid_type_debug alignment problem.

Rob Ross rross at mcs.anl.gov
Wed Mar 19 13:01:32 CDT 2008


Ah, looking at mpid_datatype_contents.c I see what is going on.

Here's what happened. I naively wrote the code that stores the  
contents w/out doing padding years ago. Then on some system or another  
that blew up -- we needed the padding to correctly access types.

When I updated the code, I appear to have missed the use of these  
pointers in mpid_type_debug.c, but where I was testing it must have  
been fine.

I think we can just fix the debug routines to use the same logic as in  
mpid_datatype_contig WRT padding? That would avoid us allocating and  
freeing space in the debug routines.

Rob

On Mar 19, 2008, at 10:55 AM, Bob Cernohous wrote:

>
> I was trying to dump out some datatypes with mpid_type_debug  
> routines and got garbage.
>
> It looks like it can't just calculate pointer offsets like it was  
> doing.  There's 8 byte padding
> on the datatype structure and the arrays of types/ints/aints.   
> That's not something
> I added or changed.  I just fixed debug by calling  
> MPIDI_Datatype_get_contents_xxxx
> which calculates the correct offsets (with alignment padding) using  
> code like:
>
>   int align_sz = 8, epsilon;
>
>     struct_sz = sizeof(MPID_Datatype_contents);
>     types_sz  = cp->nr_types * sizeof(MPI_Datatype);
>
>     /* pad the struct, types, and ints before we allocate.
>      *
>      * note: it's not necessary that we pad the aints,
>      *       because they are last in the region.
>      */
>     if ((epsilon = struct_sz % align_sz)) {
>         struct_sz += align_sz - epsilon;
>     }
>     if ((epsilon = types_sz % align_sz)) {
>         types_sz += align_sz - epsilon;
>     }
>
>     ptr = ((char *) cp) + struct_sz + types_sz;
>
> rather than simply doing:
>
> > > -    ints  = (int *) (((char *) types) +
> > > -           cp->nr_types * sizeof(MPI_Datatype));
>
> owner-mpich2-dev at mcs.anl.gov wrote on 03/19/2008 09:23:16 AM:
>
> > Hi Bob,
> >
> > You're saying that we can't just point at the values because they
> > aren't aligned, that they would need to be aligned if we were  
> going to
> > use them in this way?
> >
> > Thanks,
> >
> > Rob
> >
> > On Mar 19, 2008, at 9:11 AM, Bob Cernohous wrote:
> >
> > > Can't just point to ints/aints in the datatype.  Have to figure  
> out
> > > the
> > > alignment.  Easiest way is to call  
> MPIDI_Datatype_get_contents_xxxx().
> > >
> > > Signed-off-by: Bob Cernohous <bobc at us.ibm.com>
> > > ---
> > > .../src/mpid/common/datatype/mpid_type_debug.c     |   46 +++++++ 
> ++++
> > > ++------
> > > 1 files changed, 31 insertions(+), 15 deletions(-)
> > >
> > > diff --git a/lib/mpi/mpich2/src/mpid/common/datatype/
> > > mpid_type_debug.c b/lib/mpi/mpich2/src/mpid/common/datatype/
> > > mpid_type_debug.c
> > > index 1e3c761..71fafb7 100644
> > > --- a/lib/mpi/mpich2/src/mpid/common/datatype/mpid_type_debug.c
> > > +++ b/lib/mpi/mpich2/src/mpid/common/datatype/mpid_type_debug.c
> > > @@ -494,6 +494,12 @@ static char *MPIDI_Datatype_depth_spacing(int
> > > depth)
> > >    default: return d5;
> > >     }
> > > }
> > > +
> > > +#define __mpidi_datatype_free_and_return { \
> > > + if (cp->nr_ints  > 0) MPIU_Free(ints );   \
> > > + if (cp->nr_aints > 0) MPIU_Free(aints);   \
> > > + if (cp->nr_types > 0) MPIU_Free(types);   \
> > > + return;                                 }
> > >
> > > void MPIDI_Datatype_contents_printf(MPI_Datatype type,
> > >                 int depth,
> > > @@ -522,12 +528,22 @@ void
> > > MPIDI_Datatype_contents_printf(MPI_Datatype type,
> > >    return;
> > >     }
> > >
> > > -    types = (MPI_Datatype *) (((char *) cp) +
> > > -               sizeof(MPID_Datatype_contents));
> > > -    ints  = (int *) (((char *) types) +
> > > -           cp->nr_types * sizeof(MPI_Datatype));
> > > -    aints = (MPI_Aint *) (((char *) ints) +
> > > -           cp->nr_ints * sizeof(int));
> > > +    if (cp->nr_ints > 0)
> > > +    {
> > > +      ints = (int*) MPIU_Malloc(cp->nr_ints * sizeof(int));
> > > +      MPIDI_Datatype_get_contents_ints(cp, ints);
> > > +    }
> > > +
> > > +    if (cp->nr_aints > 0) {
> > > +      aints = (MPI_Aint*) MPIU_Malloc(cp->nr_aints *
> > > sizeof(MPI_Aint));
> > > +      MPIDI_Datatype_get_contents_aints(cp, aints);
> > > +    }
> > > +
> > > +    if (cp->nr_types > 0) {
> > > +      types = (MPI_Datatype*) MPIU_Malloc(cp->nr_types *
> > > sizeof(MPI_Datatype));
> > > +      MPIDI_Datatype_get_contents_types(cp, types);
> > > +    }
> > > +
> > >
> > >     MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %scombiner: %s",
> > >           MPIDI_Datatype_depth_spacing(depth),
> > > @@ -536,10 +552,10 @@ void
> > > MPIDI_Datatype_contents_printf(MPI_Datatype type,
> > >     switch (cp->combiner) {
> > >    case MPI_COMBINER_NAMED:
> > >    case MPI_COMBINER_DUP:
> > > -       return;
> > > +       __mpidi_datatype_free_and_return;
> > >    case MPI_COMBINER_RESIZED:
> > >        /* not done */
> > > -       return;
> > > +       __mpidi_datatype_free_and_return;
> > >    case MPI_COMBINER_CONTIGUOUS:
> > >        MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %scontig ct  
> = %d\n",
> > >              MPIDI_Datatype_depth_spacing(depth),
> > > @@ -547,7 +563,7 @@ void  
> MPIDI_Datatype_contents_printf(MPI_Datatype
> > > type,
> > >        MPIDI_Datatype_contents_printf(*types,
> > >                   depth + 1,
> > >                   acount);
> > > -       return;
> > > +       __mpidi_datatype_free_and_return;
> > >    case MPI_COMBINER_VECTOR:
> > >        MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,
> > >                    "# %svector ct = %d, blk = %d, str = %d\n",
> > > @@ -558,7 +574,7 @@ void  
> MPIDI_Datatype_contents_printf(MPI_Datatype
> > > type,
> > >        MPIDI_Datatype_contents_printf(*types,
> > >                   depth + 1,
> > >                   acount);
> > > -       return;
> > > +       __mpidi_datatype_free_and_return;
> > >         case MPI_COMBINER_HVECTOR:
> > >        MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,
> > >                      "# %shvector ct = %d, blk = %d, str = "
> > > MPI_AINT_FMT_DEC_SPEC "\n",
> > > @@ -569,7 +585,7 @@ void  
> MPIDI_Datatype_contents_printf(MPI_Datatype
> > > type,
> > >        MPIDI_Datatype_contents_printf(*types,
> > >                   depth + 1,
> > >                   acount);
> > > -       return;
> > > +       __mpidi_datatype_free_and_return;
> > >    case MPI_COMBINER_INDEXED:
> > >        MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %sindexed ct  
> = %d:",
> > >              MPIDI_Datatype_depth_spacing(depth),
> > > @@ -585,7 +601,7 @@ void  
> MPIDI_Datatype_contents_printf(MPI_Datatype
> > > type,
> > >                       depth + 1,
> > >                       acount);
> > >        }
> > > -       return;
> > > +       __mpidi_datatype_free_and_return;
> > >    case MPI_COMBINER_HINDEXED:
> > >        MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %shindexed  
> ct =
> > > %d:",
> > >              MPIDI_Datatype_depth_spacing(depth),
> > > @@ -601,7 +617,7 @@ void  
> MPIDI_Datatype_contents_printf(MPI_Datatype
> > > type,
> > >                       depth + 1,
> > >                       acount);
> > >        }
> > > -       return;
> > > +       __mpidi_datatype_free_and_return;
> > >    case MPI_COMBINER_STRUCT:
> > >        MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %sstruct ct  
> = %d:",
> > >              MPIDI_Datatype_depth_spacing(depth),
> > > @@ -617,11 +633,11 @@ void
> > > MPIDI_Datatype_contents_printf(MPI_Datatype type,
> > >                       depth + 1,
> > >                       acount);
> > >        }
> > > -       return;
> > > +       __mpidi_datatype_free_and_return;
> > >    default:
> > >        MPIU_DBG_OUT_FMT(DATATYPE,(MPIU_DBG_FDEST,"# %sunhandled
> > > combiner",
> > >          MPIDI_Datatype_depth_spacing(depth)));
> > > -       return;
> > > +       __mpidi_datatype_free_and_return;
> > >     }
> > > }
> > > /* --END ERROR HANDLING-- */
> > > --
> > > 1.5.3.7
> > >
> >




More information about the mpich2-dev mailing list