[mpich2-dev] [PATCH] Make the VCR API independent of the nature of the VCR list

Pavan Balaji balaji at mcs.anl.gov
Wed Oct 6 06:44:02 CDT 2010


Thanks Joe. This looks good, though we'll need to port it to CH3 as well.

mpich2-core: Should this go into 1.3? This is an ADI change (though 
fairly minor). But we are already at 1.3rc2, so I don't know if we want 
to make such a change at this point.

  -- Pavan

On 10/05/2010 04:40 PM, Joe Ratterman wrote:
> From: Joe Ratterman <jratt at us.ibm.com>
>
>    Currently, all implementations of a communicator rank list must
>    contain an actual list at least as long as the number of ranks in
>    the communicator.  This is because "comm_ptr->local_vcr[i]" is
>    assumed to work.
>
>    To remove the need for an actual list, the MPID_VCR API can be
>    changed.  Basically, every call site that previously passed
>    "comm_ptr->local_vcr[i]" now passes "comm_ptr->local_vcr, i".  This
>    means that the VCR table can be something other than a list, and
>    that callers request some processing on the "i"th entry instead of
>    actually passing the "i"th value into the function.
>
>    This patch makes the necessary changes to the core of MPICH2 1.3rc2,
>    as well as changing an alternative version of the "dcmf" device.
>
> Signed-off-by: Joe Ratterman <jratt at us.ibm.com>
> ---
>   mpich2/src/include/mpiimpl.h                 |    4 ++--
>   mpich2/src/mpi/comm/comm_create.c            |    4 ++--
>   mpich2/src/mpi/comm/comm_group.c             |    2 +-
>   mpich2/src/mpi/comm/comm_remote_group.c      |    2 +-
>   mpich2/src/mpi/comm/comm_split.c             |   12 ++++++------
>   mpich2/src/mpi/comm/commutil.c               |   10 +++++-----
>   mpich2/src/mpi/comm/intercomm_create.c       |   10 +++++-----
>   mpich2/src/mpi/comm/intercomm_merge.c        |    8 ++++----
>   mpich2/src/mpi/group/grouputil.c             |    2 +-
>   mpich2/src/mpid/dcmfd/include/mpidi_hooks.h  |    3 ++-
>   mpich2/src/mpid/dcmfd/include/mpidi_macros.h |    2 +-
>   mpich2/src/mpid/dcmfd/src/mpid_init.c        |    4 ++--
>   mpich2/src/mpid/dcmfd/src/mpid_vc.c          |   16 ++++++++--------
>   13 files changed, 40 insertions(+), 39 deletions(-)
>
> diff --git a/mpich2/src/include/mpiimpl.h b/mpich2/src/include/mpiimpl.h
> index 013b4fe..6fd86fd 100644
> --- a/mpich2/src/include/mpiimpl.h
> +++ b/mpich2/src/include/mpiimpl.h
> @@ -3116,7 +3116,7 @@ int MPID_VCRT_Get_ptr(MPID_VCRT vcrt, MPID_VCR **vc_pptr);
>   /*@
>     MPID_VCR_Dup - Create a duplicate reference to a virtual connection
>     @*/
> -int MPID_VCR_Dup(MPID_VCR orig_vcr, MPID_VCR * new_vcr);
> +int MPID_VCR_Dup(const MPID_VCR *orig_vcr, unsigned origin_index, MPID_VCR *new_vcr, unsigned new_index);
>
>   /*@
>      MPID_VCR_Get_lpid - Get the local process id that corresponds to a
> @@ -3129,7 +3129,7 @@ int MPID_VCR_Dup(MPID_VCR orig_vcr, MPID_VCR * new_vcr);
>      processes may use different ids to identify the same target process
>     @*/
>   /* We macro-ized this at the device. */
> -int MPID_VCR_Get_lpid(MPID_VCR vcr, int * lpid_ptr);
> +int MPID_VCR_Get_lpid(MPID_VCR *vcr, unsigned index, int * lpid_ptr);
>
>   /* ------------------------------------------------------------------------- */
>   /* Define a macro to allow us to select between statically selected functions
> diff --git a/mpich2/src/mpi/comm/comm_create.c b/mpich2/src/mpi/comm/comm_create.c
> index 33bab93..0241f49 100644
> --- a/mpich2/src/mpi/comm/comm_create.c
> +++ b/mpich2/src/mpi/comm/comm_create.c
> @@ -154,7 +154,7 @@ PMPI_LOCAL int MPIR_Comm_create_calculate_mapping(MPID_Group  *group_ptr,
>               mapping[i] = -1;
>               for (j=0; j<vcr_size; j++) {
>                   int comm_lpid;
> -                MPID_VCR_Get_lpid( vcr[j],&comm_lpid );
> +                MPID_VCR_Get_lpid( vcr, j,&comm_lpid );
>                   if (comm_lpid == group_ptr->lrank_to_lpid[i].lpid) {
>                       mapping[i] = j;
>                       break;
> @@ -207,7 +207,7 @@ PMPI_LOCAL int MPIR_Comm_create_create_and_map_vcrt(int         n,
>           MPIU_DBG_MSG_FMT(COMM,VERBOSE,
>                            (MPIU_DBG_FDEST, "dupping from mapping_vcr=%p rank=%d into new_rank=%d/%d in new_vcr=%p",
>                             mapping_vcr, mapping[i], i, n, vcr));
> -        mpi_errno = MPID_VCR_Dup(mapping_vcr[mapping[i]],&vcr[i]);
> +        mpi_errno = MPID_VCR_Dup(mapping_vcr, mapping[i], vcr, i);
>           if (mpi_errno) MPIU_ERR_POP(mpi_errno);
>       }
>
> diff --git a/mpich2/src/mpi/comm/comm_group.c b/mpich2/src/mpi/comm/comm_group.c
> index 7835307..d0700ae 100644
> --- a/mpich2/src/mpi/comm/comm_group.c
> +++ b/mpich2/src/mpi/comm/comm_group.c
> @@ -49,7 +49,7 @@ int MPIR_Comm_group_impl(MPID_Comm *comm_ptr, MPID_Group **group_ptr)
>   	    local_vcr = comm_ptr->vcr;
>   	
>   	for (i=0; i<n; i++) {
> -	    (void) MPID_VCR_Get_lpid( local_vcr[i],&lpid );
> +	    (void) MPID_VCR_Get_lpid( local_vcr, i,&lpid );
>   	    (*group_ptr)->lrank_to_lpid[i].lrank = i;
>   	    (*group_ptr)->lrank_to_lpid[i].lpid  = lpid;
>   	}
> diff --git a/mpich2/src/mpi/comm/comm_remote_group.c b/mpich2/src/mpi/comm/comm_remote_group.c
> index a331bd0..5792df7 100644
> --- a/mpich2/src/mpi/comm/comm_remote_group.c
> +++ b/mpich2/src/mpi/comm/comm_remote_group.c
> @@ -41,7 +41,7 @@ int MPIR_Comm_remote_group_impl(MPID_Comm *comm_ptr, MPID_Group **group_ptr)
>
>           for (i=0; i<n; i++) {
>               (*group_ptr)->lrank_to_lpid[i].lrank = i;
> -            (void) MPID_VCR_Get_lpid( comm_ptr->vcr[i],&lpid );
> +            (void) MPID_VCR_Get_lpid( comm_ptr->vcr, i,&lpid );
>               (*group_ptr)->lrank_to_lpid[i].lpid  = lpid;
>           }
>           (*group_ptr)->size = n;
> diff --git a/mpich2/src/mpi/comm/comm_split.c b/mpich2/src/mpi/comm/comm_split.c
> index 964f241..603ee78 100644
> --- a/mpich2/src/mpi/comm/comm_split.c
> +++ b/mpich2/src/mpi/comm/comm_split.c
> @@ -236,8 +236,8 @@ int MPIR_Comm_split_impl(MPID_Comm *comm_ptr, int color, int key, MPID_Comm **ne
>   	    MPID_VCRT_Get_ptr( (*newcomm_ptr)->local_vcrt,
>   			&(*newcomm_ptr)->local_vcr );
>   	    for (i=0; i<new_size; i++) {
> -		MPID_VCR_Dup( comm_ptr->local_vcr[keytable[i].color],
> -			&(*newcomm_ptr)->local_vcr[i] );
> +		MPID_VCR_Dup( comm_ptr->local_vcr, keytable[i].color,
> +			      (*newcomm_ptr)->local_vcr, i );
>   		if (keytable[i].color == comm_ptr->rank) {
>   		    (*newcomm_ptr)->rank = i;
>   		}
> @@ -261,8 +261,8 @@ int MPIR_Comm_split_impl(MPID_Comm *comm_ptr, int color, int key, MPID_Comm **ne
>   	    MPID_VCRT_Create( new_remote_size,&(*newcomm_ptr)->vcrt );
>   	    MPID_VCRT_Get_ptr( (*newcomm_ptr)->vcrt,&(*newcomm_ptr)->vcr );
>   	    for (i=0; i<new_remote_size; i++) {
> -		MPID_VCR_Dup( comm_ptr->vcr[remotekeytable[i].color],
> -			&(*newcomm_ptr)->vcr[i] );
> +		MPID_VCR_Dup( comm_ptr->vcr, remotekeytable[i].color,
> +			      (*newcomm_ptr)->vcr, i );
>   	    }
>
>   	    (*newcomm_ptr)->context_id     = remote_context_id;
> @@ -278,8 +278,8 @@ int MPIR_Comm_split_impl(MPID_Comm *comm_ptr, int color, int key, MPID_Comm **ne
>   	    MPID_VCRT_Create( new_size,&(*newcomm_ptr)->vcrt );
>   	    MPID_VCRT_Get_ptr( (*newcomm_ptr)->vcrt,&(*newcomm_ptr)->vcr );
>   	    for (i=0; i<new_size; i++) {
> -		MPID_VCR_Dup( comm_ptr->vcr[keytable[i].color],
> -			&(*newcomm_ptr)->vcr[i] );
> +		MPID_VCR_Dup( comm_ptr->vcr, keytable[i].color,
> +			      (*newcomm_ptr)->vcr, i );
>   		if (keytable[i].color == comm_ptr->rank) {
>   		    (*newcomm_ptr)->rank = i;
>   		}
> diff --git a/mpich2/src/mpi/comm/commutil.c b/mpich2/src/mpi/comm/commutil.c
> index 1eeaedc..fb7108d 100644
> --- a/mpich2/src/mpi/comm/commutil.c
> +++ b/mpich2/src/mpi/comm/commutil.c
> @@ -269,8 +269,8 @@ int MPIR_Comm_commit(MPID_Comm *comm)
>               for (i = 0; i<  num_local; ++i) {
>                   /* For rank i in the new communicator, find the corresponding
>                      rank in the input communicator */
> -                MPID_VCR_Dup( comm->vcr[local_procs[i]],
> -&comm->node_comm->vcr[i] );
> +                MPID_VCR_Dup( comm->vcr, local_procs[i],
> +                              comm->node_comm->vcr, i );
>               }
>
>               MPID_Dev_comm_create_hook( comm->node_comm );
> @@ -297,8 +297,8 @@ int MPIR_Comm_commit(MPID_Comm *comm)
>               for (i = 0; i<  num_external; ++i) {
>                   /* For rank i in the new communicator, find the corresponding
>                      rank in the input communicator */
> -                MPID_VCR_Dup( comm->vcr[external_procs[i]],
> -&comm->node_roots_comm->vcr[i] );
> +                MPID_VCR_Dup( comm->vcr, external_procs[i],
> +                              comm->node_roots_comm->vcr, i );
>               }
>
>               MPID_Dev_comm_create_hook( comm->node_roots_comm );
> @@ -1006,7 +1006,7 @@ int MPIR_Comm_copy( MPID_Comm *comm_ptr, int size, MPID_Comm **outcomm_ptr )
>   	for (i=0; i<size; i++) {
>   	    /* For rank i in the new communicator, find the corresponding
>   	       rank in the input communicator */
> -	    MPID_VCR_Dup( comm_ptr->vcr[i],&newcomm_ptr->vcr[i] );
> +	    MPID_VCR_Dup( comm_ptr->vcr, i, newcomm_ptr->vcr, i );
>   	}
>       }
>
> diff --git a/mpich2/src/mpi/comm/intercomm_create.c b/mpich2/src/mpi/comm/intercomm_create.c
> index e713830..0cb5c1e 100644
> --- a/mpich2/src/mpi/comm/intercomm_create.c
> +++ b/mpich2/src/mpi/comm/intercomm_create.c
> @@ -105,7 +105,7 @@ PMPI_LOCAL int MPID_GPID_GetAllInComm( MPID_Comm *comm_ptr, int local_size,
>
>       for (i=0; i<comm_ptr->local_size; i++) {
>   	*gpid++ = 0;
> -	(void)MPID_VCR_Get_lpid( comm_ptr->vcr[i], gpid );
> +	(void)MPID_VCR_Get_lpid( comm_ptr->vcr, i, gpid );
>   	gpid++;
>       }
>       *singlePG = 1;
> @@ -140,8 +140,8 @@ PMPI_LOCAL int MPID_VCR_CommFromLpids( MPID_Comm *newcomm_ptr,
>   	/* printf( "[%d] Remote rank %d has lpid %d\n",
>   	   MPIR_Process.comm_world->rank, i, lpids[i] ); */
>   	if (lpids[i]<  commworld_ptr->remote_size) {
> -	    MPID_VCR_Dup( commworld_ptr->vcr[lpids[i]],
> -			&newcomm_ptr->vcr[i] );
> +	    MPID_VCR_Dup( commworld_ptr->vcr, lpids[i],
> +			  newcomm_ptr->vcr, i );
>   	}
>   	else {
>   	    /* We must find the corresponding vcr for a given lpid */
> @@ -163,7 +163,7 @@ PMPI_LOCAL int MPID_LPID_GetAllInComm( MPID_Comm *comm_ptr, int local_size,
>       /* FIXME: Should be using the local_size argument */
>       MPIU_Assert( comm_ptr->local_size == local_size );
>       for (i=0; i<comm_ptr->local_size; i++) {
> -	(void)MPID_VCR_Get_lpid( comm_ptr->vcr[i],&local_lpids[i] );
> +	(void)MPID_VCR_Get_lpid( comm_ptr->vcr, i,&local_lpids[i] );
>       }
>       return 0;
>   }
> @@ -539,7 +539,7 @@ int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader,
>       MPID_VCRT_Create( comm_ptr->local_size,&newcomm_ptr->local_vcrt );
>       MPID_VCRT_Get_ptr( newcomm_ptr->local_vcrt,&newcomm_ptr->local_vcr );
>       for (i=0; i<comm_ptr->local_size; i++) {
> -	MPID_VCR_Dup( comm_ptr->vcr[i],&newcomm_ptr->local_vcr[i] );
> +	MPID_VCR_Dup( comm_ptr->vcr, i, newcomm_ptr->local_vcr, i );
>       }
>
>       /* Inherit the error handler (if any) */
> diff --git a/mpich2/src/mpi/comm/intercomm_merge.c b/mpich2/src/mpi/comm/intercomm_merge.c
> index 8fae687..b672767 100644
> --- a/mpich2/src/mpi/comm/intercomm_merge.c
> +++ b/mpich2/src/mpi/comm/intercomm_merge.c
> @@ -227,11 +227,11 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, MPI_Comm *newintracomm)
>   	/* remote group first */
>   	j = 0;
>   	for (i=0; i<comm_ptr->remote_size; i++) {
> -	    MPID_VCR_Dup( comm_ptr->vcr[i],&newcomm_ptr->vcr[j++] );
> +	    MPID_VCR_Dup( comm_ptr->vcr, i, newcomm_ptr->vcr, j++ );
>   	}
>   	for (i=0; i<comm_ptr->local_size; i++) {
>   	    if (i == comm_ptr->rank) newcomm_ptr->rank = j;
> -	    MPID_VCR_Dup( comm_ptr->local_vcr[i],&newcomm_ptr->vcr[j++] );
> +	    MPID_VCR_Dup( comm_ptr->local_vcr, i, newcomm_ptr->vcr, j++ );
>   	}
>       }
>       else {
> @@ -239,10 +239,10 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, MPI_Comm *newintracomm)
>   	j = 0;
>   	for (i=0; i<comm_ptr->local_size; i++) {
>   	    if (i == comm_ptr->rank) newcomm_ptr->rank = j;
> -	    MPID_VCR_Dup( comm_ptr->local_vcr[i],&newcomm_ptr->vcr[j++] );
> +	    MPID_VCR_Dup( comm_ptr->local_vcr, i, newcomm_ptr->vcr, j++ );
>   	}
>   	for (i=0; i<comm_ptr->remote_size; i++) {
> -	    MPID_VCR_Dup( comm_ptr->vcr[i],&newcomm_ptr->vcr[j++] );
> +	    MPID_VCR_Dup( comm_ptr->vcr, i, newcomm_ptr->vcr, j++ );
>   	}
>       }
>
> diff --git a/mpich2/src/mpi/group/grouputil.c b/mpich2/src/mpi/group/grouputil.c
> index dda81b3..963bad4 100644
> --- a/mpich2/src/mpi/group/grouputil.c
> +++ b/mpich2/src/mpi/group/grouputil.c
> @@ -383,7 +383,7 @@ int MPIR_GroupCheckVCRSubset( MPID_Group *group_ptr, int vsize, MPID_VCR *vcr,
>   			vsize*sizeof(MPID_Group_pmap_t),mpi_errno, "" );
>       /* Initialize the vmap */
>       for (i=0; i<vsize; i++) {
> -	MPID_VCR_Get_lpid( vcr[i],&vmap[i].lpid );
> +	MPID_VCR_Get_lpid( vcr, i,&vmap[i].lpid );
>   	vmap[i].lrank     = i;
>   	vmap[i].next_lpid = 0;
>   	vmap[i].flag      = 0;
> diff --git a/mpich2/src/mpid/dcmfd/include/mpidi_hooks.h b/mpich2/src/mpid/dcmfd/include/mpidi_hooks.h
> index d7db400..9081a4a 100644
> --- a/mpich2/src/mpid/dcmfd/include/mpidi_hooks.h
> +++ b/mpich2/src/mpid/dcmfd/include/mpidi_hooks.h
> @@ -13,7 +13,8 @@
>   #define __include_mpidi_hooks_h__
>
>
> -typedef dcmf_task_t         MPID_VCR;
> +typedef void                MPID_VCR;
> +typedef dcmf_task_t         MPIDI_VCR;
>   typedef struct MPIDI_VCRT * MPID_VCRT;
>
>   typedef size_t              MPIDI_msg_sz_t;
> diff --git a/mpich2/src/mpid/dcmfd/include/mpidi_macros.h b/mpich2/src/mpid/dcmfd/include/mpidi_macros.h
> index 3acd105..73f1be9 100644
> --- a/mpich2/src/mpid/dcmfd/include/mpidi_macros.h
> +++ b/mpich2/src/mpid/dcmfd/include/mpidi_macros.h
> @@ -232,7 +232,7 @@ MPIDI_Context_local(MPID_Request * req)
>
>   #define MPID_VCR_GET_LPID(vcr, index)           \
>   ({                                              \
> -  vcr[index];                                   \
> +  ((MPIDI_VCR*)vcr)[index];                     \
>   })
>   #define MPID_GPID_Get(comm_ptr, rank, gpid)             \
>   {                                                       \
> diff --git a/mpich2/src/mpid/dcmfd/src/mpid_init.c b/mpich2/src/mpid/dcmfd/src/mpid_init.c
> index 1a31681..4acdb13 100644
> --- a/mpich2/src/mpid/dcmfd/src/mpid_init.c
> +++ b/mpich2/src/mpid/dcmfd/src/mpid_init.c
> @@ -277,7 +277,7 @@ int MPID_Init(int * argc,
>     rc = MPID_VCRT_Get_ptr(comm->vcrt,&comm->vcr);
>     MPID_assert(rc == MPI_SUCCESS);
>     for (i=0; i<size; i++)
> -    comm->vcr[i] = i;
> +    ((MPIDI_VCR*)comm->vcr)[i] = i;
>
>      /* basically a noop for now */
>     MPIDI_Comm_create(comm);
> @@ -295,7 +295,7 @@ int MPID_Init(int * argc,
>     MPID_assert(rc == MPI_SUCCESS);
>     rc = MPID_VCRT_Get_ptr(comm->vcrt,&comm->vcr);
>     MPID_assert(rc == MPI_SUCCESS);
> -  comm->vcr[0] = rank;
> +  ((MPIDI_VCR*)comm->vcr)[0] = rank;
>
>
>     /* ------------------------------- */
> diff --git a/mpich2/src/mpid/dcmfd/src/mpid_vc.c b/mpich2/src/mpid/dcmfd/src/mpid_vc.c
> index a106fc0..2fb4769 100644
> --- a/mpich2/src/mpid/dcmfd/src/mpid_vc.c
> +++ b/mpich2/src/mpid/dcmfd/src/mpid_vc.c
> @@ -17,20 +17,20 @@ struct MPIDI_VCRT
>   {
>     MPIU_OBJECT_HEADER;
>     unsigned size;          /**<  Number of entries in the table */
> -  MPID_VCR vcr_table[0];  /**<  Array of virtual connection references */
> +  MPIDI_VCR vcr_table[0]; /**<  Array of virtual connection references */
>   };
>
>
> -int MPID_VCR_Dup(MPID_VCR orig_vcr, MPID_VCR * new_vcr)
> +int MPID_VCR_Dup(const MPID_VCR *orig_vcr, unsigned origin_index, MPID_VCR *new_vcr, unsigned new_index)
>   {
> -    *new_vcr = orig_vcr;
> -    return MPI_SUCCESS;
> +  ((MPIDI_VCR*)new_vcr)[new_index] = MPID_VCR_GET_LPID(orig_vcr, origin_index);
> +  return MPI_SUCCESS;
>   }
>
> -int MPID_VCR_Get_lpid(MPID_VCR vcr, int * lpid_ptr)
> +int MPID_VCR_Get_lpid(MPID_VCR *vcr, unsigned index, int * lpid_ptr)
>   {
> -    *lpid_ptr = (int)vcr;
> -    return MPI_SUCCESS;
> +  *lpid_ptr = MPID_VCR_GET_LPID(vcr, index);
> +  return MPI_SUCCESS;
>   }
>
>   int MPID_VCRT_Create(int size, MPID_VCRT *vcrt_ptr)
> @@ -38,7 +38,7 @@ int MPID_VCRT_Create(int size, MPID_VCRT *vcrt_ptr)
>       struct MPIDI_VCRT * vcrt;
>       int result;
>
> -    vcrt = MPIU_Malloc(sizeof(struct MPIDI_VCRT) + size*sizeof(MPID_VCR));
> +    vcrt = MPIU_Malloc(sizeof(struct MPIDI_VCRT) + size*sizeof(MPIDI_VCR));
>       if (vcrt != NULL)
>       {
>           MPIU_Object_set_ref(vcrt, 1);

-- 
Pavan Balaji
http://www.mcs.anl.gov/~balaji


More information about the mpich2-dev mailing list