[mpich2-dev] [PATCH] Make the VCR API independent of the nature of the VCR list
Dave Goodell
goodell at mcs.anl.gov
Wed Oct 6 07:25:09 CDT 2010
I sent a longer response to Joe's mail last night, but it appears to have gotten clogged in the pipes somewhere...
Basically, this patch appears to be a duplicate of an existing ticket (#1061?). I tried applying it, but it causes a lot of problems in ch3 that aren't easy to solve. We should integrate this sort of functionality, but we might need to do it slightly differently. I just haven't had the time recently.
We should not attempt to slip this in at this late stage for 1.3.
-Dave
On Oct 6, 2010, at 7:44 AM, Pavan Balaji <balaji at mcs.anl.gov> wrote:
>
> Thanks Joe. This looks good, though we'll need to port it to CH3 as well.
>
> mpich2-core: Should this go into 1.3? This is an ADI change (though fairly minor). But we are already at 1.3rc2, so I don't know if we want to make such a change at this point.
>
> -- Pavan
>
> On 10/05/2010 04:40 PM, Joe Ratterman wrote:
>> From: Joe Ratterman<jratt at us.ibm.com>
>>
>> Currently, all implementations of a communicator rank list must
>> contain an actual list at least as long as the number of ranks in
>> the communicator. This is because "comm_ptr->local_vcr[i]" is
>> assumed to work.
>>
>> To avoid the list, it is possible to change the MPID_VCR API.
>> Basically, all places that were previously passed
>> "comm_ptr->local_vcr[i]" now contain "comm_ptr->local_vcr, i". This
>> means that the VCR table can be something other than a list, and
>> that callers are requesting some processing on the "i"th value
>> instead of actually passing the "i"th value into the function.
>>
>> This patch makes the necessary changes to the core of MPICH2 1.3rc2,
>> as well as changing an alternative version of the "dcmf" device.
>>
>> Signed-off-by: Joe Ratterman<jratt at us.ibm.com>
>> ---
>> mpich2/src/include/mpiimpl.h | 4 ++--
>> mpich2/src/mpi/comm/comm_create.c | 4 ++--
>> mpich2/src/mpi/comm/comm_group.c | 2 +-
>> mpich2/src/mpi/comm/comm_remote_group.c | 2 +-
>> mpich2/src/mpi/comm/comm_split.c | 12 ++++++------
>> mpich2/src/mpi/comm/commutil.c | 10 +++++-----
>> mpich2/src/mpi/comm/intercomm_create.c | 10 +++++-----
>> mpich2/src/mpi/comm/intercomm_merge.c | 8 ++++----
>> mpich2/src/mpi/group/grouputil.c | 2 +-
>> mpich2/src/mpid/dcmfd/include/mpidi_hooks.h | 3 ++-
>> mpich2/src/mpid/dcmfd/include/mpidi_macros.h | 2 +-
>> mpich2/src/mpid/dcmfd/src/mpid_init.c | 4 ++--
>> mpich2/src/mpid/dcmfd/src/mpid_vc.c | 16 ++++++++--------
>> 13 files changed, 40 insertions(+), 39 deletions(-)
>>
>> diff --git a/mpich2/src/include/mpiimpl.h b/mpich2/src/include/mpiimpl.h
>> index 013b4fe..6fd86fd 100644
>> --- a/mpich2/src/include/mpiimpl.h
>> +++ b/mpich2/src/include/mpiimpl.h
>> @@ -3116,7 +3116,7 @@ int MPID_VCRT_Get_ptr(MPID_VCRT vcrt, MPID_VCR **vc_pptr);
>> /*@
>> MPID_VCR_Dup - Create a duplicate reference to a virtual connection
>> @*/
>> -int MPID_VCR_Dup(MPID_VCR orig_vcr, MPID_VCR * new_vcr);
>> +int MPID_VCR_Dup(const MPID_VCR *orig_vcr, unsigned origin_index, MPID_VCR *new_vcr, unsigned new_index);
>>
>> /*@
>> MPID_VCR_Get_lpid - Get the local process id that corresponds to a
>> @@ -3129,7 +3129,7 @@ int MPID_VCR_Dup(MPID_VCR orig_vcr, MPID_VCR * new_vcr);
>> processes may use different ids to identify the same target process
>> @*/
>> /* We macro-ized this at the device. */
>> -int MPID_VCR_Get_lpid(MPID_VCR vcr, int * lpid_ptr);
>> +int MPID_VCR_Get_lpid(MPID_VCR *vcr, unsigned index, int * lpid_ptr);
>>
>> /* ------------------------------------------------------------------------- */
>> /* Define a macro to allow us to select between statically selected functions
>> diff --git a/mpich2/src/mpi/comm/comm_create.c b/mpich2/src/mpi/comm/comm_create.c
>> index 33bab93..0241f49 100644
>> --- a/mpich2/src/mpi/comm/comm_create.c
>> +++ b/mpich2/src/mpi/comm/comm_create.c
>> @@ -154,7 +154,7 @@ PMPI_LOCAL int MPIR_Comm_create_calculate_mapping(MPID_Group *group_ptr,
>> mapping[i] = -1;
>> for (j=0; j<vcr_size; j++) {
>> int comm_lpid;
>> - MPID_VCR_Get_lpid( vcr[j],&comm_lpid );
>> + MPID_VCR_Get_lpid( vcr, j,&comm_lpid );
>> if (comm_lpid == group_ptr->lrank_to_lpid[i].lpid) {
>> mapping[i] = j;
>> break;
>> @@ -207,7 +207,7 @@ PMPI_LOCAL int MPIR_Comm_create_create_and_map_vcrt(int n,
>> MPIU_DBG_MSG_FMT(COMM,VERBOSE,
>> (MPIU_DBG_FDEST, "dupping from mapping_vcr=%p rank=%d into new_rank=%d/%d in new_vcr=%p",
>> mapping_vcr, mapping[i], i, n, vcr));
>> - mpi_errno = MPID_VCR_Dup(mapping_vcr[mapping[i]],&vcr[i]);
>> + mpi_errno = MPID_VCR_Dup(mapping_vcr, mapping[i], vcr, i);
>> if (mpi_errno) MPIU_ERR_POP(mpi_errno);
>> }
>>
>> diff --git a/mpich2/src/mpi/comm/comm_group.c b/mpich2/src/mpi/comm/comm_group.c
>> index 7835307..d0700ae 100644
>> --- a/mpich2/src/mpi/comm/comm_group.c
>> +++ b/mpich2/src/mpi/comm/comm_group.c
>> @@ -49,7 +49,7 @@ int MPIR_Comm_group_impl(MPID_Comm *comm_ptr, MPID_Group **group_ptr)
>> local_vcr = comm_ptr->vcr;
>>
>> for (i=0; i<n; i++) {
>> - (void) MPID_VCR_Get_lpid( local_vcr[i],&lpid );
>> + (void) MPID_VCR_Get_lpid( local_vcr, i,&lpid );
>> (*group_ptr)->lrank_to_lpid[i].lrank = i;
>> (*group_ptr)->lrank_to_lpid[i].lpid = lpid;
>> }
>> diff --git a/mpich2/src/mpi/comm/comm_remote_group.c b/mpich2/src/mpi/comm/comm_remote_group.c
>> index a331bd0..5792df7 100644
>> --- a/mpich2/src/mpi/comm/comm_remote_group.c
>> +++ b/mpich2/src/mpi/comm/comm_remote_group.c
>> @@ -41,7 +41,7 @@ int MPIR_Comm_remote_group_impl(MPID_Comm *comm_ptr, MPID_Group **group_ptr)
>>
>> for (i=0; i<n; i++) {
>> (*group_ptr)->lrank_to_lpid[i].lrank = i;
>> - (void) MPID_VCR_Get_lpid( comm_ptr->vcr[i],&lpid );
>> + (void) MPID_VCR_Get_lpid( comm_ptr->vcr, i,&lpid );
>> (*group_ptr)->lrank_to_lpid[i].lpid = lpid;
>> }
>> (*group_ptr)->size = n;
>> diff --git a/mpich2/src/mpi/comm/comm_split.c b/mpich2/src/mpi/comm/comm_split.c
>> index 964f241..603ee78 100644
>> --- a/mpich2/src/mpi/comm/comm_split.c
>> +++ b/mpich2/src/mpi/comm/comm_split.c
>> @@ -236,8 +236,8 @@ int MPIR_Comm_split_impl(MPID_Comm *comm_ptr, int color, int key, MPID_Comm **ne
>> MPID_VCRT_Get_ptr( (*newcomm_ptr)->local_vcrt,
>> &(*newcomm_ptr)->local_vcr );
>> for (i=0; i<new_size; i++) {
>> - MPID_VCR_Dup( comm_ptr->local_vcr[keytable[i].color],
>> - &(*newcomm_ptr)->local_vcr[i] );
>> + MPID_VCR_Dup( comm_ptr->local_vcr, keytable[i].color,
>> + (*newcomm_ptr)->local_vcr, i );
>> if (keytable[i].color == comm_ptr->rank) {
>> (*newcomm_ptr)->rank = i;
>> }
>> @@ -261,8 +261,8 @@ int MPIR_Comm_split_impl(MPID_Comm *comm_ptr, int color, int key, MPID_Comm **ne
>> MPID_VCRT_Create( new_remote_size,&(*newcomm_ptr)->vcrt );
>> MPID_VCRT_Get_ptr( (*newcomm_ptr)->vcrt,&(*newcomm_ptr)->vcr );
>> for (i=0; i<new_remote_size; i++) {
>> - MPID_VCR_Dup( comm_ptr->vcr[remotekeytable[i].color],
>> - &(*newcomm_ptr)->vcr[i] );
>> + MPID_VCR_Dup( comm_ptr->vcr, remotekeytable[i].color,
>> + (*newcomm_ptr)->vcr, i );
>> }
>>
>> (*newcomm_ptr)->context_id = remote_context_id;
>> @@ -278,8 +278,8 @@ int MPIR_Comm_split_impl(MPID_Comm *comm_ptr, int color, int key, MPID_Comm **ne
>> MPID_VCRT_Create( new_size,&(*newcomm_ptr)->vcrt );
>> MPID_VCRT_Get_ptr( (*newcomm_ptr)->vcrt,&(*newcomm_ptr)->vcr );
>> for (i=0; i<new_size; i++) {
>> - MPID_VCR_Dup( comm_ptr->vcr[keytable[i].color],
>> - &(*newcomm_ptr)->vcr[i] );
>> + MPID_VCR_Dup( comm_ptr->vcr, keytable[i].color,
>> + (*newcomm_ptr)->vcr, i );
>> if (keytable[i].color == comm_ptr->rank) {
>> (*newcomm_ptr)->rank = i;
>> }
>> diff --git a/mpich2/src/mpi/comm/commutil.c b/mpich2/src/mpi/comm/commutil.c
>> index 1eeaedc..fb7108d 100644
>> --- a/mpich2/src/mpi/comm/commutil.c
>> +++ b/mpich2/src/mpi/comm/commutil.c
>> @@ -269,8 +269,8 @@ int MPIR_Comm_commit(MPID_Comm *comm)
>> for (i = 0; i< num_local; ++i) {
>> /* For rank i in the new communicator, find the corresponding
>> rank in the input communicator */
>> - MPID_VCR_Dup( comm->vcr[local_procs[i]],
>> -&comm->node_comm->vcr[i] );
>> + MPID_VCR_Dup( comm->vcr, local_procs[i],
>> + comm->node_comm->vcr, i );
>> }
>>
>> MPID_Dev_comm_create_hook( comm->node_comm );
>> @@ -297,8 +297,8 @@ int MPIR_Comm_commit(MPID_Comm *comm)
>> for (i = 0; i< num_external; ++i) {
>> /* For rank i in the new communicator, find the corresponding
>> rank in the input communicator */
>> - MPID_VCR_Dup( comm->vcr[external_procs[i]],
>> -&comm->node_roots_comm->vcr[i] );
>> + MPID_VCR_Dup( comm->vcr, external_procs[i],
>> + comm->node_roots_comm->vcr, i );
>> }
>>
>> MPID_Dev_comm_create_hook( comm->node_roots_comm );
>> @@ -1006,7 +1006,7 @@ int MPIR_Comm_copy( MPID_Comm *comm_ptr, int size, MPID_Comm **outcomm_ptr )
>> for (i=0; i<size; i++) {
>> /* For rank i in the new communicator, find the corresponding
>> rank in the input communicator */
>> - MPID_VCR_Dup( comm_ptr->vcr[i],&newcomm_ptr->vcr[i] );
>> + MPID_VCR_Dup( comm_ptr->vcr, i, newcomm_ptr->vcr, i );
>> }
>> }
>>
>> diff --git a/mpich2/src/mpi/comm/intercomm_create.c b/mpich2/src/mpi/comm/intercomm_create.c
>> index e713830..0cb5c1e 100644
>> --- a/mpich2/src/mpi/comm/intercomm_create.c
>> +++ b/mpich2/src/mpi/comm/intercomm_create.c
>> @@ -105,7 +105,7 @@ PMPI_LOCAL int MPID_GPID_GetAllInComm( MPID_Comm *comm_ptr, int local_size,
>>
>> for (i=0; i<comm_ptr->local_size; i++) {
>> *gpid++ = 0;
>> - (void)MPID_VCR_Get_lpid( comm_ptr->vcr[i], gpid );
>> + (void)MPID_VCR_Get_lpid( comm_ptr->vcr, i, gpid );
>> gpid++;
>> }
>> *singlePG = 1;
>> @@ -140,8 +140,8 @@ PMPI_LOCAL int MPID_VCR_CommFromLpids( MPID_Comm *newcomm_ptr,
>> /* printf( "[%d] Remote rank %d has lpid %d\n",
>> MPIR_Process.comm_world->rank, i, lpids[i] ); */
>> if (lpids[i]< commworld_ptr->remote_size) {
>> - MPID_VCR_Dup( commworld_ptr->vcr[lpids[i]],
>> - &newcomm_ptr->vcr[i] );
>> + MPID_VCR_Dup( commworld_ptr->vcr, lpids[i],
>> + newcomm_ptr->vcr, i );
>> }
>> else {
>> /* We must find the corresponding vcr for a given lpid */
>> @@ -163,7 +163,7 @@ PMPI_LOCAL int MPID_LPID_GetAllInComm( MPID_Comm *comm_ptr, int local_size,
>> /* FIXME: Should be using the local_size argument */
>> MPIU_Assert( comm_ptr->local_size == local_size );
>> for (i=0; i<comm_ptr->local_size; i++) {
>> - (void)MPID_VCR_Get_lpid( comm_ptr->vcr[i],&local_lpids[i] );
>> + (void)MPID_VCR_Get_lpid( comm_ptr->vcr, i,&local_lpids[i] );
>> }
>> return 0;
>> }
>> @@ -539,7 +539,7 @@ int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader,
>> MPID_VCRT_Create( comm_ptr->local_size,&newcomm_ptr->local_vcrt );
>> MPID_VCRT_Get_ptr( newcomm_ptr->local_vcrt,&newcomm_ptr->local_vcr );
>> for (i=0; i<comm_ptr->local_size; i++) {
>> - MPID_VCR_Dup( comm_ptr->vcr[i],&newcomm_ptr->local_vcr[i] );
>> + MPID_VCR_Dup( comm_ptr->vcr, i, newcomm_ptr->local_vcr, i );
>> }
>>
>> /* Inherit the error handler (if any) */
>> diff --git a/mpich2/src/mpi/comm/intercomm_merge.c b/mpich2/src/mpi/comm/intercomm_merge.c
>> index 8fae687..b672767 100644
>> --- a/mpich2/src/mpi/comm/intercomm_merge.c
>> +++ b/mpich2/src/mpi/comm/intercomm_merge.c
>> @@ -227,11 +227,11 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, MPI_Comm *newintracomm)
>> /* remote group first */
>> j = 0;
>> for (i=0; i<comm_ptr->remote_size; i++) {
>> - MPID_VCR_Dup( comm_ptr->vcr[i],&newcomm_ptr->vcr[j++] );
>> + MPID_VCR_Dup( comm_ptr->vcr, i, newcomm_ptr->vcr, j++ );
>> }
>> for (i=0; i<comm_ptr->local_size; i++) {
>> if (i == comm_ptr->rank) newcomm_ptr->rank = j;
>> - MPID_VCR_Dup( comm_ptr->local_vcr[i],&newcomm_ptr->vcr[j++] );
>> + MPID_VCR_Dup( comm_ptr->local_vcr, i, newcomm_ptr->vcr, j++ );
>> }
>> }
>> else {
>> @@ -239,10 +239,10 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, MPI_Comm *newintracomm)
>> j = 0;
>> for (i=0; i<comm_ptr->local_size; i++) {
>> if (i == comm_ptr->rank) newcomm_ptr->rank = j;
>> - MPID_VCR_Dup( comm_ptr->local_vcr[i],&newcomm_ptr->vcr[j++] );
>> + MPID_VCR_Dup( comm_ptr->local_vcr, i, newcomm_ptr->vcr, j++ );
>> }
>> for (i=0; i<comm_ptr->remote_size; i++) {
>> - MPID_VCR_Dup( comm_ptr->vcr[i],&newcomm_ptr->vcr[j++] );
>> + MPID_VCR_Dup( comm_ptr->vcr, i, newcomm_ptr->vcr, j++ );
>> }
>> }
>>
>> diff --git a/mpich2/src/mpi/group/grouputil.c b/mpich2/src/mpi/group/grouputil.c
>> index dda81b3..963bad4 100644
>> --- a/mpich2/src/mpi/group/grouputil.c
>> +++ b/mpich2/src/mpi/group/grouputil.c
>> @@ -383,7 +383,7 @@ int MPIR_GroupCheckVCRSubset( MPID_Group *group_ptr, int vsize, MPID_VCR *vcr,
>> vsize*sizeof(MPID_Group_pmap_t),mpi_errno, "" );
>> /* Initialize the vmap */
>> for (i=0; i<vsize; i++) {
>> - MPID_VCR_Get_lpid( vcr[i],&vmap[i].lpid );
>> + MPID_VCR_Get_lpid( vcr, i,&vmap[i].lpid );
>> vmap[i].lrank = i;
>> vmap[i].next_lpid = 0;
>> vmap[i].flag = 0;
>> diff --git a/mpich2/src/mpid/dcmfd/include/mpidi_hooks.h b/mpich2/src/mpid/dcmfd/include/mpidi_hooks.h
>> index d7db400..9081a4a 100644
>> --- a/mpich2/src/mpid/dcmfd/include/mpidi_hooks.h
>> +++ b/mpich2/src/mpid/dcmfd/include/mpidi_hooks.h
>> @@ -13,7 +13,8 @@
>> #define __include_mpidi_hooks_h__
>>
>>
>> -typedef dcmf_task_t MPID_VCR;
>> +typedef void MPID_VCR;
>> +typedef dcmf_task_t MPIDI_VCR;
>> typedef struct MPIDI_VCRT * MPID_VCRT;
>>
>> typedef size_t MPIDI_msg_sz_t;
>> diff --git a/mpich2/src/mpid/dcmfd/include/mpidi_macros.h b/mpich2/src/mpid/dcmfd/include/mpidi_macros.h
>> index 3acd105..73f1be9 100644
>> --- a/mpich2/src/mpid/dcmfd/include/mpidi_macros.h
>> +++ b/mpich2/src/mpid/dcmfd/include/mpidi_macros.h
>> @@ -232,7 +232,7 @@ MPIDI_Context_local(MPID_Request * req)
>>
>> #define MPID_VCR_GET_LPID(vcr, index) \
>> ({ \
>> - vcr[index]; \
>> + ((MPIDI_VCR*)vcr)[index]; \
>> })
>> #define MPID_GPID_Get(comm_ptr, rank, gpid) \
>> { \
>> diff --git a/mpich2/src/mpid/dcmfd/src/mpid_init.c b/mpich2/src/mpid/dcmfd/src/mpid_init.c
>> index 1a31681..4acdb13 100644
>> --- a/mpich2/src/mpid/dcmfd/src/mpid_init.c
>> +++ b/mpich2/src/mpid/dcmfd/src/mpid_init.c
>> @@ -277,7 +277,7 @@ int MPID_Init(int * argc,
>> rc = MPID_VCRT_Get_ptr(comm->vcrt,&comm->vcr);
>> MPID_assert(rc == MPI_SUCCESS);
>> for (i=0; i<size; i++)
>> - comm->vcr[i] = i;
>> + ((MPIDI_VCR*)comm->vcr)[i] = i;
>>
>> /* basically a noop for now */
>> MPIDI_Comm_create(comm);
>> @@ -295,7 +295,7 @@ int MPID_Init(int * argc,
>> MPID_assert(rc == MPI_SUCCESS);
>> rc = MPID_VCRT_Get_ptr(comm->vcrt,&comm->vcr);
>> MPID_assert(rc == MPI_SUCCESS);
>> - comm->vcr[0] = rank;
>> + ((MPIDI_VCR*)comm->vcr)[0] = rank;
>>
>>
>> /* ------------------------------- */
>> diff --git a/mpich2/src/mpid/dcmfd/src/mpid_vc.c b/mpich2/src/mpid/dcmfd/src/mpid_vc.c
>> index a106fc0..2fb4769 100644
>> --- a/mpich2/src/mpid/dcmfd/src/mpid_vc.c
>> +++ b/mpich2/src/mpid/dcmfd/src/mpid_vc.c
>> @@ -17,20 +17,20 @@ struct MPIDI_VCRT
>> {
>> MPIU_OBJECT_HEADER;
>> unsigned size; /**< Number of entries in the table */
>> - MPID_VCR vcr_table[0]; /**< Array of virtual connection references */
>> + MPIDI_VCR vcr_table[0]; /**< Array of virtual connection references */
>> };
>>
>>
>> -int MPID_VCR_Dup(MPID_VCR orig_vcr, MPID_VCR * new_vcr)
>> +int MPID_VCR_Dup(const MPID_VCR *orig_vcr, unsigned origin_index, MPID_VCR *new_vcr, unsigned new_index)
>> {
>> - *new_vcr = orig_vcr;
>> - return MPI_SUCCESS;
>> + ((MPIDI_VCR*)new_vcr)[new_index] = MPID_VCR_GET_LPID(orig_vcr, origin_index);
>> + return MPI_SUCCESS;
>> }
>>
>> -int MPID_VCR_Get_lpid(MPID_VCR vcr, int * lpid_ptr)
>> +int MPID_VCR_Get_lpid(MPID_VCR *vcr, unsigned index, int * lpid_ptr)
>> {
>> - *lpid_ptr = (int)vcr;
>> - return MPI_SUCCESS;
>> + *lpid_ptr = MPID_VCR_GET_LPID(vcr, index);
>> + return MPI_SUCCESS;
>> }
>>
>> int MPID_VCRT_Create(int size, MPID_VCRT *vcrt_ptr)
>> @@ -38,7 +38,7 @@ int MPID_VCRT_Create(int size, MPID_VCRT *vcrt_ptr)
>> struct MPIDI_VCRT * vcrt;
>> int result;
>>
>> - vcrt = MPIU_Malloc(sizeof(struct MPIDI_VCRT) + size*sizeof(MPID_VCR));
>> + vcrt = MPIU_Malloc(sizeof(struct MPIDI_VCRT) + size*sizeof(MPIDI_VCR));
>> if (vcrt != NULL)
>> {
>> MPIU_Object_set_ref(vcrt, 1);
>
> --
> Pavan Balaji
> http://www.mcs.anl.gov/~balaji
More information about the mpich2-dev mailing list