[mpich2-dev] [PATCH] Make the VCR API independent of the nature of the VCR list

Dave Goodell goodell at mcs.anl.gov
Wed Oct 6 07:25:09 CDT 2010


I sent a longer response to Joe's mail last night, but it appears to have gotten clogged in the pipes somewhere...

Basically, this patch appears to be a duplicate of an existing ticket (#1061?). I tried applying it, but it causes a lot of problems in ch3 that aren't easy to solve. We should integrate this sort of functionality, but we might need to do it slightly differently. I just haven't had the time recently.

We should not attempt to slip this in at this late stage for 1.3.

-Dave

On Oct 6, 2010, at 7:44 AM, Pavan Balaji <balaji at mcs.anl.gov> wrote:

> 
> Thanks Joe. This looks good, though we'll need to port it to CH3 as well.
> 
> mpich2-core: Should this go into 1.3? This is an ADI change (though fairly minor). But we are already at 1.3rc2, so I don't know if we want to make such a change at this point.
> 
> -- Pavan
> 
> On 10/05/2010 04:40 PM, Joe Ratterman wrote:
>> From: Joe Ratterman <jratt at us.ibm.com>
>> 
>>   Currently, all implementations of a communicator rank list must
>>   contain an actual list at least as long as the number of ranks in
>>   the communicator.  This is because "comm_ptr->local_vcr[i]" is
>>   assumed to work.
>> 
>>   To avoid the list, it is possible to change the MPID_VCR API.
>>   Basically, all call sites that previously passed
>>   "comm_ptr->local_vcr[i]" now pass "comm_ptr->local_vcr, i".  This
>>   means that the VCR table can be something other than a list, and
>>   that callers are requesting some processing on the "i"th value
>>   instead of actually passing the "i"th value into the function.
>> 
>>   This patch makes the necessary changes to the core of MPICH2 1.3rc2,
>>   as well as changing an alternative version of the "dcmf" device.
>> 
>> Signed-off-by: Joe Ratterman <jratt at us.ibm.com>
>> ---
>>  mpich2/src/include/mpiimpl.h                 |    4 ++--
>>  mpich2/src/mpi/comm/comm_create.c            |    4 ++--
>>  mpich2/src/mpi/comm/comm_group.c             |    2 +-
>>  mpich2/src/mpi/comm/comm_remote_group.c      |    2 +-
>>  mpich2/src/mpi/comm/comm_split.c             |   12 ++++++------
>>  mpich2/src/mpi/comm/commutil.c               |   10 +++++-----
>>  mpich2/src/mpi/comm/intercomm_create.c       |   10 +++++-----
>>  mpich2/src/mpi/comm/intercomm_merge.c        |    8 ++++----
>>  mpich2/src/mpi/group/grouputil.c             |    2 +-
>>  mpich2/src/mpid/dcmfd/include/mpidi_hooks.h  |    3 ++-
>>  mpich2/src/mpid/dcmfd/include/mpidi_macros.h |    2 +-
>>  mpich2/src/mpid/dcmfd/src/mpid_init.c        |    4 ++--
>>  mpich2/src/mpid/dcmfd/src/mpid_vc.c          |   16 ++++++++--------
>>  13 files changed, 40 insertions(+), 39 deletions(-)
>> 
>> diff --git a/mpich2/src/include/mpiimpl.h b/mpich2/src/include/mpiimpl.h
>> index 013b4fe..6fd86fd 100644
>> --- a/mpich2/src/include/mpiimpl.h
>> +++ b/mpich2/src/include/mpiimpl.h
>> @@ -3116,7 +3116,7 @@ int MPID_VCRT_Get_ptr(MPID_VCRT vcrt, MPID_VCR **vc_pptr);
>>  /*@
>>    MPID_VCR_Dup - Create a duplicate reference to a virtual connection
>>    @*/
>> -int MPID_VCR_Dup(MPID_VCR orig_vcr, MPID_VCR * new_vcr);
>> +int MPID_VCR_Dup(const MPID_VCR *orig_vcr, unsigned origin_index, MPID_VCR *new_vcr, unsigned new_index);
>> 
>>  /*@
>>     MPID_VCR_Get_lpid - Get the local process id that corresponds to a
>> @@ -3129,7 +3129,7 @@ int MPID_VCR_Dup(MPID_VCR orig_vcr, MPID_VCR * new_vcr);
>>     processes may use different ids to identify the same target process
>>    @*/
>>  /* We macro-ized this at the device. */
>> -int MPID_VCR_Get_lpid(MPID_VCR vcr, int * lpid_ptr);
>> +int MPID_VCR_Get_lpid(MPID_VCR *vcr, unsigned index, int * lpid_ptr);
>> 
>>  /* ------------------------------------------------------------------------- */
>>  /* Define a macro to allow us to select between statically selected functions
>> diff --git a/mpich2/src/mpi/comm/comm_create.c b/mpich2/src/mpi/comm/comm_create.c
>> index 33bab93..0241f49 100644
>> --- a/mpich2/src/mpi/comm/comm_create.c
>> +++ b/mpich2/src/mpi/comm/comm_create.c
>> @@ -154,7 +154,7 @@ PMPI_LOCAL int MPIR_Comm_create_calculate_mapping(MPID_Group  *group_ptr,
>>              mapping[i] = -1;
>>              for (j=0; j<vcr_size; j++) {
>>                  int comm_lpid;
>> -                MPID_VCR_Get_lpid( vcr[j],&comm_lpid );
>> +                MPID_VCR_Get_lpid( vcr, j,&comm_lpid );
>>                  if (comm_lpid == group_ptr->lrank_to_lpid[i].lpid) {
>>                      mapping[i] = j;
>>                      break;
>> @@ -207,7 +207,7 @@ PMPI_LOCAL int MPIR_Comm_create_create_and_map_vcrt(int         n,
>>          MPIU_DBG_MSG_FMT(COMM,VERBOSE,
>>                           (MPIU_DBG_FDEST, "dupping from mapping_vcr=%p rank=%d into new_rank=%d/%d in new_vcr=%p",
>>                            mapping_vcr, mapping[i], i, n, vcr));
>> -        mpi_errno = MPID_VCR_Dup(mapping_vcr[mapping[i]],&vcr[i]);
>> +        mpi_errno = MPID_VCR_Dup(mapping_vcr, mapping[i], vcr, i);
>>          if (mpi_errno) MPIU_ERR_POP(mpi_errno);
>>      }
>> 
>> diff --git a/mpich2/src/mpi/comm/comm_group.c b/mpich2/src/mpi/comm/comm_group.c
>> index 7835307..d0700ae 100644
>> --- a/mpich2/src/mpi/comm/comm_group.c
>> +++ b/mpich2/src/mpi/comm/comm_group.c
>> @@ -49,7 +49,7 @@ int MPIR_Comm_group_impl(MPID_Comm *comm_ptr, MPID_Group **group_ptr)
>>          local_vcr = comm_ptr->vcr;
>>      
>>      for (i=0; i<n; i++) {
>> -        (void) MPID_VCR_Get_lpid( local_vcr[i],&lpid );
>> +        (void) MPID_VCR_Get_lpid( local_vcr, i,&lpid );
>>          (*group_ptr)->lrank_to_lpid[i].lrank = i;
>>          (*group_ptr)->lrank_to_lpid[i].lpid  = lpid;
>>      }
>> diff --git a/mpich2/src/mpi/comm/comm_remote_group.c b/mpich2/src/mpi/comm/comm_remote_group.c
>> index a331bd0..5792df7 100644
>> --- a/mpich2/src/mpi/comm/comm_remote_group.c
>> +++ b/mpich2/src/mpi/comm/comm_remote_group.c
>> @@ -41,7 +41,7 @@ int MPIR_Comm_remote_group_impl(MPID_Comm *comm_ptr, MPID_Group **group_ptr)
>> 
>>          for (i=0; i<n; i++) {
>>              (*group_ptr)->lrank_to_lpid[i].lrank = i;
>> -            (void) MPID_VCR_Get_lpid( comm_ptr->vcr[i],&lpid );
>> +            (void) MPID_VCR_Get_lpid( comm_ptr->vcr, i,&lpid );
>>              (*group_ptr)->lrank_to_lpid[i].lpid  = lpid;
>>          }
>>          (*group_ptr)->size = n;
>> diff --git a/mpich2/src/mpi/comm/comm_split.c b/mpich2/src/mpi/comm/comm_split.c
>> index 964f241..603ee78 100644
>> --- a/mpich2/src/mpi/comm/comm_split.c
>> +++ b/mpich2/src/mpi/comm/comm_split.c
>> @@ -236,8 +236,8 @@ int MPIR_Comm_split_impl(MPID_Comm *comm_ptr, int color, int key, MPID_Comm **ne
>>          MPID_VCRT_Get_ptr( (*newcomm_ptr)->local_vcrt,
>>              &(*newcomm_ptr)->local_vcr );
>>          for (i=0; i<new_size; i++) {
>> -        MPID_VCR_Dup( comm_ptr->local_vcr[keytable[i].color],
>> -            &(*newcomm_ptr)->local_vcr[i] );
>> +        MPID_VCR_Dup( comm_ptr->local_vcr, keytable[i].color,
>> +                  (*newcomm_ptr)->local_vcr, i );
>>          if (keytable[i].color == comm_ptr->rank) {
>>              (*newcomm_ptr)->rank = i;
>>          }
>> @@ -261,8 +261,8 @@ int MPIR_Comm_split_impl(MPID_Comm *comm_ptr, int color, int key, MPID_Comm **ne
>>          MPID_VCRT_Create( new_remote_size,&(*newcomm_ptr)->vcrt );
>>          MPID_VCRT_Get_ptr( (*newcomm_ptr)->vcrt,&(*newcomm_ptr)->vcr );
>>          for (i=0; i<new_remote_size; i++) {
>> -        MPID_VCR_Dup( comm_ptr->vcr[remotekeytable[i].color],
>> -            &(*newcomm_ptr)->vcr[i] );
>> +        MPID_VCR_Dup( comm_ptr->vcr, remotekeytable[i].color,
>> +                  (*newcomm_ptr)->vcr, i );
>>          }
>> 
>>          (*newcomm_ptr)->context_id     = remote_context_id;
>> @@ -278,8 +278,8 @@ int MPIR_Comm_split_impl(MPID_Comm *comm_ptr, int color, int key, MPID_Comm **ne
>>          MPID_VCRT_Create( new_size,&(*newcomm_ptr)->vcrt );
>>          MPID_VCRT_Get_ptr( (*newcomm_ptr)->vcrt,&(*newcomm_ptr)->vcr );
>>          for (i=0; i<new_size; i++) {
>> -        MPID_VCR_Dup( comm_ptr->vcr[keytable[i].color],
>> -            &(*newcomm_ptr)->vcr[i] );
>> +        MPID_VCR_Dup( comm_ptr->vcr, keytable[i].color,
>> +                  (*newcomm_ptr)->vcr, i );
>>          if (keytable[i].color == comm_ptr->rank) {
>>              (*newcomm_ptr)->rank = i;
>>          }
>> diff --git a/mpich2/src/mpi/comm/commutil.c b/mpich2/src/mpi/comm/commutil.c
>> index 1eeaedc..fb7108d 100644
>> --- a/mpich2/src/mpi/comm/commutil.c
>> +++ b/mpich2/src/mpi/comm/commutil.c
>> @@ -269,8 +269,8 @@ int MPIR_Comm_commit(MPID_Comm *comm)
>>              for (i = 0; i<  num_local; ++i) {
>>                  /* For rank i in the new communicator, find the corresponding
>>                     rank in the input communicator */
>> -                MPID_VCR_Dup( comm->vcr[local_procs[i]],
>> -&comm->node_comm->vcr[i] );
>> +                MPID_VCR_Dup( comm->vcr, local_procs[i],
>> +                              comm->node_comm->vcr, i );
>>              }
>> 
>>              MPID_Dev_comm_create_hook( comm->node_comm );
>> @@ -297,8 +297,8 @@ int MPIR_Comm_commit(MPID_Comm *comm)
>>              for (i = 0; i<  num_external; ++i) {
>>                  /* For rank i in the new communicator, find the corresponding
>>                     rank in the input communicator */
>> -                MPID_VCR_Dup( comm->vcr[external_procs[i]],
>> -&comm->node_roots_comm->vcr[i] );
>> +                MPID_VCR_Dup( comm->vcr, external_procs[i],
>> +                              comm->node_roots_comm->vcr, i );
>>              }
>> 
>>              MPID_Dev_comm_create_hook( comm->node_roots_comm );
>> @@ -1006,7 +1006,7 @@ int MPIR_Comm_copy( MPID_Comm *comm_ptr, int size, MPID_Comm **outcomm_ptr )
>>      for (i=0; i<size; i++) {
>>          /* For rank i in the new communicator, find the corresponding
>>             rank in the input communicator */
>> -        MPID_VCR_Dup( comm_ptr->vcr[i],&newcomm_ptr->vcr[i] );
>> +        MPID_VCR_Dup( comm_ptr->vcr, i, newcomm_ptr->vcr, i );
>>      }
>>      }
>> 
>> diff --git a/mpich2/src/mpi/comm/intercomm_create.c b/mpich2/src/mpi/comm/intercomm_create.c
>> index e713830..0cb5c1e 100644
>> --- a/mpich2/src/mpi/comm/intercomm_create.c
>> +++ b/mpich2/src/mpi/comm/intercomm_create.c
>> @@ -105,7 +105,7 @@ PMPI_LOCAL int MPID_GPID_GetAllInComm( MPID_Comm *comm_ptr, int local_size,
>> 
>>      for (i=0; i<comm_ptr->local_size; i++) {
>>      *gpid++ = 0;
>> -    (void)MPID_VCR_Get_lpid( comm_ptr->vcr[i], gpid );
>> +    (void)MPID_VCR_Get_lpid( comm_ptr->vcr, i, gpid );
>>      gpid++;
>>      }
>>      *singlePG = 1;
>> @@ -140,8 +140,8 @@ PMPI_LOCAL int MPID_VCR_CommFromLpids( MPID_Comm *newcomm_ptr,
>>      /* printf( "[%d] Remote rank %d has lpid %d\n",
>>         MPIR_Process.comm_world->rank, i, lpids[i] ); */
>>      if (lpids[i]<  commworld_ptr->remote_size) {
>> -        MPID_VCR_Dup( commworld_ptr->vcr[lpids[i]],
>> -            &newcomm_ptr->vcr[i] );
>> +        MPID_VCR_Dup( commworld_ptr->vcr, lpids[i],
>> +              newcomm_ptr->vcr, i );
>>      }
>>      else {
>>          /* We must find the corresponding vcr for a given lpid */
>> @@ -163,7 +163,7 @@ PMPI_LOCAL int MPID_LPID_GetAllInComm( MPID_Comm *comm_ptr, int local_size,
>>      /* FIXME: Should be using the local_size argument */
>>      MPIU_Assert( comm_ptr->local_size == local_size );
>>      for (i=0; i<comm_ptr->local_size; i++) {
>> -    (void)MPID_VCR_Get_lpid( comm_ptr->vcr[i],&local_lpids[i] );
>> +    (void)MPID_VCR_Get_lpid( comm_ptr->vcr, i,&local_lpids[i] );
>>      }
>>      return 0;
>>  }
>> @@ -539,7 +539,7 @@ int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader,
>>      MPID_VCRT_Create( comm_ptr->local_size,&newcomm_ptr->local_vcrt );
>>      MPID_VCRT_Get_ptr( newcomm_ptr->local_vcrt,&newcomm_ptr->local_vcr );
>>      for (i=0; i<comm_ptr->local_size; i++) {
>> -    MPID_VCR_Dup( comm_ptr->vcr[i],&newcomm_ptr->local_vcr[i] );
>> +    MPID_VCR_Dup( comm_ptr->vcr, i, newcomm_ptr->local_vcr, i );
>>      }
>> 
>>      /* Inherit the error handler (if any) */
>> diff --git a/mpich2/src/mpi/comm/intercomm_merge.c b/mpich2/src/mpi/comm/intercomm_merge.c
>> index 8fae687..b672767 100644
>> --- a/mpich2/src/mpi/comm/intercomm_merge.c
>> +++ b/mpich2/src/mpi/comm/intercomm_merge.c
>> @@ -227,11 +227,11 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, MPI_Comm *newintracomm)
>>      /* remote group first */
>>      j = 0;
>>      for (i=0; i<comm_ptr->remote_size; i++) {
>> -        MPID_VCR_Dup( comm_ptr->vcr[i],&newcomm_ptr->vcr[j++] );
>> +        MPID_VCR_Dup( comm_ptr->vcr, i, newcomm_ptr->vcr, j++ );
>>      }
>>      for (i=0; i<comm_ptr->local_size; i++) {
>>          if (i == comm_ptr->rank) newcomm_ptr->rank = j;
>> -        MPID_VCR_Dup( comm_ptr->local_vcr[i],&newcomm_ptr->vcr[j++] );
>> +        MPID_VCR_Dup( comm_ptr->local_vcr, i, newcomm_ptr->vcr, j++ );
>>      }
>>      }
>>      else {
>> @@ -239,10 +239,10 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, MPI_Comm *newintracomm)
>>      j = 0;
>>      for (i=0; i<comm_ptr->local_size; i++) {
>>          if (i == comm_ptr->rank) newcomm_ptr->rank = j;
>> -        MPID_VCR_Dup( comm_ptr->local_vcr[i],&newcomm_ptr->vcr[j++] );
>> +        MPID_VCR_Dup( comm_ptr->local_vcr, i, newcomm_ptr->vcr, j++ );
>>      }
>>      for (i=0; i<comm_ptr->remote_size; i++) {
>> -        MPID_VCR_Dup( comm_ptr->vcr[i],&newcomm_ptr->vcr[j++] );
>> +        MPID_VCR_Dup( comm_ptr->vcr, i, newcomm_ptr->vcr, j++ );
>>      }
>>      }
>> 
>> diff --git a/mpich2/src/mpi/group/grouputil.c b/mpich2/src/mpi/group/grouputil.c
>> index dda81b3..963bad4 100644
>> --- a/mpich2/src/mpi/group/grouputil.c
>> +++ b/mpich2/src/mpi/group/grouputil.c
>> @@ -383,7 +383,7 @@ int MPIR_GroupCheckVCRSubset( MPID_Group *group_ptr, int vsize, MPID_VCR *vcr,
>>              vsize*sizeof(MPID_Group_pmap_t),mpi_errno, "" );
>>      /* Initialize the vmap */
>>      for (i=0; i<vsize; i++) {
>> -    MPID_VCR_Get_lpid( vcr[i],&vmap[i].lpid );
>> +    MPID_VCR_Get_lpid( vcr, i,&vmap[i].lpid );
>>      vmap[i].lrank     = i;
>>      vmap[i].next_lpid = 0;
>>      vmap[i].flag      = 0;
>> diff --git a/mpich2/src/mpid/dcmfd/include/mpidi_hooks.h b/mpich2/src/mpid/dcmfd/include/mpidi_hooks.h
>> index d7db400..9081a4a 100644
>> --- a/mpich2/src/mpid/dcmfd/include/mpidi_hooks.h
>> +++ b/mpich2/src/mpid/dcmfd/include/mpidi_hooks.h
>> @@ -13,7 +13,8 @@
>>  #define __include_mpidi_hooks_h__
>> 
>> 
>> -typedef dcmf_task_t         MPID_VCR;
>> +typedef void                MPID_VCR;
>> +typedef dcmf_task_t         MPIDI_VCR;
>>  typedef struct MPIDI_VCRT * MPID_VCRT;
>> 
>>  typedef size_t              MPIDI_msg_sz_t;
>> diff --git a/mpich2/src/mpid/dcmfd/include/mpidi_macros.h b/mpich2/src/mpid/dcmfd/include/mpidi_macros.h
>> index 3acd105..73f1be9 100644
>> --- a/mpich2/src/mpid/dcmfd/include/mpidi_macros.h
>> +++ b/mpich2/src/mpid/dcmfd/include/mpidi_macros.h
>> @@ -232,7 +232,7 @@ MPIDI_Context_local(MPID_Request * req)
>> 
>>  #define MPID_VCR_GET_LPID(vcr, index)           \
>>  ({                                              \
>> -  vcr[index];                                   \
>> +  ((MPIDI_VCR*)vcr)[index];                     \
>>  })
>>  #define MPID_GPID_Get(comm_ptr, rank, gpid)             \
>>  {                                                       \
>> diff --git a/mpich2/src/mpid/dcmfd/src/mpid_init.c b/mpich2/src/mpid/dcmfd/src/mpid_init.c
>> index 1a31681..4acdb13 100644
>> --- a/mpich2/src/mpid/dcmfd/src/mpid_init.c
>> +++ b/mpich2/src/mpid/dcmfd/src/mpid_init.c
>> @@ -277,7 +277,7 @@ int MPID_Init(int * argc,
>>    rc = MPID_VCRT_Get_ptr(comm->vcrt,&comm->vcr);
>>    MPID_assert(rc == MPI_SUCCESS);
>>    for (i=0; i<size; i++)
>> -    comm->vcr[i] = i;
>> +    ((MPIDI_VCR*)comm->vcr)[i] = i;
>> 
>>     /* basically a noop for now */
>>    MPIDI_Comm_create(comm);
>> @@ -295,7 +295,7 @@ int MPID_Init(int * argc,
>>    MPID_assert(rc == MPI_SUCCESS);
>>    rc = MPID_VCRT_Get_ptr(comm->vcrt,&comm->vcr);
>>    MPID_assert(rc == MPI_SUCCESS);
>> -  comm->vcr[0] = rank;
>> +  ((MPIDI_VCR*)comm->vcr)[0] = rank;
>> 
>> 
>>    /* ------------------------------- */
>> diff --git a/mpich2/src/mpid/dcmfd/src/mpid_vc.c b/mpich2/src/mpid/dcmfd/src/mpid_vc.c
>> index a106fc0..2fb4769 100644
>> --- a/mpich2/src/mpid/dcmfd/src/mpid_vc.c
>> +++ b/mpich2/src/mpid/dcmfd/src/mpid_vc.c
>> @@ -17,20 +17,20 @@ struct MPIDI_VCRT
>>  {
>>    MPIU_OBJECT_HEADER;
>>    unsigned size;          /**<  Number of entries in the table */
>> -  MPID_VCR vcr_table[0];  /**<  Array of virtual connection references */
>> +  MPIDI_VCR vcr_table[0]; /**<  Array of virtual connection references */
>>  };
>> 
>> 
>> -int MPID_VCR_Dup(MPID_VCR orig_vcr, MPID_VCR * new_vcr)
>> +int MPID_VCR_Dup(const MPID_VCR *orig_vcr, unsigned origin_index, MPID_VCR *new_vcr, unsigned new_index)
>>  {
>> -    *new_vcr = orig_vcr;
>> -    return MPI_SUCCESS;
>> +  ((MPIDI_VCR*)new_vcr)[new_index] = MPID_VCR_GET_LPID(orig_vcr, origin_index);
>> +  return MPI_SUCCESS;
>>  }
>> 
>> -int MPID_VCR_Get_lpid(MPID_VCR vcr, int * lpid_ptr)
>> +int MPID_VCR_Get_lpid(MPID_VCR *vcr, unsigned index, int * lpid_ptr)
>>  {
>> -    *lpid_ptr = (int)vcr;
>> -    return MPI_SUCCESS;
>> +  *lpid_ptr = MPID_VCR_GET_LPID(vcr, index);
>> +  return MPI_SUCCESS;
>>  }
>> 
>>  int MPID_VCRT_Create(int size, MPID_VCRT *vcrt_ptr)
>> @@ -38,7 +38,7 @@ int MPID_VCRT_Create(int size, MPID_VCRT *vcrt_ptr)
>>      struct MPIDI_VCRT * vcrt;
>>      int result;
>> 
>> -    vcrt = MPIU_Malloc(sizeof(struct MPIDI_VCRT) + size*sizeof(MPID_VCR));
>> +    vcrt = MPIU_Malloc(sizeof(struct MPIDI_VCRT) + size*sizeof(MPIDI_VCR));
>>      if (vcrt != NULL)
>>      {
>>          MPIU_Object_set_ref(vcrt, 1);
> 
> -- 
> Pavan Balaji
> http://www.mcs.anl.gov/~balaji


More information about the mpich2-dev mailing list