[mpich2-commits] r3938 - in mpich2/trunk/src: include mpi/coll

balaji at mcs.anl.gov balaji at mcs.anl.gov
Wed Mar 4 17:02:37 CST 2009


Author: balaji
Date: 2009-03-04 17:02:37 -0600 (Wed, 04 Mar 2009)
New Revision: 3938

Modified:
   mpich2/trunk/src/include/mpiimpl.h
   mpich2/trunk/src/mpi/coll/gatherv.c
   mpich2/trunk/src/mpi/coll/helper_fns.c
Log:
Use MPI_Ssend instead of MPI_Send for Gatherv with large communicators
(more than 32 processes for now). Just using plain MPI_Send was causing
the non-root processes to run ahead and queue up a lot of unexpected
messages on the root process. This was: (i) causing performance loss,
and (ii) causing some MPICH2 derivatives (such as BG/P) to abort due to
lack of resources. We don't use MPI_Ssend for small communicators as it
adds overhead.

Reviewed by gropp.



Modified: mpich2/trunk/src/include/mpiimpl.h
===================================================================
--- mpich2/trunk/src/include/mpiimpl.h	2009-03-04 22:01:42 UTC (rev 3937)
+++ mpich2/trunk/src/include/mpiimpl.h	2009-03-04 23:02:37 UTC (rev 3938)
@@ -3531,6 +3531,7 @@
 #define MPIR_GATHER_VSMALL_MSG        1024
 #define MPIR_SCATTER_SHORT_MSG        2048  /* for intercommunicator scatter */
 #define MPIR_GATHER_SHORT_MSG         2048  /* for intercommunicator scatter */
+#define MPIR_GATHERV_MIN_PROCS        32  /* comm size at/above which gatherv non-roots use MPIC_Ssend */
 
 /* Tags for point to point operations which implement collective operations */
 #define MPIR_BARRIER_TAG               1
@@ -3567,6 +3568,8 @@
               MPI_Comm comm);
 int MPIC_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag,
               MPI_Comm comm, MPI_Status *status);
+int MPIC_Ssend(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+               MPI_Comm comm); /* collective-helper send built on MPID_Ssend; same argument list as MPIC_Send */
 int MPIC_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                   int dest, int sendtag, void *recvbuf, int recvcount,
                   MPI_Datatype recvtype, int source, int recvtag,

Modified: mpich2/trunk/src/mpi/coll/gatherv.c
===================================================================
--- mpich2/trunk/src/mpi/coll/gatherv.c	2009-03-04 22:01:42 UTC (rev 3937)
+++ mpich2/trunk/src/mpi/coll/gatherv.c	2009-03-04 23:02:37 UTC (rev 3938)
@@ -120,9 +120,16 @@
     }
 
     else if (root != MPI_PROC_NULL) { /* non-root nodes, and in the intercomm. case, non-root nodes on remote side */
-        if (sendcnt)
-            mpi_errno = MPIC_Send(sendbuf, sendcnt, sendtype, root, 
-                                  MPIR_GATHERV_TAG, comm);
+        if (sendcnt) {
+            if (comm_size >= MPIR_GATHERV_MIN_PROCS) {
+                mpi_errno = MPIC_Ssend(sendbuf, sendcnt, sendtype, root, 
+                                       MPIR_GATHERV_TAG, comm);
+            }
+            else {
+                mpi_errno = MPIC_Send(sendbuf, sendcnt, sendtype, root, 
+                                      MPIR_GATHERV_TAG, comm);
+            }
+        }
     }
     
     /* check if multiple threads are calling this collective function */

Modified: mpich2/trunk/src/mpi/coll/helper_fns.c
===================================================================
--- mpich2/trunk/src/mpi/coll/helper_fns.c	2009-03-04 22:01:42 UTC (rev 3937)
+++ mpich2/trunk/src/mpi/coll/helper_fns.c	2009-03-04 23:02:37 UTC (rev 3938)
@@ -95,6 +95,42 @@
 }
 
 #undef FUNCNAME
+#define FUNCNAME MPIC_Ssend
+#undef FCNAME
+#define FCNAME "MPIC_Ssend"
+int MPIC_Ssend(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+               MPI_Comm comm) /* Send helper for collectives: issues MPID_Ssend on the collective context and waits for it; returns mpi_errno. */
+{
+    int mpi_errno, context_id; /* mpi_errno is first assigned by the MPID_Ssend call below */
+    MPID_Request *request_ptr=NULL; /* stays NULL unless MPID_Ssend hands back a request */
+    MPID_Comm *comm_ptr=NULL;
+    MPIDI_STATE_DECL(MPID_STATE_MPIC_SEND); /* NOTE(review): uses the MPIC_SEND state id, not an Ssend-specific one -- confirm intentional */
+
+    MPIDI_PT2PT_FUNC_ENTER_FRONT(MPID_STATE_MPIC_SEND);
+
+    MPID_Comm_get_ptr( comm, comm_ptr ); /* fills comm_ptr from the comm handle (it is dereferenced on the next line) */
+    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ? /* pick the collective context matching the communicator kind */
+        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;
+
+    mpi_errno = MPID_Ssend(buf, count, datatype, dest, tag, comm_ptr,
+                           context_id, &request_ptr); /* may return a request through request_ptr */
+    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); } /* presumably transfers control to fn_fail -- macro not visible here */
+    if (request_ptr) { /* a request was returned: wait on it, then drop our reference */
+        mpi_errno = MPIC_Wait(request_ptr);
+	if (mpi_errno) { MPIU_ERR_POP(mpi_errno); } /* on failure the release is left to the fn_fail path */
+	MPID_Request_release(request_ptr);
+    }
+ fn_exit: /* common exit: both success and failure paths return from here */
+    MPIDI_PT2PT_FUNC_EXIT(MPID_STATE_MPIC_SEND);
+    return mpi_errno;
+ fn_fail: /* error path: release any request still held, then take the common exit */
+    if (request_ptr) {
+        MPID_Request_release(request_ptr);
+    }
+    goto fn_exit;
+}
+
+#undef FUNCNAME
 #define FUNCNAME MPIC_Sendrecv
 #undef FCNAME
 #define FCNAME "MPIC_Sendrecv"



More information about the mpich2-commits mailing list