[mpich2-commits] r3938 - in mpich2/trunk/src: include mpi/coll
balaji at mcs.anl.gov
Wed Mar 4 17:02:37 CST 2009
Author: balaji
Date: 2009-03-04 17:02:37 -0600 (Wed, 04 Mar 2009)
New Revision: 3938
Modified:
mpich2/trunk/src/include/mpiimpl.h
mpich2/trunk/src/mpi/coll/gatherv.c
mpich2/trunk/src/mpi/coll/helper_fns.c
Log:
Use MPI_Ssend instead of MPI_Send for Gatherv with large communicators
(more than 32 processes for now). Just using plain MPI_Send was causing
the non-root processes to run ahead and queue up a lot of unexpected
messages on the root processes. This was: (i) causing performance loss,
and (ii) causing some MPICH2 derivatives (such as BG/P) to abort due to
lack of resources. We don't use MPI_Ssend for small communicators as it
adds overhead.
Reviewed by gropp.
Modified: mpich2/trunk/src/include/mpiimpl.h
===================================================================
--- mpich2/trunk/src/include/mpiimpl.h 2009-03-04 22:01:42 UTC (rev 3937)
+++ mpich2/trunk/src/include/mpiimpl.h 2009-03-04 23:02:37 UTC (rev 3938)
@@ -3531,6 +3531,7 @@
#define MPIR_GATHER_VSMALL_MSG 1024
#define MPIR_SCATTER_SHORT_MSG 2048 /* for intercommunicator scatter */
#define MPIR_GATHER_SHORT_MSG 2048 /* for intercommunicator gather */
+#define MPIR_GATHERV_MIN_PROCS 32
/* Tags for point to point operations which implement collective operations */
#define MPIR_BARRIER_TAG 1
@@ -3567,6 +3568,8 @@
MPI_Comm comm);
int MPIC_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag,
MPI_Comm comm, MPI_Status *status);
+int MPIC_Ssend(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+ MPI_Comm comm);
int MPIC_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
int dest, int sendtag, void *recvbuf, int recvcount,
MPI_Datatype recvtype, int source, int recvtag,
Modified: mpich2/trunk/src/mpi/coll/gatherv.c
===================================================================
--- mpich2/trunk/src/mpi/coll/gatherv.c 2009-03-04 22:01:42 UTC (rev 3937)
+++ mpich2/trunk/src/mpi/coll/gatherv.c 2009-03-04 23:02:37 UTC (rev 3938)
@@ -120,9 +120,16 @@
}
else if (root != MPI_PROC_NULL) { /* non-root nodes, and in the intercomm. case, non-root nodes on remote side */
- if (sendcnt)
- mpi_errno = MPIC_Send(sendbuf, sendcnt, sendtype, root,
- MPIR_GATHERV_TAG, comm);
+ if (sendcnt) {
+ if (comm_size >= MPIR_GATHERV_MIN_PROCS) {
+ mpi_errno = MPIC_Ssend(sendbuf, sendcnt, sendtype, root,
+ MPIR_GATHERV_TAG, comm);
+ }
+ else {
+ mpi_errno = MPIC_Send(sendbuf, sendcnt, sendtype, root,
+ MPIR_GATHERV_TAG, comm);
+ }
+ }
}
/* check if multiple threads are calling this collective function */
Modified: mpich2/trunk/src/mpi/coll/helper_fns.c
===================================================================
--- mpich2/trunk/src/mpi/coll/helper_fns.c 2009-03-04 22:01:42 UTC (rev 3937)
+++ mpich2/trunk/src/mpi/coll/helper_fns.c 2009-03-04 23:02:37 UTC (rev 3938)
@@ -95,6 +95,42 @@
}
#undef FUNCNAME
+#define FUNCNAME MPIC_Ssend
+#undef FCNAME
+#define FCNAME "MPIC_Ssend"
+/* MPIC_Ssend - synchronous-mode send helper for collective algorithms.
+ *
+ * Mirrors MPIC_Send but issues MPID_Ssend, so the send does not complete
+ * until the receiver has begun matching it.  Gatherv uses this on large
+ * communicators to keep non-root ranks from flooding the root with
+ * unexpected messages.
+ *
+ * buf/count/datatype - message payload
+ * dest/tag/comm      - destination rank, tag, and communicator
+ * Returns MPI_SUCCESS or an MPI error code from the device layer.
+ */
+int MPIC_Ssend(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+               MPI_Comm comm)
+{
+    int mpi_errno, context_id;
+    MPID_Request *request_ptr=NULL;
+    MPID_Comm *comm_ptr=NULL;
+    /* Use the MPIC_Ssend-specific timing state: the original code reused
+       MPID_STATE_MPIC_SEND (copy-paste from MPIC_Send), which would fold
+       this function's profiling time into MPIC_Send's bucket. */
+    MPIDI_STATE_DECL(MPID_STATE_MPIC_SSEND);
+
+    MPIDI_PT2PT_FUNC_ENTER_FRONT(MPID_STATE_MPIC_SSEND);
+
+    MPID_Comm_get_ptr( comm, comm_ptr );
+    /* Collective traffic runs in a separate context id so it can never
+       match application point-to-point receives. */
+    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
+        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;
+
+    mpi_errno = MPID_Ssend(buf, count, datatype, dest, tag, comm_ptr,
+                           context_id, &request_ptr);
+    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+    if (request_ptr) {
+        /* Device handed back a request: wait for the synchronous send to
+           complete, then drop our reference. */
+        mpi_errno = MPIC_Wait(request_ptr);
+        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+        MPID_Request_release(request_ptr);
+    }
+ fn_exit:
+    MPIDI_PT2PT_FUNC_EXIT(MPID_STATE_MPIC_SSEND);
+    return mpi_errno;
+ fn_fail:
+    /* Error path: make sure a pending request is released before exit. */
+    if (request_ptr) {
+        MPID_Request_release(request_ptr);
+    }
+    goto fn_exit;
+}
+
+#undef FUNCNAME
#define FUNCNAME MPIC_Sendrecv
#undef FCNAME
#define FCNAME "MPIC_Sendrecv"
More information about the mpich2-commits
mailing list