[mpich2-commits] r7827 - in mpich2/trunk/src: include mpi/pt2pt

goodell at mcs.anl.gov goodell at mcs.anl.gov
Tue Jan 25 11:18:52 CST 2011


Author: goodell
Date: 2011-01-25 11:18:52 -0600 (Tue, 25 Jan 2011)
New Revision: 7827

Modified:
   mpich2/trunk/src/include/mpiimpl.h
   mpich2/trunk/src/mpi/pt2pt/mpir_request.c
   mpich2/trunk/src/mpi/pt2pt/waitall.c
Log:
"fastpath" MPI_Waitall for high-message-rate situations

Based on suggestions from Sameer Kumar and others at IBM.

Reviewed by buntinas@.

Modified: mpich2/trunk/src/include/mpiimpl.h
===================================================================
--- mpich2/trunk/src/include/mpiimpl.h	2011-01-25 05:15:30 UTC (rev 7826)
+++ mpich2/trunk/src/include/mpiimpl.h	2011-01-25 17:18:52 UTC (rev 7827)
@@ -2080,6 +2080,7 @@
 int MPIR_dup_fn ( MPI_Comm, int, void *, void *, void *, int * );
 /* marks a request as complete, extracting the status */
 int MPIR_Request_complete(MPI_Request *, MPID_Request *, MPI_Status *, int *);
+
 int MPIR_Request_get_error(MPID_Request *);
 /* run the progress engine until the given request is complete */
 int MPIR_Progress_wait_request(MPID_Request *req);
@@ -3675,4 +3676,41 @@
 int MPIR_Comm_set_attr_impl(MPID_Comm *comm_ptr, int comm_keyval, void *attribute_val, 
                             MPIR_AttrType attrType);
 
+
+/* The "fastpath" version of MPIR_Request_complete.  It only handles
+ * MPID_REQUEST_SEND and MPID_REQUEST_RECV kinds, and it does not deal with
+ * status structures, assuming bleeding-fast code passes MPI_STATUS_IGNORE or
+ * MPI_STATUSES_IGNORE as appropriate.  This routine (or some variation of it)
+ * is an unfortunately necessary stunt to get high message rates on key
+ * benchmarks for high-end systems.  Returns request_ptr->status.MPI_ERROR and
+ * sets *request to MPI_REQUEST_NULL. */
+#undef FUNCNAME
+#define FUNCNAME MPIR_Request_complete_fastpath
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+static inline int MPIR_Request_complete_fastpath(MPI_Request *request, MPID_Request *request_ptr)
+{
+    int mpi_errno = MPI_SUCCESS;
+
+    MPIU_Assert(request_ptr->kind == MPID_REQUEST_SEND || request_ptr->kind == MPID_REQUEST_RECV);
+
+    if (request_ptr->kind == MPID_REQUEST_SEND) {
+        /* FIXME: are Ibsend requests added to the send queue? */
+        MPIR_SENDQ_FORGET(request_ptr);
+    }
+
+    /* the completion path for SEND and RECV is the same at this time, modulo
+     * the SENDQ hook above */
+    mpi_errno = request_ptr->status.MPI_ERROR; /* read before the release below */
+    MPID_Request_release(request_ptr);
+    *request = MPI_REQUEST_NULL; /* reset the caller's handle */
+
+    /* avoid normal fn_exit/fn_fail jump pattern to reduce jumps and compiler confusion */
+    return mpi_errno;
+}
+
+/* avoid conflicts in source files with old-style "char FCNAME[]" vars */
+#undef FUNCNAME
+#undef FCNAME
+
 #endif /* MPIIMPL_INCLUDED */

Modified: mpich2/trunk/src/mpi/pt2pt/mpir_request.c
===================================================================
--- mpich2/trunk/src/mpi/pt2pt/mpir_request.c	2011-01-25 05:15:30 UTC (rev 7826)
+++ mpich2/trunk/src/mpi/pt2pt/mpir_request.c	2011-01-25 17:18:52 UTC (rev 7827)
@@ -44,7 +44,6 @@
     return mpi_errno;
 }
 
-
 #undef FUNCNAME
 #define FUNCNAME MPIR_Request_complete
 #undef FCNAME

Modified: mpich2/trunk/src/mpi/pt2pt/waitall.c
===================================================================
--- mpich2/trunk/src/mpi/pt2pt/waitall.c	2011-01-25 05:15:30 UTC (rev 7826)
+++ mpich2/trunk/src/mpi/pt2pt/waitall.c	2011-01-25 17:18:52 UTC (rev 7827)
@@ -8,7 +8,8 @@
 #include "mpiimpl.h"
 
 #if !defined(MPID_REQUEST_PTR_ARRAY_SIZE)
-#define MPID_REQUEST_PTR_ARRAY_SIZE 16
+/* use a larger default size of 64 in order to enhance SQMR performance */
+#define MPID_REQUEST_PTR_ARRAY_SIZE 64
 #endif
 
 /* -- Begin Profiling Symbol Block for routine MPI_Waitall */
@@ -45,8 +46,9 @@
     int rc;
     int n_greqs;
     const int ignoring_statuses = (array_of_statuses == MPI_STATUSES_IGNORE);
+    int optimize = ignoring_statuses; /* see NOTE-O1 */
     MPIU_CHKLMEM_DECL(1);
-    
+
     /* Convert MPI request handles to a request object pointers */
     if (count > MPID_REQUEST_PTR_ARRAY_SIZE)
     {
@@ -71,6 +73,12 @@
 		MPID_END_ERROR_CHECKS;
 	    }
 #           endif
+            if (request_ptrs[i]->kind != MPID_REQUEST_RECV &&
+                request_ptrs[i]->kind != MPID_REQUEST_SEND)
+            {
+                optimize = FALSE;
+            }
+
             if (request_ptrs[i]->kind == MPID_UREQUEST)
                 ++n_greqs;
 	}
@@ -80,6 +88,7 @@
 	    MPIR_Status_set_empty(status_ptr);
 	    request_ptrs[i] = NULL;
 	    n_completed += 1;
+            optimize = FALSE;
 	}
     }
     
@@ -88,6 +97,34 @@
 	goto fn_exit;
     }
 
+    /* NOTE-O1: high-message-rate optimization.  For simple send and recv
+     * operations and MPI_STATUSES_IGNORE we use a fastpath approach that strips
+     * out as many unnecessary jumps and error handling as possible.
+     *
+     * Possible variation: permit request_ptrs[i]==NULL at the cost of an
+     * additional branch inside the for-loop below. */
+    if (optimize) {
+        for (i = 0; i < count; ++i) {
+            while (!MPID_Request_is_complete(request_ptrs[i])) {
+                mpi_errno = MPID_Progress_wait(&progress_state);
+                /* must check and handle the error, can't guard with HAVE_ERROR_CHECKING, but it's
+                 * OK for the error case to be slower */
+                if (unlikely(mpi_errno)) {
+                    /* --BEGIN ERROR HANDLING-- */
+                    MPID_Progress_end(&progress_state);
+                    MPIU_ERR_POP(mpi_errno);
+                    /* --END ERROR HANDLING-- */
+                }
+            }
+            mpi_errno = MPIR_Request_complete_fastpath(&array_of_requests[i], request_ptrs[i]);
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+        }
+
+        goto fn_exit;
+    }
+
+    /* ------ "slow" code path below ------ */
+
     /* Grequest_waitall may run the progress engine - thus, we don't 
        invoke progress_start until after running Grequest_waitall */
     /* first, complete any generalized requests */



More information about the mpich2-commits mailing list