[mpich2-commits] r5500 - in mpich2/trunk: . src/include src/mpi/comm src/mpid/ch3/src

goodell at mcs.anl.gov goodell at mcs.anl.gov
Tue Oct 20 15:35:32 CDT 2009


Author: goodell
Date: 2009-10-20 15:35:32 -0500 (Tue, 20 Oct 2009)
New Revision: 5500

Modified:
   mpich2/trunk/configure.in
   mpich2/trunk/src/include/mpihandlemem.h
   mpich2/trunk/src/include/mpiimpl.h
   mpich2/trunk/src/mpi/comm/commutil.c
   mpich2/trunk/src/mpid/ch3/src/mpid_finalize.c
   mpich2/trunk/src/mpid/ch3/src/mpid_vc.c
Log:
Don't refcount predefined objects when using lock-free refcounting.

This optimization can greatly improve multi-threaded message rates for
MPI_COMM_WORLD, particularly on platforms where a fetch-and-inc is
potentially expensive (like BG/P).

No reviewer.

Modified: mpich2/trunk/configure.in
===================================================================
--- mpich2/trunk/configure.in	2009-10-20 20:35:30 UTC (rev 5499)
+++ mpich2/trunk/configure.in	2009-10-20 20:35:32 UTC (rev 5500)
@@ -1156,6 +1156,7 @@
     ;;
     lock-free|lock_free|lockfree)
     thread_refcount=MPIU_REFCOUNT_LOCKFREE
+    AC_DEFINE([MPIU_THREAD_SUPPRESS_PREDEFINED_REFCOUNTS],[1],[define to disable reference counting predefined objects like MPI_COMM_WORLD])
     ;;
     none)
     thread_refcount=MPIU_REFCOUNT_NONE

Modified: mpich2/trunk/src/include/mpihandlemem.h
===================================================================
--- mpich2/trunk/src/include/mpihandlemem.h	2009-10-20 20:35:30 UTC (rev 5499)
+++ mpich2/trunk/src/include/mpihandlemem.h	2009-10-20 20:35:32 UTC (rev 5500)
@@ -196,6 +196,10 @@
 .ve
   M*/
 
+/* The "_always" versions of these macros unconditionally manipulate the
+ * reference count of the given object.  They exist to permit an optimization
+ * of not reference counting predefined objects. */
+
 /* The MPIU_DBG... statements are macros that vanish unless
    --enable-g=log is selected.  MPIU_HANDLE_CHECK_REFCOUNT is
    defined above, and adds an additional sanity check for the refcounts
@@ -222,7 +226,7 @@
 #define MPIU_Object_get_ref(objptr_) \
     ((objptr_)->ref_count)
 
-#define MPIU_Object_add_ref(objptr_)                      \
+#define MPIU_Object_add_ref_always(objptr_)               \
     do {                                                  \
         MPIU_THREAD_CS_ENTER(HANDLE,objptr_);             \
         (objptr_)->ref_count++;                           \
@@ -230,7 +234,7 @@
         MPIU_HANDLE_CHECK_REFCOUNT(objptr_,"incr");       \
         MPIU_THREAD_CS_EXIT(HANDLE,objptr_);              \
     } while (0)
-#define MPIU_Object_release_ref(objptr_,inuse_ptr)        \
+#define MPIU_Object_release_ref_always(objptr_,inuse_ptr) \
     do {                                                  \
         MPIU_THREAD_CS_ENTER(HANDLE,objptr_);             \
         *(inuse_ptr) = --((objptr_)->ref_count);          \
@@ -254,13 +258,13 @@
 #define MPIU_Object_get_ref(objptr_) \
     (OPA_load_int(&(objptr_)->ref_count))
 
-#define MPIU_Object_add_ref(objptr_)                      \
+#define MPIU_Object_add_ref_always(objptr_)               \
     do {                                                  \
         OPA_incr_int(&((objptr_)->ref_count));            \
         MPIU_HANDLE_LOG_REFCOUNT_CHANGE(objptr_, "incr"); \
         MPIU_HANDLE_CHECK_REFCOUNT(objptr_,"incr");       \
     } while (0)
-#define MPIU_Object_release_ref(objptr_,inuse_ptr)                      \
+#define MPIU_Object_release_ref_always(objptr_,inuse_ptr)               \
     do {                                                                \
         int got_zero_ = OPA_decr_and_test_int(&((objptr_)->ref_count)); \
         *(inuse_ptr) = got_zero_ ? 0 : 1;                               \
@@ -268,6 +272,49 @@
         MPIU_HANDLE_CHECK_REFCOUNT(objptr_,"decr");                     \
     } while (0)
 #endif
+
+/* TODO someday we should probably always suppress predefined object refcounting,
+ * but we don't have total confidence in it yet.  So until we gain sufficient
+ * confidence, this is a configurable option. */
+#if defined(MPIU_THREAD_SUPPRESS_PREDEFINED_REFCOUNTS)
+
+/* The assumption here is that objects with handles of type HANDLE_KIND_BUILTIN
+ * will be created/destroyed only at MPI_Init/MPI_Finalize time and don't need
+ * to be reference counted.  This can be a big performance win on some
+ * platforms, such as BG/P.
+ *
+ * It is also assumed that any object being reference counted via these macros
+ * will have a valid value in the handle field, even if it is
+ * HANDLE_SET_KIND(0, HANDLE_KIND_INVALID) */
+#define MPIU_Object_add_ref(objptr_)                           \
+    do {                                                       \
+        int handle_kind_ = HANDLE_GET_KIND((objptr_)->handle); \
+        if (handle_kind_ != HANDLE_KIND_BUILTIN) {             \
+            MPIU_Object_add_ref_always((objptr_));             \
+        }                                                      \
+    } while (0)
+#define MPIU_Object_release_ref(objptr_,inuse_ptr_)                  \
+    do {                                                             \
+        int handle_kind_ = HANDLE_GET_KIND((objptr_)->handle);       \
+        if (handle_kind_ != HANDLE_KIND_BUILTIN) {                   \
+            MPIU_Object_release_ref_always((objptr_), (inuse_ptr_)); \
+        }                                                            \
+        else {                                                       \
+            *(inuse_ptr_) = 1;                                       \
+        }                                                            \
+    } while (0)
+
+#else /* !defined(MPIU_THREAD_SUPPRESS_PREDEFINED_REFCOUNTS) */
+
+/* the base case, where we just always manipulate the reference counts */
+#define MPIU_Object_add_ref(objptr_) \
+    MPIU_Object_add_ref_always((objptr_))
+#define MPIU_Object_release_ref(objptr_,inuse_ptr_) \
+    MPIU_Object_release_ref_always((objptr_),(inuse_ptr_))
+
+#endif
+
+
 /* end reference counting macros */
 /* ------------------------------------------------------------------------- */
 

Modified: mpich2/trunk/src/include/mpiimpl.h
===================================================================
--- mpich2/trunk/src/include/mpiimpl.h	2009-10-20 20:35:30 UTC (rev 5499)
+++ mpich2/trunk/src/include/mpiimpl.h	2009-10-20 20:35:32 UTC (rev 5500)
@@ -1131,11 +1131,9 @@
 /* MPIR_Comm_release is a helper routine that releases references to a comm.
    The second arg is false unless this is called as part of 
    MPI_Comm_disconnect .
-
-   Question: Should this only be called if the ref count on the 
-   comm is zero, thus avoiding a function call in the typical case?
 */
 int MPIR_Comm_release(MPID_Comm *, int );
+int MPIR_Comm_release_always(MPID_Comm *comm_ptr, int isDisconnect);
 
 #define MPIR_Comm_add_ref(_comm) \
     do { MPIU_Object_add_ref((_comm)); } while (0)

Modified: mpich2/trunk/src/mpi/comm/commutil.c
===================================================================
--- mpich2/trunk/src/mpi/comm/commutil.c	2009-10-20 20:35:30 UTC (rev 5499)
+++ mpich2/trunk/src/mpi/comm/commutil.c	2009-10-20 20:35:32 UTC (rev 5500)
@@ -1014,6 +1014,129 @@
     return mpi_errno;
 }
 
+/* Common body between MPIR_Comm_release and MPIR_comm_release_always.  This
+ * helper function frees the actual MPID_Comm structure and any associated
+ * storage.  It also releases any refernces to other objects, such as the VCRT.
+ * This function should only be called when the communicator's reference count
+ * has dropped to 0. */
+#undef FUNCNAME
+#define FUNCNAME comm_delete
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+static int comm_delete(MPID_Comm * comm_ptr, int isDisconnect)
+{
+    int in_use;
+    int mpi_errno = MPI_SUCCESS;
+    MPID_MPI_STATE_DECL(MPID_STATE_COMM_DELETE);
+
+    MPID_MPI_FUNC_ENTER(MPID_STATE_COMM_DELETE);
+
+    MPIU_Assert(MPIU_Object_get_ref(comm_ptr) == 0); /* sanity check */
+
+    /* Remove the attributes, executing the attribute delete routine.
+       Do this only if the attribute functions are defined.
+       This must be done first, because if freeing the attributes
+       returns an error, the communicator is not freed */
+    if (MPIR_Process.attr_free && comm_ptr->attributes) {
+        /* Temporarily add a reference to this communicator because
+           the attr_free code requires a valid communicator */
+        MPIU_Object_add_ref( comm_ptr );
+        mpi_errno = MPIR_Process.attr_free( comm_ptr->handle,
+                                            &comm_ptr->attributes );
+        /* Release the temporary reference added before the call to
+           attr_free */
+        MPIU_Object_release_ref( comm_ptr, &in_use);
+    }
+
+    /* If the attribute delete functions return failure, the
+       communicator must not be freed.  That is the reason for the
+       test on mpi_errno here. */
+    if (mpi_errno == MPI_SUCCESS) {
+        /* If this communicator is our parent, and we're disconnecting
+           from the parent, mark that fact */
+        if (MPIR_Process.comm_parent == comm_ptr)
+            MPIR_Process.comm_parent = NULL;
+
+        /* Notify the device that the communicator is about to be
+           destroyed */
+        MPID_Dev_comm_destroy_hook(comm_ptr);
+
+        /* Free the VCRT */
+        mpi_errno = MPID_VCRT_Release(comm_ptr->vcrt, isDisconnect);
+        if (mpi_errno != MPI_SUCCESS) {
+            MPIU_ERR_POP(mpi_errno);
+        }
+        if (comm_ptr->comm_kind == MPID_INTERCOMM) {
+            mpi_errno = MPID_VCRT_Release(
+                                          comm_ptr->local_vcrt, isDisconnect);
+            if (mpi_errno != MPI_SUCCESS) {
+                MPIU_ERR_POP(mpi_errno);
+            }
+            if (comm_ptr->local_comm)
+                MPIR_Comm_release(comm_ptr->local_comm, isDisconnect );
+        }
+
+        /* Free the local and remote groups, if they exist */
+        if (comm_ptr->local_group)
+            MPIR_Group_release(comm_ptr->local_group);
+        if (comm_ptr->remote_group)
+            MPIR_Group_release(comm_ptr->remote_group);
+
+        /* free the intra/inter-node communicators, if they exist */
+        if (comm_ptr->node_comm)
+            MPIR_Comm_release(comm_ptr->node_comm, isDisconnect);
+        if (comm_ptr->node_roots_comm)
+            MPIR_Comm_release(comm_ptr->node_roots_comm, isDisconnect);
+        if (comm_ptr->intranode_table != NULL)
+            MPIU_Free(comm_ptr->intranode_table);
+        if (comm_ptr->internode_table != NULL)
+            MPIU_Free(comm_ptr->internode_table);
+
+        /* Free the context value.  This should come after freeing the
+         * intra/inter-node communicators since those free calls won't
+         * release this context ID and releasing this before then could lead
+         * to races once we make threading finer grained. */
+        /* This must be the recvcontext_id (i.e. not the (send)context_id)
+         * because in the case of intercommunicators the send context ID is
+         * allocated out of the remote group's bit vector, not ours. */
+        MPIR_Free_contextid( comm_ptr->recvcontext_id );
+
+        /* We need to release the error handler */
+        if (comm_ptr->errhandler &&
+            ! (HANDLE_GET_KIND(comm_ptr->errhandler->handle) ==
+               HANDLE_KIND_BUILTIN) ) {
+            int errhInuse;
+            MPIR_Errhandler_release_ref( comm_ptr->errhandler,&errhInuse);
+            if (!errhInuse) {
+                MPIU_Handle_obj_free( &MPID_Errhandler_mem,
+                                      comm_ptr->errhandler );
+            }
+        }
+
+        /* Check for predefined communicators - these should not
+           be freed */
+        if (! (HANDLE_GET_KIND(comm_ptr->handle) == HANDLE_KIND_BUILTIN) )
+            MPIU_Handle_obj_free( &MPID_Comm_mem, comm_ptr );
+
+        /* Remove from the list of active communicators if
+           we are supporting message-queue debugging.  We make this
+           conditional on having debugger support since the
+           operation is not constant-time */
+        MPIR_COMML_FORGET( comm_ptr );
+    }
+    else {
+        /* If the user attribute free function returns an error,
+           then do not free the communicator */
+        MPIR_Comm_add_ref( comm_ptr );
+    }
+
+ fn_exit:
+    MPID_MPI_FUNC_EXIT(MPID_STATE_COMM_DELETE);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
 /* Release a reference to a communicator.  If there are no pending
    references, delete the communicator and recover all storage and 
    context ids */
@@ -1024,114 +1147,52 @@
 int MPIR_Comm_release(MPID_Comm * comm_ptr, int isDisconnect)
 {
     int mpi_errno = MPI_SUCCESS;
-    int inuse;
+    int in_use;
     MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_RELEASE);
 
     MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_RELEASE);
-    
-    MPIR_Comm_release_ref( comm_ptr, &inuse );
-    if (!inuse) {
-	/* Remove the attributes, executing the attribute delete routine.  
-           Do this only if the attribute functions are defined. 
-	   This must be done first, because if freeing the attributes
-	   returns an error, the communicator is not freed */
-	if (MPIR_Process.attr_free && comm_ptr->attributes) {
-	    /* Temporarily add a reference to this communicator because 
-               the attr_free code requires a valid communicator */ 
-	    MPIU_Object_add_ref( comm_ptr ); 
-	    mpi_errno = MPIR_Process.attr_free( comm_ptr->handle, 
-						&comm_ptr->attributes );
-	    /* Release the temporary reference added before the call to 
-               attr_free */ 
-	    MPIU_Object_release_ref( comm_ptr, &inuse); 
-	}
 
-	/* If the attribute delete functions return failure, the
-	   communicator must not be freed.  That is the reason for the
-	   test on mpi_errno here. */
-	if (mpi_errno == MPI_SUCCESS) {
-	    /* If this communicator is our parent, and we're disconnecting
-	       from the parent, mark that fact */
-	    if (MPIR_Process.comm_parent == comm_ptr)
-		MPIR_Process.comm_parent = NULL;
+    MPIR_Comm_release_ref(comm_ptr, &in_use);
+    if (!in_use) {
+        mpi_errno = comm_delete(comm_ptr, isDisconnect);
+        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+    }
 
-	    /* Notify the device that the communicator is about to be 
-	       destroyed */
-	    MPID_Dev_comm_destroy_hook(comm_ptr);
-	    
-	    /* Free the VCRT */
-	    mpi_errno = MPID_VCRT_Release(comm_ptr->vcrt, isDisconnect);
-	    if (mpi_errno != MPI_SUCCESS) {
-		MPIU_ERR_POP(mpi_errno);
-	    }
-            if (comm_ptr->comm_kind == MPID_INTERCOMM) {
-                mpi_errno = MPID_VCRT_Release(
-		    comm_ptr->local_vcrt, isDisconnect);
-		if (mpi_errno != MPI_SUCCESS) {
-		    MPIU_ERR_POP(mpi_errno);
-		}
-                if (comm_ptr->local_comm) 
-                    MPIR_Comm_release(comm_ptr->local_comm, isDisconnect );
-            }
+ fn_exit:
+    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_RELEASE);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
 
-	    /* Free the local and remote groups, if they exist */
-            if (comm_ptr->local_group)
-                MPIR_Group_release(comm_ptr->local_group);
-            if (comm_ptr->remote_group)
-                MPIR_Group_release(comm_ptr->remote_group);
+/* Release a reference to a communicator.  If there are no pending
+   references, delete the communicator and recover all storage and
+   context ids.  This version of the function always manipulates the reference
+   counts, even for predefined objects. */
+#undef FUNCNAME
+#define FUNCNAME MPIR_Comm_release_always
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPIR_Comm_release_always(MPID_Comm *comm_ptr, int isDisconnect)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int in_use;
+    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_RELEASE_ALWAYS);
 
-            /* free the intra/inter-node communicators, if they exist */
-            if (comm_ptr->node_comm)
-                MPIR_Comm_release(comm_ptr->node_comm, isDisconnect);
-            if (comm_ptr->node_roots_comm)
-                MPIR_Comm_release(comm_ptr->node_roots_comm, isDisconnect);
-            if (comm_ptr->intranode_table != NULL)
-                MPIU_Free(comm_ptr->intranode_table);
-            if (comm_ptr->internode_table != NULL)
-                MPIU_Free(comm_ptr->internode_table);
+    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_RELEASE_ALWAYS);
 
-            /* Free the context value.  This should come after freeing the
-             * intra/inter-node communicators since those free calls won't
-             * release this context ID and releasing this before then could lead
-             * to races once we make threading finer grained. */
-            /* This must be the recvcontext_id (i.e. not the (send)context_id)
-             * because in the case of intercommunicators the send context ID is
-             * allocated out of the remote group's bit vector, not ours. */
-            MPIR_Free_contextid( comm_ptr->recvcontext_id );
-
-	    /* We need to release the error handler */
-	    if (comm_ptr->errhandler && 
-		! (HANDLE_GET_KIND(comm_ptr->errhandler->handle) == 
-		   HANDLE_KIND_BUILTIN) ) {
-		int errhInuse;
-		MPIR_Errhandler_release_ref( comm_ptr->errhandler,&errhInuse);
-		if (!errhInuse) {
-		    MPIU_Handle_obj_free( &MPID_Errhandler_mem, 
-					  comm_ptr->errhandler );
-		}
-	    }
-
-	    /* Check for predefined communicators - these should not
-	       be freed */
-	    if (! (HANDLE_GET_KIND(comm_ptr->handle) == HANDLE_KIND_BUILTIN) )
-		MPIU_Handle_obj_free( &MPID_Comm_mem, comm_ptr );  
-	    
-	    /* Remove from the list of active communicators if 
-	       we are supporting message-queue debugging.  We make this
-	       conditional on having debugger support since the
-	       operation is not constant-time */
-	    MPIR_COMML_FORGET( comm_ptr );
-	}
-	else {
-	    /* If the user attribute free function returns an error,
-	       then do not free the communicator */
-	    MPIR_Comm_add_ref( comm_ptr );
-	}
+    /* we want to short-circuit any optimization that avoids reference counting
+     * predefined communicators, such as MPI_COMM_WORLD or MPI_COMM_SELF. */
+    MPIU_Object_release_ref_always(comm_ptr, &in_use);
+    if (!in_use) {
+        mpi_errno = comm_delete(comm_ptr, isDisconnect);
+        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
  fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_RELEASE);
+    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_RELEASE_ALWAYS);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
 }
+

Modified: mpich2/trunk/src/mpid/ch3/src/mpid_finalize.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/src/mpid_finalize.c	2009-10-20 20:35:30 UTC (rev 5499)
+++ mpich2/trunk/src/mpid/ch3/src/mpid_finalize.c	2009-10-20 20:35:32 UTC (rev 5500)
@@ -91,14 +91,14 @@
     MPIR_Nest_decr();
     if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 
-    mpi_errno = MPIR_Comm_release(MPIR_Process.icomm_world, 0);
+    mpi_errno = MPIR_Comm_release_always(MPIR_Process.icomm_world, 0);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 #endif
 
-    mpi_errno = MPIR_Comm_release(MPIR_Process.comm_self, 0);
+    mpi_errno = MPIR_Comm_release_always(MPIR_Process.comm_self, 0);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
-    mpi_errno = MPIR_Comm_release(MPIR_Process.comm_world, 0);
+    mpi_errno = MPIR_Comm_release_always(MPIR_Process.comm_world, 0);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     /* Re-enabling the close step because many tests are failing

Modified: mpich2/trunk/src/mpid/ch3/src/mpid_vc.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/src/mpid_vc.c	2009-10-20 20:35:30 UTC (rev 5499)
+++ mpich2/trunk/src/mpid/ch3/src/mpid_vc.c	2009-10-20 20:35:32 UTC (rev 5500)
@@ -78,6 +78,7 @@
 
     MPIU_CHKPMEM_MALLOC(vcrt, MPIDI_VCRT_t *, sizeof(MPIDI_VCRT_t) + (size - 1) * sizeof(MPIDI_VC_t *),	mpi_errno, "**nomem");
     MPIU_Object_set_ref(vcrt, 1);
+    vcrt->handle = HANDLE_SET_KIND(0, HANDLE_KIND_INVALID);
     vcrt->size = size;
     *vcrt_ptr = vcrt;
 



More information about the mpich2-commits mailing list