[mpich2-commits] r7786 - in mpich2/trunk/src: include mpi/coll mpi/comm

goodell at mcs.anl.gov goodell at mcs.anl.gov
Thu Jan 20 16:09:04 CST 2011


Author: goodell
Date: 2011-01-20 16:09:04 -0600 (Thu, 20 Jan 2011)
New Revision: 7786

Modified:
   mpich2/trunk/src/include/mpiimpl.h
   mpich2/trunk/src/mpi/coll/ibcast.c
   mpich2/trunk/src/mpi/comm/commutil.c
Log:
add MPIR_Ibcast_SMP

Reviewed by balaji at .

Modified: mpich2/trunk/src/include/mpiimpl.h
===================================================================
--- mpich2/trunk/src/include/mpiimpl.h	2011-01-20 22:09:01 UTC (rev 7785)
+++ mpich2/trunk/src/include/mpiimpl.h	2011-01-20 22:09:04 UTC (rev 7786)
@@ -3522,6 +3522,7 @@
 
 int MPIR_Ibcast_intra(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
 int MPIR_Ibcast_inter(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ibcast_SMP(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
 
 
 /* random initializers */

Modified: mpich2/trunk/src/mpi/coll/ibcast.c
===================================================================
--- mpich2/trunk/src/mpi/coll/ibcast.c	2011-01-20 22:09:01 UTC (rev 7785)
+++ mpich2/trunk/src/mpi/coll/ibcast.c	2011-01-20 22:09:04 UTC (rev 7786)
@@ -5,6 +5,7 @@
  */
 
 #include "mpiimpl.h"
+#include "collutil.h"
 
 /* -- Begin Profiling Symbol Block for routine MPIX_Ibcast */
 #if defined(HAVE_PRAGMA_WEAK)
@@ -186,10 +187,170 @@
 }
 
 #undef FUNCNAME
-#define FUNCNAME MPIR_Ibcast_intra
+#define FUNCNAME MPIR_Ibcast_SMP
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPIR_Ibcast_intra(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr, MPI_Request *request)
+int MPIR_Ibcast_SMP(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr, MPID_Sched_t s)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int type_size, is_homogeneous;
+    int nbytes=0;
+
+#if !defined(USE_SMP_COLLECTIVES)
+    MPID_Abort(comm_ptr, MPI_ERR_OTHER, 1, "SMP collectives are disabled!");
+#endif
+    MPIU_Assert(MPIR_Comm_is_node_aware(comm_ptr));
+
+    is_homogeneous = 1;
+#ifdef MPID_HAS_HETERO
+    if (comm_ptr->is_hetero)
+        is_homogeneous = 0;
+#endif
+
+    MPIU_Assert(is_homogeneous); /* we don't handle the hetero case yet */
+    if (comm_ptr->node_comm) {
+        MPIU_Assert(comm_ptr->node_comm->coll_fns);
+        MPIU_Assert(comm_ptr->node_comm->coll_fns->Ibcast);
+    }
+    if (comm_ptr->node_roots_comm) {
+        MPIU_Assert(comm_ptr->node_roots_comm->coll_fns);
+        MPIU_Assert(comm_ptr->node_roots_comm->coll_fns->Ibcast);
+    }
+
+    /* MPI_Type_size() might not give the accurate size of the packed
+     * datatype for heterogeneous systems (because of padding, encoding,
+     * etc). On the other hand, MPI_Pack_size() can become very
+     * expensive, depending on the implementation, especially for
+     * heterogeneous systems. We want to use MPI_Type_size() wherever
+     * possible, and MPI_Pack_size() in other places.
+     */
+    if (is_homogeneous)
+        MPID_Datatype_get_size_macro(datatype, type_size);
+    else
+        MPIR_Pack_size_impl(1, datatype, &type_size);
+
+    nbytes = type_size * count;
+
+    /* TODO insert packing here */
+
+    if ((nbytes < MPIR_PARAM_BCAST_SHORT_MSG_SIZE) || (comm_ptr->local_size < MPIR_PARAM_BCAST_MIN_PROCS))
+    {
+        /* send to intranode-rank 0 on the root's node */
+        if (comm_ptr->node_comm != NULL &&
+            MPIU_Get_intranode_rank(comm_ptr, root) > 0) /* is not the node root (0) */ 
+        {                                                /* and is on our node (!-1) */
+            if (root == comm_ptr->rank) {
+                mpi_errno = MPID_Sched_send(buffer, count, datatype, 0, comm_ptr->node_comm, s); 
+            }
+            else if (0 == comm_ptr->node_comm->rank) {
+                mpi_errno = MPID_Sched_recv(buffer, count, datatype, MPIU_Get_intranode_rank(comm_ptr, root),
+                                            comm_ptr->node_comm, s);
+            }
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+            mpi_errno = MPID_Sched_barrier(s);
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+        }
+
+        /* perform the internode broadcast */
+        if (comm_ptr->node_roots_comm != NULL)
+        {
+            mpi_errno = comm_ptr->node_roots_comm->coll_fns->Ibcast(buffer, count, datatype,
+                                                                    MPIU_Get_internode_rank(comm_ptr, root),
+                                                                    comm_ptr->node_roots_comm, s);
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+            /* don't allow the local ops for the intranode phase to start until this has completed */
+            mpi_errno = MPID_Sched_barrier(s);
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+        }
+        /* perform the intranode broadcast on all except for the root's node */
+        if (comm_ptr->node_comm != NULL)
+        {
+            mpi_errno = comm_ptr->node_comm->coll_fns->Ibcast(buffer, count, datatype, 0, comm_ptr->node_comm, s);
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+        }
+    }
+    else /* (nbytes > MPIR_PARAM_BCAST_SHORT_MSG_SIZE) && (comm_ptr->size >= MPIR_PARAM_BCAST_MIN_PROCS) */
+    {
+        /* supposedly...
+           smp+doubling good for pof2
+           reg+ring better for non-pof2 */
+        if (nbytes < MPIR_PARAM_BCAST_LONG_MSG_SIZE && MPIU_is_pof2(comm_ptr->local_size, NULL))
+        {
+            /* medium-sized msg and pof2 np */
+
+            /* perform the intranode broadcast on the root's node */
+            if (comm_ptr->node_comm != NULL &&
+                MPIU_Get_intranode_rank(comm_ptr, root) > 0) /* is not the node root (0) */ 
+            {                                                /* and is on our node (!-1) */
+                /* was binomial */
+                mpi_errno = comm_ptr->node_comm->coll_fns->Ibcast(buffer, count, datatype,
+                                                                  MPIU_Get_intranode_rank(comm_ptr, root),
+                                                                  comm_ptr->node_comm, s);
+                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+                mpi_errno = MPID_Sched_barrier(s);
+                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            }
+
+            /* FIXME do we need barriers in here at all? */
+
+            /* perform the internode broadcast */
+            if (comm_ptr->node_roots_comm != NULL)
+            {
+#if 0
+                if (MPIU_is_pof2(comm_ptr->node_roots_comm->local_size, NULL))
+                {
+                    /* was scatter doubling allgather */
+                }
+                else
+                {
+                    /* was scatter ring allgather */
+                }
+#endif
+                mpi_errno = comm_ptr->node_roots_comm->coll_fns->Ibcast(buffer, count, datatype,
+                                                                        MPIU_Get_internode_rank(comm_ptr, root),
+                                                                        comm_ptr->node_roots_comm, s);
+                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+                /* don't allow the local ops for the intranode phase to start until this has completed */
+                mpi_errno = MPID_Sched_barrier(s);
+                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            }
+
+            /* perform the intranode broadcast on all except for the root's node */
+            if (comm_ptr->node_comm != NULL &&
+                MPIU_Get_intranode_rank(comm_ptr, root) <= 0) /* 0 if root was local root too */
+            {                                                 /* -1 if different node than root */
+                /* was binomial */
+                mpi_errno = comm_ptr->node_comm->coll_fns->Ibcast(buffer, count, datatype, 0, comm_ptr->node_comm, s);
+                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            }
+        }
+        else /* large msg or non-pof2 */
+        {
+            /* TODO port scatter_ring_allgather to NBC */
+            /*
+            mpi_errno = MPIR_Bcast_scatter_ring_allgather(buffer, count, datatype, root, comm_ptr);
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            */
+            mpi_errno = MPIR_Ibcast_binomial(buffer, count, datatype, root, comm_ptr, s);
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+        }
+    }
+
+
+fn_exit:
+    return mpi_errno;
+fn_fail:
+    goto fn_exit;
+}
+
+
+/* Provides a generic "flat" broadcast that doesn't know anything about hierarchy.  It will choose
+ * between several different algorithms based on the given parameters. */
 #undef FUNCNAME
 #define FUNCNAME MPIR_Ibcast_intra
 #undef FCNAME

Modified: mpich2/trunk/src/mpi/comm/commutil.c
===================================================================
--- mpich2/trunk/src/mpi/comm/commutil.c	2011-01-20 22:09:01 UTC (rev 7785)
+++ mpich2/trunk/src/mpi/comm/commutil.c	2011-01-20 22:09:04 UTC (rev 7786)
@@ -249,6 +249,7 @@
             case MPID_HIERARCHY_FLAT:
                 break;
             case MPID_HIERARCHY_PARENT:
+                ops->Ibcast = &MPIR_Ibcast_SMP;
                 break;
             case MPID_HIERARCHY_NODE:
                 break;



More information about the mpich2-commits mailing list