[mpich2-commits] r7857 - in mpich2/trunk/src: include mpi/coll mpi/comm mpi/errhan mpid/common/sched
goodell at mcs.anl.gov
goodell at mcs.anl.gov
Thu Jan 27 12:57:30 CST 2011
Author: goodell
Date: 2011-01-27 12:57:30 -0600 (Thu, 27 Jan 2011)
New Revision: 7857
Modified:
mpich2/trunk/src/include/mpiimpl.h
mpich2/trunk/src/include/mpir_nbc.h
mpich2/trunk/src/mpi/coll/iallgather.c
mpich2/trunk/src/mpi/coll/iallgatherv.c
mpich2/trunk/src/mpi/coll/iallreduce.c
mpich2/trunk/src/mpi/coll/ialltoall.c
mpich2/trunk/src/mpi/coll/ialltoallv.c
mpich2/trunk/src/mpi/coll/ialltoallw.c
mpich2/trunk/src/mpi/coll/ibarrier.c
mpich2/trunk/src/mpi/coll/ibcast.c
mpich2/trunk/src/mpi/coll/iexscan.c
mpich2/trunk/src/mpi/coll/igather.c
mpich2/trunk/src/mpi/coll/igatherv.c
mpich2/trunk/src/mpi/coll/ired_scat.c
mpich2/trunk/src/mpi/coll/ired_scat_block.c
mpich2/trunk/src/mpi/coll/ireduce.c
mpich2/trunk/src/mpi/coll/iscan.c
mpich2/trunk/src/mpi/coll/iscatter.c
mpich2/trunk/src/mpi/coll/iscatterv.c
mpich2/trunk/src/mpi/comm/commutil.c
mpich2/trunk/src/mpi/errhan/errnames.txt
mpich2/trunk/src/mpid/common/sched/mpid_sched.c
Log:
avoid NBC tag conflicts with blocking colls, add a weak tag wrapping check
This also allocates tag values per-comm, since NBC post order is only
required to be consistent on a per-communicator basis.
Reviewed by buntinas.
Modified: mpich2/trunk/src/include/mpiimpl.h
===================================================================
--- mpich2/trunk/src/include/mpiimpl.h 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/include/mpiimpl.h 2011-01-27 18:57:30 UTC (rev 7857)
@@ -1166,6 +1166,7 @@
struct MPID_TopoOps *topo_fns; /* Pointer to a table of functions
implementing the topology routines
*/
+ int next_sched_tag; /* used by the NBC schedule code to allocate tags */
#ifdef MPID_HAS_HETERO
int is_hetero;
#endif
@@ -3262,6 +3263,7 @@
#define MPIR_TOPO_B_TAG 27
#define MPIR_REDUCE_SCATTER_BLOCK_TAG 28
#define MPIR_ERROR_TAG 29
+#define MPIR_FIRST_NBC_TAG 30
/* These functions are used in the implementation of collective and
other internal operations. They are wrappers around MPID send/recv
Modified: mpich2/trunk/src/include/mpir_nbc.h
===================================================================
--- mpich2/trunk/src/include/mpir_nbc.h 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/include/mpir_nbc.h 2011-01-27 18:57:30 UTC (rev 7857)
@@ -41,7 +41,10 @@
* mechanism as well.
*/
-int MPID_Sched_next_tag(int *next_tag);
+/* Open question: should tag allocation be rolled into Sched_start? Keeping it
+ * separate potentially allows more parallelism in the future, but it also
+ * pushes more work onto the clients of this interface. */
+int MPID_Sched_next_tag(MPID_Comm *comm_ptr, int *tag);
/* the device must provide a typedef for MPID_Sched_t in mpidpre.h */
Modified: mpich2/trunk/src/mpi/coll/iallgather.c
===================================================================
--- mpich2/trunk/src/mpi/coll/iallgather.c 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/mpi/coll/iallgather.c 2011-01-27 18:57:30 UTC (rev 7857)
@@ -37,7 +37,7 @@
*request = MPI_REQUEST_NULL;
- mpi_errno = MPID_Sched_next_tag(&tag);
+ mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
mpi_errno = MPID_Sched_create(&s);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
Modified: mpich2/trunk/src/mpi/coll/iallgatherv.c
===================================================================
--- mpich2/trunk/src/mpi/coll/iallgatherv.c 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/mpi/coll/iallgatherv.c 2011-01-27 18:57:30 UTC (rev 7857)
@@ -37,7 +37,7 @@
*request = MPI_REQUEST_NULL;
- mpi_errno = MPID_Sched_next_tag(&tag);
+ mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
mpi_errno = MPID_Sched_create(&s);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
Modified: mpich2/trunk/src/mpi/coll/iallreduce.c
===================================================================
--- mpich2/trunk/src/mpi/coll/iallreduce.c 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/mpi/coll/iallreduce.c 2011-01-27 18:57:30 UTC (rev 7857)
@@ -164,7 +164,7 @@
*request = MPI_REQUEST_NULL;
- mpi_errno = MPID_Sched_next_tag(&tag);
+ mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
mpi_errno = MPID_Sched_create(&s);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
Modified: mpich2/trunk/src/mpi/coll/ialltoall.c
===================================================================
--- mpich2/trunk/src/mpi/coll/ialltoall.c 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/mpi/coll/ialltoall.c 2011-01-27 18:57:30 UTC (rev 7857)
@@ -37,7 +37,7 @@
*request = MPI_REQUEST_NULL;
- mpi_errno = MPID_Sched_next_tag(&tag);
+ mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
mpi_errno = MPID_Sched_create(&s);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
Modified: mpich2/trunk/src/mpi/coll/ialltoallv.c
===================================================================
--- mpich2/trunk/src/mpi/coll/ialltoallv.c 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/mpi/coll/ialltoallv.c 2011-01-27 18:57:30 UTC (rev 7857)
@@ -249,7 +249,7 @@
*request = MPI_REQUEST_NULL;
- mpi_errno = MPID_Sched_next_tag(&tag);
+ mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
mpi_errno = MPID_Sched_create(&s);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
Modified: mpich2/trunk/src/mpi/coll/ialltoallw.c
===================================================================
--- mpich2/trunk/src/mpi/coll/ialltoallw.c 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/mpi/coll/ialltoallw.c 2011-01-27 18:57:30 UTC (rev 7857)
@@ -37,7 +37,7 @@
*request = MPI_REQUEST_NULL;
- mpi_errno = MPID_Sched_next_tag(&tag);
+ mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
mpi_errno = MPID_Sched_create(&s);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
Modified: mpich2/trunk/src/mpi/coll/ibarrier.c
===================================================================
--- mpich2/trunk/src/mpi/coll/ibarrier.c 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/mpi/coll/ibarrier.c 2011-01-27 18:57:30 UTC (rev 7857)
@@ -172,7 +172,7 @@
*request = MPI_REQUEST_NULL;
- mpi_errno = MPID_Sched_next_tag(&tag);
+ mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
mpi_errno = MPID_Sched_create(&s);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
Modified: mpich2/trunk/src/mpi/coll/ibcast.c
===================================================================
--- mpich2/trunk/src/mpi/coll/ibcast.c 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/mpi/coll/ibcast.c 2011-01-27 18:57:30 UTC (rev 7857)
@@ -432,7 +432,7 @@
*request = MPI_REQUEST_NULL;
- mpi_errno = MPID_Sched_next_tag(&tag);
+ mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
mpi_errno = MPID_Sched_create(&s);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
Modified: mpich2/trunk/src/mpi/coll/iexscan.c
===================================================================
--- mpich2/trunk/src/mpi/coll/iexscan.c 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/mpi/coll/iexscan.c 2011-01-27 18:57:30 UTC (rev 7857)
@@ -37,7 +37,7 @@
*request = MPI_REQUEST_NULL;
- mpi_errno = MPID_Sched_next_tag(&tag);
+ mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
mpi_errno = MPID_Sched_create(&s);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
Modified: mpich2/trunk/src/mpi/coll/igather.c
===================================================================
--- mpich2/trunk/src/mpi/coll/igather.c 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/mpi/coll/igather.c 2011-01-27 18:57:30 UTC (rev 7857)
@@ -37,7 +37,7 @@
*request = MPI_REQUEST_NULL;
- mpi_errno = MPID_Sched_next_tag(&tag);
+ mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
mpi_errno = MPID_Sched_create(&s);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
Modified: mpich2/trunk/src/mpi/coll/igatherv.c
===================================================================
--- mpich2/trunk/src/mpi/coll/igatherv.c 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/mpi/coll/igatherv.c 2011-01-27 18:57:30 UTC (rev 7857)
@@ -37,7 +37,7 @@
*request = MPI_REQUEST_NULL;
- mpi_errno = MPID_Sched_next_tag(&tag);
+ mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
mpi_errno = MPID_Sched_create(&s);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
Modified: mpich2/trunk/src/mpi/coll/ired_scat.c
===================================================================
--- mpich2/trunk/src/mpi/coll/ired_scat.c 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/mpi/coll/ired_scat.c 2011-01-27 18:57:30 UTC (rev 7857)
@@ -37,7 +37,7 @@
*request = MPI_REQUEST_NULL;
- mpi_errno = MPID_Sched_next_tag(&tag);
+ mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
mpi_errno = MPID_Sched_create(&s);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
Modified: mpich2/trunk/src/mpi/coll/ired_scat_block.c
===================================================================
--- mpich2/trunk/src/mpi/coll/ired_scat_block.c 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/mpi/coll/ired_scat_block.c 2011-01-27 18:57:30 UTC (rev 7857)
@@ -37,7 +37,7 @@
*request = MPI_REQUEST_NULL;
- mpi_errno = MPID_Sched_next_tag(&tag);
+ mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
mpi_errno = MPID_Sched_create(&s);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
Modified: mpich2/trunk/src/mpi/coll/ireduce.c
===================================================================
--- mpich2/trunk/src/mpi/coll/ireduce.c 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/mpi/coll/ireduce.c 2011-01-27 18:57:30 UTC (rev 7857)
@@ -321,7 +321,7 @@
*request = MPI_REQUEST_NULL;
- mpi_errno = MPID_Sched_next_tag(&tag);
+ mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
mpi_errno = MPID_Sched_create(&s);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
Modified: mpich2/trunk/src/mpi/coll/iscan.c
===================================================================
--- mpich2/trunk/src/mpi/coll/iscan.c 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/mpi/coll/iscan.c 2011-01-27 18:57:30 UTC (rev 7857)
@@ -37,7 +37,7 @@
*request = MPI_REQUEST_NULL;
- mpi_errno = MPID_Sched_next_tag(&tag);
+ mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
mpi_errno = MPID_Sched_create(&s);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
Modified: mpich2/trunk/src/mpi/coll/iscatter.c
===================================================================
--- mpich2/trunk/src/mpi/coll/iscatter.c 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/mpi/coll/iscatter.c 2011-01-27 18:57:30 UTC (rev 7857)
@@ -37,7 +37,7 @@
*request = MPI_REQUEST_NULL;
- mpi_errno = MPID_Sched_next_tag(&tag);
+ mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
mpi_errno = MPID_Sched_create(&s);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
Modified: mpich2/trunk/src/mpi/coll/iscatterv.c
===================================================================
--- mpich2/trunk/src/mpi/coll/iscatterv.c 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/mpi/coll/iscatterv.c 2011-01-27 18:57:30 UTC (rev 7857)
@@ -37,7 +37,7 @@
*request = MPI_REQUEST_NULL;
- mpi_errno = MPID_Sched_next_tag(&tag);
+ mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
mpi_errno = MPID_Sched_create(&s);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
Modified: mpich2/trunk/src/mpi/comm/commutil.c
===================================================================
--- mpich2/trunk/src/mpi/comm/commutil.c 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/mpi/comm/commutil.c 2011-01-27 18:57:30 UTC (rev 7857)
@@ -96,6 +96,9 @@
comm_p->intranode_table = NULL;
comm_p->internode_table = NULL;
+ /* abstractions bleed a bit here... :( */
+ comm_p->next_sched_tag = MPIR_FIRST_NBC_TAG;
+
/* Fields not set include context_id, remote and local size, and
kind, since different communicator construction routines need
different values */
Modified: mpich2/trunk/src/mpi/errhan/errnames.txt
===================================================================
--- mpich2/trunk/src/mpi/errhan/errnames.txt 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/mpi/errhan/errnames.txt 2011-01-27 18:57:30 UTC (rev 7857)
@@ -887,6 +887,8 @@
**signal %s:signal() failed: %s
**sigusr1:This version of MPICH requires the SIGUSR1 signal, but the application has already installed a handler
+**toomanynbc: too many outstanding nonblocking collectives detected
+
#
# mpi functions
#
Modified: mpich2/trunk/src/mpid/common/sched/mpid_sched.c
===================================================================
--- mpich2/trunk/src/mpid/common/sched/mpid_sched.c 2011-01-27 17:01:30 UTC (rev 7856)
+++ mpich2/trunk/src/mpid/common/sched/mpid_sched.c 2011-01-27 18:57:30 UTC (rev 7857)
@@ -30,9 +30,6 @@
/* holds on to all incomplete schedules on which progress should be made */
struct MPIDU_Sched_state all_schedules = {NULL};
-/* FIXME MT needs locking or atomic access for fine-grained threading */
-static int next_sched_tag = 0;
-
/* returns TRUE if any schedules are currently pending completion by the
* progress engine, FALSE otherwise */
#undef FUNCNAME
@@ -48,14 +45,50 @@
#define FUNCNAME MPID_Sched_next_tag
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPID_Sched_next_tag(int *tag)
+int MPID_Sched_next_tag(MPID_Comm *comm_ptr, int *tag)
{
- *tag = next_sched_tag;
-
+ int mpi_errno = MPI_SUCCESS;
/* TODO there should be an internal accessor/utility macro for getting the
* TAG_UB value that doesn't require using the attribute interface */
- next_sched_tag = (next_sched_tag + 1) % MPIR_Process.attrs.tag_ub;
- return MPI_SUCCESS;
+ int tag_ub = MPIR_Process.attrs.tag_ub;
+#if defined(HAVE_ERROR_CHECKING)
+ int start = MPI_UNDEFINED;
+ int end = MPI_UNDEFINED;
+ struct MPIDU_Sched *elt = NULL;
+#endif
+
+ *tag = comm_ptr->next_sched_tag;
+ ++comm_ptr->next_sched_tag;
+
+#if defined(HAVE_ERROR_CHECKING)
+ /* Upon entry into the second half of the tag space, ensure there are no
+ * outstanding schedules still using the second half of the space. Check
+ * the first half similarly on wraparound. */
+ if (comm_ptr->next_sched_tag == (tag_ub / 2)) {
+ start = tag_ub / 2;
+ end = tag_ub;
+ }
+ else if (comm_ptr->next_sched_tag == (tag_ub)) {
+ start = MPIR_FIRST_NBC_TAG;
+ end = tag_ub / 2;
+ }
+ if (start != MPI_UNDEFINED) {
+ MPL_DL_FOREACH(all_schedules.head, elt) {
+ if (elt->tag >= start && elt->tag < end) {
+ MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**toomanynbc");
+ }
+ }
+ }
+#endif
+
+ /* wrap the tag values around to the start, but don't allow it to conflict
+ * with the tags used by the blocking collectives */
+ if (comm_ptr->next_sched_tag == tag_ub) {
+ comm_ptr->next_sched_tag = MPIR_FIRST_NBC_TAG;
+ }
+
+fn_fail:
+ return mpi_errno;
}
#undef FUNCNAME
More information about the mpich2-commits
mailing list