[mpich2-commits] r5518 - in mpich2/trunk: src/mpi/coll test/mpi/errors/coll
goodell at mcs.anl.gov
goodell at mcs.anl.gov
Wed Oct 21 14:25:35 CDT 2009
Author: goodell
Date: 2009-10-21 14:25:35 -0500 (Wed, 21 Oct 2009)
New Revision: 5518
Modified:
mpich2/trunk/src/mpi/coll/reduce.c
mpich2/trunk/test/mpi/errors/coll/noalias.c
Log:
Only check buffer aliasing at the root.
Thanks to Kenneth Inghram for reporting this bug.
Reviewed by buntinas.
Modified: mpich2/trunk/src/mpi/coll/reduce.c
===================================================================
--- mpich2/trunk/src/mpi/coll/reduce.c 2009-10-21 16:36:52 UTC (rev 5517)
+++ mpich2/trunk/src/mpi/coll/reduce.c 2009-10-21 19:25:35 UTC (rev 5518)
@@ -1009,6 +1009,9 @@
if (rank == root) {
MPIR_ERRTEST_RECVBUF_INPLACE(recvbuf, count, mpi_errno);
MPIR_ERRTEST_USERBUFFER(recvbuf,count,datatype,mpi_errno);
+ if (count != 0 && sendbuf != MPI_IN_PLACE) {
+ MPIR_ERRTEST_ALIAS_COLL(sendbuf, recvbuf, mpi_errno);
+ }
}
else
MPIR_ERRTEST_SENDBUF_INPLACE(sendbuf, count, mpi_errno);
@@ -1053,9 +1056,6 @@
mpi_errno =
( * MPIR_Op_check_dtype_table[op%16 - 1] )(datatype);
}
- if (count != 0) {
- MPIR_ERRTEST_ALIAS_COLL(sendbuf, recvbuf, mpi_errno);
- }
if (mpi_errno != MPI_SUCCESS) goto fn_fail;
}
MPID_END_ERROR_CHECKS;
Modified: mpich2/trunk/test/mpi/errors/coll/noalias.c
===================================================================
--- mpich2/trunk/test/mpi/errors/coll/noalias.c 2009-10-21 16:36:52 UTC (rev 5517)
+++ mpich2/trunk/test/mpi/errors/coll/noalias.c 2009-10-21 19:25:35 UTC (rev 5518)
@@ -10,6 +10,7 @@
{
int err, errs = 0, len;
int buf[1], rank;
+ int recvbuf[1];
char msg[MPI_MAX_ERROR_STRING];
MTest_Init( &argc, &argv );
@@ -28,18 +29,49 @@
/* (This works if it does not SEGV or hang) */
MPI_Error_string( err, msg, &len );
}
- err = MPI_Reduce( buf, buf, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD );
- if (!err) {
- errs++;
- if (rank == 0)
- printf( "Did not detect aliased arguments in MPI_Reduce\n" );
+
+ /* This case is a bit stranger than the MPI_Allreduce case above, because
+ * the recvbuf argument is only relevant at the root. So without an extra
+ * communication step to return errors everywhere, it will be typical for
+ * rank 0 (the root) to return an error and all other ranks will return
+ * MPI_SUCCESS. In many implementations this can leave the non-root
+ * processes hung or yield unmatched unexpected messages on the root. So we
+ * do our best to carry on in this case by posting a second non-erroneous
+ * MPI_Reduce on any process that got back an error from the intentionally
+ * erroneous MPI_Reduce. */
+ err = MPI_Reduce( buf, ((rank == 0) ? buf : NULL), 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD );
+ if (rank == 0) {
+ if (!err) {
+ errs++;
+ if (rank == 0)
+ printf( "Did not detect aliased arguments in MPI_Reduce\n" );
+ }
+ else {
+ /* Check that we can get a message for this error */
+ /* (This works if it does not SEGV or hang) */
+ MPI_Error_string( err, msg, &len );
+ }
}
- else {
- /* Check that we can get a message for this error */
- /* (This works if it does not SEGV or hang) */
- MPI_Error_string( err, msg, &len );
+ if (err) {
+ /* post a correct MPI_Reduce on any processes that got an error earlier */
+ err = MPI_Reduce( buf, recvbuf, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD );
+ if (err) {
+ errs++;
+ printf("make-up reduce failed on rank %d\n", rank);
+ }
}
+ /* this case should _not_ trigger an error, thanks to Kenneth Inghram for
+ * reporting this bug in MPICH2 */
+ err = MPI_Reduce( ((rank == 0) ? MPI_IN_PLACE : buf), buf, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD );
+ if (err) {
+ errs++;
+ printf("Incorrectly reported aliased arguments in MPI_Reduce with MPI_IN_PLACE on rank %d\n", rank);
+ MPI_Abort(MPI_COMM_WORLD, 1);
+ printf("FAILED TO MPI_ABORT!!!\n");
+ }
+
+
MTest_Finalize( errs );
MPI_Finalize( );
return 0;
More information about the mpich2-commits mailing list