[mpich2-commits] r5518 - in mpich2/trunk: src/mpi/coll test/mpi/errors/coll

goodell at mcs.anl.gov goodell at mcs.anl.gov
Wed Oct 21 14:25:35 CDT 2009


Author: goodell
Date: 2009-10-21 14:25:35 -0500 (Wed, 21 Oct 2009)
New Revision: 5518

Modified:
   mpich2/trunk/src/mpi/coll/reduce.c
   mpich2/trunk/test/mpi/errors/coll/noalias.c
Log:
Only check buffer aliasing at the root.

Thanks to Kenneth Inghram for reporting this bug.

Reviewed by buntinas.

Modified: mpich2/trunk/src/mpi/coll/reduce.c
===================================================================
--- mpich2/trunk/src/mpi/coll/reduce.c	2009-10-21 16:36:52 UTC (rev 5517)
+++ mpich2/trunk/src/mpi/coll/reduce.c	2009-10-21 19:25:35 UTC (rev 5518)
@@ -1009,6 +1009,9 @@
                 if (rank == root) {
                     MPIR_ERRTEST_RECVBUF_INPLACE(recvbuf, count, mpi_errno);
                     MPIR_ERRTEST_USERBUFFER(recvbuf,count,datatype,mpi_errno);
+                    if (count != 0 && sendbuf != MPI_IN_PLACE) {
+                        MPIR_ERRTEST_ALIAS_COLL(sendbuf, recvbuf, mpi_errno);
+                    }
                 }
                 else
                     MPIR_ERRTEST_SENDBUF_INPLACE(sendbuf, count, mpi_errno);
@@ -1053,9 +1056,6 @@
                 mpi_errno = 
                     ( * MPIR_Op_check_dtype_table[op%16 - 1] )(datatype); 
             }
-	    if (count != 0) {
-		MPIR_ERRTEST_ALIAS_COLL(sendbuf, recvbuf, mpi_errno);
-	    }
 	    if (mpi_errno != MPI_SUCCESS) goto fn_fail;
         }
         MPID_END_ERROR_CHECKS;

Modified: mpich2/trunk/test/mpi/errors/coll/noalias.c
===================================================================
--- mpich2/trunk/test/mpi/errors/coll/noalias.c	2009-10-21 16:36:52 UTC (rev 5517)
+++ mpich2/trunk/test/mpi/errors/coll/noalias.c	2009-10-21 19:25:35 UTC (rev 5518)
@@ -10,6 +10,7 @@
 {
     int        err, errs = 0, len;
     int        buf[1], rank;
+    int        recvbuf[1];
     char       msg[MPI_MAX_ERROR_STRING];
 
     MTest_Init( &argc, &argv );
@@ -28,18 +29,49 @@
 	/* (This works if it does not SEGV or hang) */
 	MPI_Error_string( err, msg, &len );
     }
-    err = MPI_Reduce( buf, buf, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD );
-    if (!err) {
-	errs++;
-	if (rank == 0)
-	    printf( "Did not detect aliased arguments in MPI_Reduce\n" );
+
+    /* This case is a bit stranger than the MPI_Allreduce case above, because
+     * the recvbuf argument is only relevant at the root.  So without an extra
+     * communication step to return errors everywhere, it will be typical for
+     * rank 0 (the root) to return an error and all other ranks will return
+     * MPI_SUCCESS.  In many implementations this can leave the non-root
+     * processes hung or yield unmatched unexpected messages on the root.  So we
+     * do our best to carry on in this case by posting a second non-erroneous
+     * MPI_Reduce on any process that got back an error from the intentionally
+     * erroneous MPI_Reduce. */
+    err = MPI_Reduce( buf, ((rank == 0) ? buf : NULL), 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD );
+    if (rank == 0) {
+        if (!err) {
+            errs++;
+            if (rank == 0)
+                printf( "Did not detect aliased arguments in MPI_Reduce\n" );
+        }
+        else {
+            /* Check that we can get a message for this error */
+            /* (This works if it does not SEGV or hang) */
+            MPI_Error_string( err, msg, &len );
+        }
     }
-    else {
-	/* Check that we can get a message for this error */
-	/* (This works if it does not SEGV or hang) */
-	MPI_Error_string( err, msg, &len );
+    if (err) {
+        /* post a correct MPI_Reduce on any processes that got an error earlier */
+        err = MPI_Reduce( buf, recvbuf, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD );
+        if (err) {
+            errs++;
+            printf("make-up reduce failed on rank %d\n", rank);
+        }
     }
 
+    /* this case should _not_ trigger an error, thanks to Kenneth Inghram for
+     * reporting this bug in MPICH2 */
+    err = MPI_Reduce( ((rank == 0) ? MPI_IN_PLACE : buf), buf, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD );
+    if (err) {
+        errs++;
+        printf("Incorrectly reported aliased arguments in MPI_Reduce with MPI_IN_PLACE on rank %d\n", rank);
+        MPI_Abort(MPI_COMM_WORLD, 1);
+        printf("FAILED TO MPI_ABORT!!!\n");
+    }
+
+
     MTest_Finalize( errs );
     MPI_Finalize( );
     return 0;



More information about the mpich2-commits mailing list