[mpich2-commits] r7807 - in mpich2/trunk/src: include mpi/coll mpid/ch3/channels/nemesis/src mpid/ch3/src

buntinas at mcs.anl.gov buntinas at mcs.anl.gov
Fri Jan 21 13:53:52 CST 2011


Author: buntinas
Date: 2011-01-21 13:53:51 -0600 (Fri, 21 Jan 2011)
New Revision: 7807

Modified:
   mpich2/trunk/src/include/mpiimpl.h
   mpich2/trunk/src/mpi/coll/barrier.c
   mpich2/trunk/src/mpid/ch3/channels/nemesis/src/ch3i_comm.c
   mpich2/trunk/src/mpid/ch3/src/ch3u_port.c
Log:
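Added an error-flag output parameter (int *errflag) to the barrier routines, so communication errors are recorded and reported to the caller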

Modified: mpich2/trunk/src/include/mpiimpl.h
===================================================================
--- mpich2/trunk/src/include/mpiimpl.h	2011-01-21 18:41:45 UTC (rev 7806)
+++ mpich2/trunk/src/include/mpiimpl.h	2011-01-21 19:53:51 UTC (rev 7807)
@@ -1738,7 +1738,7 @@
 typedef struct MPID_Collops {
     int ref_count;   /* Supports lazy copies */
     /* Contains pointers to the functions for the MPI collectives */
-    int (*Barrier) (MPID_Comm *);
+    int (*Barrier) (MPID_Comm *, int *);
     int (*Bcast) (void*, int, MPI_Datatype, int, MPID_Comm *, int *);
     int (*Gather) (void*, int, MPI_Datatype, void*, int, MPI_Datatype, 
                    int, MPID_Comm *, int *); 
@@ -3504,10 +3504,10 @@
                    MPI_Datatype sendtype, void *recvbuf, int recvcnt,
                    MPI_Datatype recvtype, int root, MPID_Comm
                    *comm_ptr, int *errflag);
-int MPIR_Barrier_impl( MPID_Comm *comm_ptr);
-int MPIR_Barrier( MPID_Comm *comm_ptr);
-int MPIR_Barrier_intra( MPID_Comm *comm_ptr);
-int MPIR_Barrier_inter( MPID_Comm *comm_ptr);
+int MPIR_Barrier_impl( MPID_Comm *comm_ptr, int *errflag);
+int MPIR_Barrier( MPID_Comm *comm_ptr, int *errflag);
+int MPIR_Barrier_intra( MPID_Comm *comm_ptr, int *errflag);
+int MPIR_Barrier_inter( MPID_Comm *comm_ptr, int *errflag);
 
 int MPIR_Reduce_local_impl(void *inbuf, void *inoutbuf, int count, MPI_Datatype datatype, MPI_Op op);
 

Modified: mpich2/trunk/src/mpi/coll/barrier.c
===================================================================
--- mpich2/trunk/src/mpi/coll/barrier.c	2011-01-21 18:41:45 UTC (rev 7806)
+++ mpich2/trunk/src/mpi/coll/barrier.c	2011-01-21 19:53:51 UTC (rev 7807)
@@ -17,7 +17,7 @@
 #endif
 /* -- End Profiling Symbol Block */
 
-PMPI_LOCAL int MPIR_Barrier_or_coll_fn(MPID_Comm *comm_ptr );
+PMPI_LOCAL int MPIR_Barrier_or_coll_fn(MPID_Comm *comm_ptr, int *errflag );
 
 /* Define MPICH_MPI_FROM_PMPI if weak symbols are not supported to build
    the MPI routines */
@@ -52,7 +52,7 @@
 #define FUNCNAME MPIR_Barrier_intra
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPIR_Barrier_intra( MPID_Comm *comm_ptr )
+int MPIR_Barrier_intra( MPID_Comm *comm_ptr, int *errflag )
 {
     int size, rank, src, dst, mask, mpi_errno=MPI_SUCCESS;
     int mpi_errno_ret = MPI_SUCCESS;
@@ -73,12 +73,13 @@
     while (mask < size) {
         dst = (rank + mask) % size;
         src = (rank - mask + size) % size;
-        mpi_errno = MPIC_Sendrecv(NULL, 0, MPI_BYTE, dst,
+        mpi_errno = MPIC_Sendrecv_ft(NULL, 0, MPI_BYTE, dst,
                                      MPIR_BARRIER_TAG, NULL, 0, MPI_BYTE,
                                      src, MPIR_BARRIER_TAG, comm,
-                                     MPI_STATUS_IGNORE);
+                                     MPI_STATUS_IGNORE, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -89,6 +90,8 @@
     MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -102,19 +105,19 @@
 #define FUNCNAME MPIR_Barrier_or_coll_fn
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
-PMPI_LOCAL int MPIR_Barrier_or_coll_fn(MPID_Comm *comm_ptr )
+PMPI_LOCAL int MPIR_Barrier_or_coll_fn(MPID_Comm *comm_ptr, int *errflag )
 {
     int mpi_errno = MPI_SUCCESS;
 
     if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Barrier != NULL)
     {
         /* --BEGIN USEREXTENSION-- */
-        mpi_errno = comm_ptr->node_roots_comm->coll_fns->Barrier(comm_ptr);
+        mpi_errno = comm_ptr->node_roots_comm->coll_fns->Barrier(comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         /* --END USEREXTENSION-- */
     }
     else {
-        mpi_errno = MPIR_Barrier_intra(comm_ptr);
+        mpi_errno = MPIR_Barrier_intra(comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -131,13 +134,12 @@
 #define FUNCNAME MPIR_Barrier_inter
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPIR_Barrier_inter( MPID_Comm *comm_ptr )
+int MPIR_Barrier_inter( MPID_Comm *comm_ptr, int *errflag )
 {
     int rank, mpi_errno = MPI_SUCCESS, root;
     int mpi_errno_ret = MPI_SUCCESS;
     int i = 0;
     MPID_Comm *newcomm_ptr = NULL;
-    int errflag = FALSE;
     
     rank = comm_ptr->rank;
 
@@ -150,9 +152,10 @@
     newcomm_ptr = comm_ptr->local_comm;
 
     /* do a barrier on the local intracommunicator */
-    mpi_errno = MPIR_Barrier_intra(newcomm_ptr);
+    mpi_errno = MPIR_Barrier_intra(newcomm_ptr, errflag);
     if (mpi_errno) {
         /* for communication errors, just record the error but continue */
+        *errflag = TRUE;
         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
     }
@@ -167,46 +170,50 @@
     if (comm_ptr->is_low_group) {
         /* bcast to right*/
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
-        mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, &errflag);
+        mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
-        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
         /* receive bcast from right */
         root = 0;
-        mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, &errflag);
+        mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
-        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     }
     else {
         /* receive bcast from left */
         root = 0;
-        mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, &errflag);
+        mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
-        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
         /* bcast to left */
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
-        mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, &errflag);
+        mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
-        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     }
  fn_exit:
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -220,17 +227,17 @@
 #define FUNCNAME MPIR_Barrier
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPIR_Barrier(MPID_Comm *comm_ptr)
+int MPIR_Barrier(MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
 
     if (comm_ptr->comm_kind == MPID_INTRACOMM) {
         /* intracommunicator */
-        mpi_errno = MPIR_Barrier_intra( comm_ptr );
+        mpi_errno = MPIR_Barrier_intra( comm_ptr, errflag );
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         /* intercommunicator */
-        mpi_errno = MPIR_Barrier_inter( comm_ptr );
+        mpi_errno = MPIR_Barrier_inter( comm_ptr, errflag );
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -249,14 +256,13 @@
 #define FUNCNAME MPIR_Barrier_impl
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPIR_Barrier_impl(MPID_Comm *comm_ptr)
+int MPIR_Barrier_impl(MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
     int mpi_errno_ret = MPI_SUCCESS;
-    int errflag = FALSE;
     if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Barrier != NULL)
     {
-	mpi_errno = comm_ptr->coll_fns->Barrier(comm_ptr);
+	mpi_errno = comm_ptr->coll_fns->Barrier(comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
     else
@@ -268,9 +274,10 @@
                 /* do the intranode barrier on all nodes */
                 if (comm_ptr->node_comm != NULL)
                 {
-                    mpi_errno = MPIR_Barrier_or_coll_fn(comm_ptr->node_comm);
+                    mpi_errno = MPIR_Barrier_or_coll_fn(comm_ptr->node_comm, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
@@ -278,9 +285,10 @@
 
                 /* do the barrier across roots of all nodes */
                 if (comm_ptr->node_roots_comm != NULL) {
-                    mpi_errno = MPIR_Barrier_or_coll_fn(comm_ptr->node_roots_comm);
+                    mpi_errno = MPIR_Barrier_or_coll_fn(comm_ptr->node_roots_comm, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
@@ -291,27 +299,27 @@
                 if (comm_ptr->node_comm != NULL)
                 {
 		    int i=0;
-                    mpi_errno = MPIR_Bcast_impl(&i, 1, MPI_BYTE, 0, comm_ptr->node_comm, &errflag);
+                    mpi_errno = MPIR_Bcast_impl(&i, 1, MPI_BYTE, 0, comm_ptr->node_comm, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
-                    MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
                 }
             }
             else {
-                mpi_errno = MPIR_Barrier_intra( comm_ptr );
+                mpi_errno = MPIR_Barrier_intra( comm_ptr, errflag );
                 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
             }
 #else
-            mpi_errno = MPIR_Barrier_intra( comm_ptr );
+            mpi_errno = MPIR_Barrier_intra( comm_ptr, errflag );
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 #endif
         }
         else {
             /* intercommunicator */ 
-            mpi_errno = MPIR_Barrier_inter( comm_ptr );
+            mpi_errno = MPIR_Barrier_inter( comm_ptr, errflag );
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 	}
     }
@@ -319,6 +327,8 @@
  fn_exit:
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -359,6 +369,7 @@
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Comm *comm_ptr = NULL;
+    int errflag = FALSE;
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_BARRIER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -396,7 +407,7 @@
 
     /* ... body of routine ...  */
 
-    mpi_errno = MPIR_Barrier_impl(comm_ptr);
+    mpi_errno = MPIR_Barrier_impl(comm_ptr, &errflag);
     if (mpi_errno) goto fn_fail;
     
     /* ... end of body of routine ... */

Modified: mpich2/trunk/src/mpid/ch3/channels/nemesis/src/ch3i_comm.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/src/ch3i_comm.c	2011-01-21 18:41:45 UTC (rev 7806)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/src/ch3i_comm.c	2011-01-21 19:53:51 UTC (rev 7807)
@@ -8,7 +8,7 @@
 
 #define NULL_CONTEXT_ID -1
 
-static int barrier (MPID_Comm *comm_ptr);
+static int barrier (MPID_Comm *comm_ptr, int *errflag);
 static int alloc_barrier_vars (MPID_Comm *comm, MPID_nem_barrier_vars_t **vars);
 
 static MPID_Collops collective_functions = {
@@ -142,7 +142,7 @@
 #define FUNCNAME barrier
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-static int barrier (MPID_Comm *comm_ptr)
+static int barrier (MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_nem_barrier_vars_t *barrier_vars;

Modified: mpich2/trunk/src/mpid/ch3/src/ch3u_port.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/src/ch3u_port.c	2011-01-21 18:41:45 UTC (rev 7806)
+++ mpich2/trunk/src/mpid/ch3/src/ch3u_port.c	2011-01-21 19:53:51 UTC (rev 7807)
@@ -504,7 +504,7 @@
     }
 
     /*printf("connect:barrier\n");fflush(stdout);*/
-    mpi_errno = MPIR_Barrier_intra(comm_ptr);
+    mpi_errno = MPIR_Barrier_intra(comm_ptr, &errflag);
     if (mpi_errno != MPI_SUCCESS) {
 	MPIU_ERR_POP(mpi_errno);
     }
@@ -1091,7 +1091,7 @@
     }
 
     MPIU_DBG_MSG(CH3_CONNECT,VERBOSE,"Barrier");
-    mpi_errno = MPIR_Barrier_intra(comm_ptr);
+    mpi_errno = MPIR_Barrier_intra(comm_ptr, &errflag);
     if (mpi_errno != MPI_SUCCESS) {
 	MPIU_ERR_POP(mpi_errno);
     }
@@ -1146,7 +1146,7 @@
 			      MPID_Comm *intercomm )
 {
     int mpi_errno = MPI_SUCCESS, i;
-
+    int errflag = FALSE;
     /* FIXME: How much of this could/should be common with the
        upper level (src/mpi/comm/ *.c) code? For best robustness, 
        this should use the same routine (not copy/paste code) as
@@ -1181,7 +1181,7 @@
     }
 
     MPIU_DBG_MSG(CH3_CONNECT,VERBOSE,"Barrier");
-    mpi_errno = MPIR_Barrier_intra(comm_ptr);
+    mpi_errno = MPIR_Barrier_intra(comm_ptr, &errflag);
     if (mpi_errno != MPI_SUCCESS) {
 	MPIU_ERR_POP(mpi_errno);
     }



More information about the mpich2-commits mailing list