[mpich2-commits] r7807 - in mpich2/trunk/src: include mpi/coll mpid/ch3/channels/nemesis/src mpid/ch3/src
buntinas at mcs.anl.gov
buntinas at mcs.anl.gov
Fri Jan 21 13:53:52 CST 2011
Author: buntinas
Date: 2011-01-21 13:53:51 -0600 (Fri, 21 Jan 2011)
New Revision: 7807
Modified:
mpich2/trunk/src/include/mpiimpl.h
mpich2/trunk/src/mpi/coll/barrier.c
mpich2/trunk/src/mpid/ch3/channels/nemesis/src/ch3i_comm.c
mpich2/trunk/src/mpid/ch3/src/ch3u_port.c
Log:
Added an error flag (errflag) output parameter to the barrier routines so that communication failures are reported to callers.
Modified: mpich2/trunk/src/include/mpiimpl.h
===================================================================
--- mpich2/trunk/src/include/mpiimpl.h 2011-01-21 18:41:45 UTC (rev 7806)
+++ mpich2/trunk/src/include/mpiimpl.h 2011-01-21 19:53:51 UTC (rev 7807)
@@ -1738,7 +1738,7 @@
typedef struct MPID_Collops {
int ref_count; /* Supports lazy copies */
/* Contains pointers to the functions for the MPI collectives */
- int (*Barrier) (MPID_Comm *);
+ int (*Barrier) (MPID_Comm *, int *);
int (*Bcast) (void*, int, MPI_Datatype, int, MPID_Comm *, int *);
int (*Gather) (void*, int, MPI_Datatype, void*, int, MPI_Datatype,
int, MPID_Comm *, int *);
@@ -3504,10 +3504,10 @@
MPI_Datatype sendtype, void *recvbuf, int recvcnt,
MPI_Datatype recvtype, int root, MPID_Comm
*comm_ptr, int *errflag);
-int MPIR_Barrier_impl( MPID_Comm *comm_ptr);
-int MPIR_Barrier( MPID_Comm *comm_ptr);
-int MPIR_Barrier_intra( MPID_Comm *comm_ptr);
-int MPIR_Barrier_inter( MPID_Comm *comm_ptr);
+int MPIR_Barrier_impl( MPID_Comm *comm_ptr, int *errflag);
+int MPIR_Barrier( MPID_Comm *comm_ptr, int *errflag);
+int MPIR_Barrier_intra( MPID_Comm *comm_ptr, int *errflag);
+int MPIR_Barrier_inter( MPID_Comm *comm_ptr, int *errflag);
int MPIR_Reduce_local_impl(void *inbuf, void *inoutbuf, int count, MPI_Datatype datatype, MPI_Op op);
Modified: mpich2/trunk/src/mpi/coll/barrier.c
===================================================================
--- mpich2/trunk/src/mpi/coll/barrier.c 2011-01-21 18:41:45 UTC (rev 7806)
+++ mpich2/trunk/src/mpi/coll/barrier.c 2011-01-21 19:53:51 UTC (rev 7807)
@@ -17,7 +17,7 @@
#endif
/* -- End Profiling Symbol Block */
-PMPI_LOCAL int MPIR_Barrier_or_coll_fn(MPID_Comm *comm_ptr );
+PMPI_LOCAL int MPIR_Barrier_or_coll_fn(MPID_Comm *comm_ptr, int *errflag );
/* Define MPICH_MPI_FROM_PMPI if weak symbols are not supported to build
the MPI routines */
@@ -52,7 +52,7 @@
#define FUNCNAME MPIR_Barrier_intra
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPIR_Barrier_intra( MPID_Comm *comm_ptr )
+int MPIR_Barrier_intra( MPID_Comm *comm_ptr, int *errflag )
{
int size, rank, src, dst, mask, mpi_errno=MPI_SUCCESS;
int mpi_errno_ret = MPI_SUCCESS;
@@ -73,12 +73,13 @@
while (mask < size) {
dst = (rank + mask) % size;
src = (rank - mask + size) % size;
- mpi_errno = MPIC_Sendrecv(NULL, 0, MPI_BYTE, dst,
+ mpi_errno = MPIC_Sendrecv_ft(NULL, 0, MPI_BYTE, dst,
MPIR_BARRIER_TAG, NULL, 0, MPI_BYTE,
src, MPIR_BARRIER_TAG, comm,
- MPI_STATUS_IGNORE);
+ MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -89,6 +90,8 @@
MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -102,19 +105,19 @@
#define FUNCNAME MPIR_Barrier_or_coll_fn
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
-PMPI_LOCAL int MPIR_Barrier_or_coll_fn(MPID_Comm *comm_ptr )
+PMPI_LOCAL int MPIR_Barrier_or_coll_fn(MPID_Comm *comm_ptr, int *errflag )
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Barrier != NULL)
{
/* --BEGIN USEREXTENSION-- */
- mpi_errno = comm_ptr->node_roots_comm->coll_fns->Barrier(comm_ptr);
+ mpi_errno = comm_ptr->node_roots_comm->coll_fns->Barrier(comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
/* --END USEREXTENSION-- */
}
else {
- mpi_errno = MPIR_Barrier_intra(comm_ptr);
+ mpi_errno = MPIR_Barrier_intra(comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -131,13 +134,12 @@
#define FUNCNAME MPIR_Barrier_inter
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPIR_Barrier_inter( MPID_Comm *comm_ptr )
+int MPIR_Barrier_inter( MPID_Comm *comm_ptr, int *errflag )
{
int rank, mpi_errno = MPI_SUCCESS, root;
int mpi_errno_ret = MPI_SUCCESS;
int i = 0;
MPID_Comm *newcomm_ptr = NULL;
- int errflag = FALSE;
rank = comm_ptr->rank;
@@ -150,9 +152,10 @@
newcomm_ptr = comm_ptr->local_comm;
/* do a barrier on the local intracommunicator */
- mpi_errno = MPIR_Barrier_intra(newcomm_ptr);
+ mpi_errno = MPIR_Barrier_intra(newcomm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -167,46 +170,50 @@
if (comm_ptr->is_low_group) {
/* bcast to right*/
root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
- mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, &errflag);
+ mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
- MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
/* receive bcast from right */
root = 0;
- mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, &errflag);
+ mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
- MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
}
else {
/* receive bcast from left */
root = 0;
- mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, &errflag);
+ mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
- MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
/* bcast to left */
root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
- mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, &errflag);
+ mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
- MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
}
fn_exit:
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -220,17 +227,17 @@
#define FUNCNAME MPIR_Barrier
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPIR_Barrier(MPID_Comm *comm_ptr)
+int MPIR_Barrier(MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->comm_kind == MPID_INTRACOMM) {
/* intracommunicator */
- mpi_errno = MPIR_Barrier_intra( comm_ptr );
+ mpi_errno = MPIR_Barrier_intra( comm_ptr, errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
/* intercommunicator */
- mpi_errno = MPIR_Barrier_inter( comm_ptr );
+ mpi_errno = MPIR_Barrier_inter( comm_ptr, errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -249,14 +256,13 @@
#define FUNCNAME MPIR_Barrier_impl
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPIR_Barrier_impl(MPID_Comm *comm_ptr)
+int MPIR_Barrier_impl(MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
int mpi_errno_ret = MPI_SUCCESS;
- int errflag = FALSE;
if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Barrier != NULL)
{
- mpi_errno = comm_ptr->coll_fns->Barrier(comm_ptr);
+ mpi_errno = comm_ptr->coll_fns->Barrier(comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
else
@@ -268,9 +274,10 @@
/* do the intranode barrier on all nodes */
if (comm_ptr->node_comm != NULL)
{
- mpi_errno = MPIR_Barrier_or_coll_fn(comm_ptr->node_comm);
+ mpi_errno = MPIR_Barrier_or_coll_fn(comm_ptr->node_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -278,9 +285,10 @@
/* do the barrier across roots of all nodes */
if (comm_ptr->node_roots_comm != NULL) {
- mpi_errno = MPIR_Barrier_or_coll_fn(comm_ptr->node_roots_comm);
+ mpi_errno = MPIR_Barrier_or_coll_fn(comm_ptr->node_roots_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -291,27 +299,27 @@
if (comm_ptr->node_comm != NULL)
{
int i=0;
- mpi_errno = MPIR_Bcast_impl(&i, 1, MPI_BYTE, 0, comm_ptr->node_comm, &errflag);
+ mpi_errno = MPIR_Bcast_impl(&i, 1, MPI_BYTE, 0, comm_ptr->node_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
- MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
}
}
else {
- mpi_errno = MPIR_Barrier_intra( comm_ptr );
+ mpi_errno = MPIR_Barrier_intra( comm_ptr, errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
#else
- mpi_errno = MPIR_Barrier_intra( comm_ptr );
+ mpi_errno = MPIR_Barrier_intra( comm_ptr, errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
#endif
}
else {
/* intercommunicator */
- mpi_errno = MPIR_Barrier_inter( comm_ptr );
+ mpi_errno = MPIR_Barrier_inter( comm_ptr, errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
}
@@ -319,6 +327,8 @@
fn_exit:
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -359,6 +369,7 @@
{
int mpi_errno = MPI_SUCCESS;
MPID_Comm *comm_ptr = NULL;
+ int errflag = FALSE;
MPID_MPI_STATE_DECL(MPID_STATE_MPI_BARRIER);
MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -396,7 +407,7 @@
/* ... body of routine ... */
- mpi_errno = MPIR_Barrier_impl(comm_ptr);
+ mpi_errno = MPIR_Barrier_impl(comm_ptr, &errflag);
if (mpi_errno) goto fn_fail;
/* ... end of body of routine ... */
Modified: mpich2/trunk/src/mpid/ch3/channels/nemesis/src/ch3i_comm.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/src/ch3i_comm.c 2011-01-21 18:41:45 UTC (rev 7806)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/src/ch3i_comm.c 2011-01-21 19:53:51 UTC (rev 7807)
@@ -8,7 +8,7 @@
#define NULL_CONTEXT_ID -1
-static int barrier (MPID_Comm *comm_ptr);
+static int barrier (MPID_Comm *comm_ptr, int *errflag);
static int alloc_barrier_vars (MPID_Comm *comm, MPID_nem_barrier_vars_t **vars);
static MPID_Collops collective_functions = {
@@ -142,7 +142,7 @@
#define FUNCNAME barrier
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
-static int barrier (MPID_Comm *comm_ptr)
+static int barrier (MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
MPID_nem_barrier_vars_t *barrier_vars;
Modified: mpich2/trunk/src/mpid/ch3/src/ch3u_port.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/src/ch3u_port.c 2011-01-21 18:41:45 UTC (rev 7806)
+++ mpich2/trunk/src/mpid/ch3/src/ch3u_port.c 2011-01-21 19:53:51 UTC (rev 7807)
@@ -504,7 +504,7 @@
}
/*printf("connect:barrier\n");fflush(stdout);*/
- mpi_errno = MPIR_Barrier_intra(comm_ptr);
+ mpi_errno = MPIR_Barrier_intra(comm_ptr, &errflag);
if (mpi_errno != MPI_SUCCESS) {
MPIU_ERR_POP(mpi_errno);
}
@@ -1091,7 +1091,7 @@
}
MPIU_DBG_MSG(CH3_CONNECT,VERBOSE,"Barrier");
- mpi_errno = MPIR_Barrier_intra(comm_ptr);
+ mpi_errno = MPIR_Barrier_intra(comm_ptr, &errflag);
if (mpi_errno != MPI_SUCCESS) {
MPIU_ERR_POP(mpi_errno);
}
@@ -1146,7 +1146,7 @@
MPID_Comm *intercomm )
{
int mpi_errno = MPI_SUCCESS, i;
-
+ int errflag = FALSE;
/* FIXME: How much of this could/should be common with the
upper level (src/mpi/comm/ *.c) code? For best robustness,
this should use the same routine (not copy/paste code) as
@@ -1181,7 +1181,7 @@
}
MPIU_DBG_MSG(CH3_CONNECT,VERBOSE,"Barrier");
- mpi_errno = MPIR_Barrier_intra(comm_ptr);
+ mpi_errno = MPIR_Barrier_intra(comm_ptr, &errflag);
if (mpi_errno != MPI_SUCCESS) {
MPIU_ERR_POP(mpi_errno);
}
More information about the mpich2-commits mailing list