[mpich2-commits] r7803 - in mpich2/trunk: . confdb maint src/include src/mpi/coll src/mpi/comm src/mpi/errhan src/mpi/topo src/mpid src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp src/mpid/ch3/src src/mpl/src src/pm/hydra src/pm/hydra/examples src/pm/hydra/include src/pm/hydra/pm src/pm/hydra/tools src/pm/hydra/tools/bootstrap/external src/pm/hydra/tools/bootstrap/src src/pm/hydra/tools/bootstrap/utils src/pm/hydra/ui src/pm/hydra/utils
buntinas at mcs.anl.gov
buntinas at mcs.anl.gov
Fri Jan 21 12:32:12 CST 2011
Author: buntinas
Date: 2011-01-21 12:32:12 -0600 (Fri, 21 Jan 2011)
New Revision: 7803
Modified:
mpich2/trunk/
mpich2/trunk/confdb/
mpich2/trunk/maint/Version
mpich2/trunk/src/include/mpiimpl.h
mpich2/trunk/src/mpi/coll/allgather.c
mpich2/trunk/src/mpi/coll/allgatherv.c
mpich2/trunk/src/mpi/coll/allreduce.c
mpich2/trunk/src/mpi/coll/alltoall.c
mpich2/trunk/src/mpi/coll/alltoallv.c
mpich2/trunk/src/mpi/coll/alltoallw.c
mpich2/trunk/src/mpi/coll/barrier.c
mpich2/trunk/src/mpi/coll/bcast.c
mpich2/trunk/src/mpi/coll/exscan.c
mpich2/trunk/src/mpi/coll/gather.c
mpich2/trunk/src/mpi/coll/gatherv.c
mpich2/trunk/src/mpi/coll/helper_fns.c
mpich2/trunk/src/mpi/coll/red_scat.c
mpich2/trunk/src/mpi/coll/red_scat_block.c
mpich2/trunk/src/mpi/coll/reduce.c
mpich2/trunk/src/mpi/coll/scan.c
mpich2/trunk/src/mpi/coll/scatter.c
mpich2/trunk/src/mpi/coll/scatterv.c
mpich2/trunk/src/mpi/comm/comm_create.c
mpich2/trunk/src/mpi/comm/comm_split.c
mpich2/trunk/src/mpi/comm/commutil.c
mpich2/trunk/src/mpi/comm/intercomm_create.c
mpich2/trunk/src/mpi/comm/intercomm_merge.c
mpich2/trunk/src/mpi/errhan/errnames.txt
mpich2/trunk/src/mpi/topo/dist_gr_create.c
mpich2/trunk/src/mpid/
mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c
mpich2/trunk/src/mpid/ch3/src/ch3u_comm_spawn_multiple.c
mpich2/trunk/src/mpid/ch3/src/ch3u_port.c
mpich2/trunk/src/mpid/ch3/src/ch3u_rma_ops.c
mpich2/trunk/src/mpid/ch3/src/ch3u_rma_sync.c
mpich2/trunk/src/mpid/ch3/src/mpid_vc.c
mpich2/trunk/src/mpl/src/mplstr.c
mpich2/trunk/src/pm/hydra/
mpich2/trunk/src/pm/hydra/Makefile.am
mpich2/trunk/src/pm/hydra/README
mpich2/trunk/src/pm/hydra/autogen.sh
mpich2/trunk/src/pm/hydra/configure.in
mpich2/trunk/src/pm/hydra/examples/
mpich2/trunk/src/pm/hydra/hydra-doxygen.cfg.in
mpich2/trunk/src/pm/hydra/include/
mpich2/trunk/src/pm/hydra/mpich2prereq
mpich2/trunk/src/pm/hydra/pm/
mpich2/trunk/src/pm/hydra/tools/
mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm_query_proxy_id.c
mpich2/trunk/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c
mpich2/trunk/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c
mpich2/trunk/src/pm/hydra/ui/
mpich2/trunk/src/pm/hydra/utils/
mpich2/trunk/winconfigure.wsf
Log:
merging error returns for collective branch into trunk
Property changes on: mpich2/trunk
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/ckpt:5050
/mpich2/branches/dev/ckpt2:5057-6537
/mpich2/branches/dev/error-return:7662-7670
/mpich2/branches/dev/ftb:5661-5730
/mpich2/branches/dev/lapi:5817
/mpich2/branches/dev/wintcp_async_progress:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2:5406
+ /mpich2/branches/dev/ckpt:5050
/mpich2/branches/dev/ckpt2:5057-6537
/mpich2/branches/dev/coll-err-ret:7771-7802
/mpich2/branches/dev/error-return:7662-7670
/mpich2/branches/dev/ftb:5661-5730
/mpich2/branches/dev/lapi:5817
/mpich2/branches/dev/wintcp_async_progress:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2:5406
Property changes on: mpich2/trunk/confdb
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/ckpt2/confdb:5180,5182,5196,5198
/mpich2/branches/dev/error-return/confdb:7662-7670
/mpich2/branches/dev/ftb/confdb:5661-5730
/mpich2/branches/dev/lapi/confdb:5817
/mpich2/branches/dev/wintcp_async_progress/confdb:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/confdb:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/confdb:5406
+ /mpich2/branches/dev/ckpt2/confdb:5180,5182,5196,5198
/mpich2/branches/dev/coll-err-ret/confdb:7771-7802
/mpich2/branches/dev/error-return/confdb:7662-7670
/mpich2/branches/dev/ftb/confdb:5661-5730
/mpich2/branches/dev/lapi/confdb:5817
/mpich2/branches/dev/wintcp_async_progress/confdb:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/confdb:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/confdb:5406
Property changes on: mpich2/trunk/maint/Version
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/ckpt/maint/Version:5050
/mpich2/branches/dev/ckpt2/maint/Version:5057-6537
/mpich2/branches/dev/error-return/maint/Version:7662-7670
/mpich2/branches/dev/ftb/maint/Version:5661-5730
/mpich2/branches/dev/lapi/maint/Version:5817
/mpich2/branches/dev/wintcp_async_progress/maint/Version:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/maint/Version:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/maint/Version:5406
/mpich2/trunk/src/pm/hydra/VERSION:7662-7666
+ /mpich2/branches/dev/ckpt/maint/Version:5050
/mpich2/branches/dev/ckpt2/maint/Version:5057-6537
/mpich2/branches/dev/coll-err-ret/maint/Version:7771-7802
/mpich2/branches/dev/error-return/maint/Version:7662-7670
/mpich2/branches/dev/ftb/maint/Version:5661-5730
/mpich2/branches/dev/lapi/maint/Version:5817
/mpich2/branches/dev/wintcp_async_progress/maint/Version:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/maint/Version:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/maint/Version:5406
/mpich2/trunk/src/pm/hydra/VERSION:7662-7666
Modified: mpich2/trunk/src/include/mpiimpl.h
===================================================================
--- mpich2/trunk/src/include/mpiimpl.h 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/include/mpiimpl.h 2011-01-21 18:32:12 UTC (rev 7803)
@@ -1739,35 +1739,35 @@
int ref_count; /* Supports lazy copies */
/* Contains pointers to the functions for the MPI collectives */
int (*Barrier) (MPID_Comm *);
- int (*Bcast) (void*, int, MPI_Datatype, int, MPID_Comm * );
+ int (*Bcast) (void*, int, MPI_Datatype, int, MPID_Comm *, int *);
int (*Gather) (void*, int, MPI_Datatype, void*, int, MPI_Datatype,
- int, MPID_Comm *);
+ int, MPID_Comm *, int *);
int (*Gatherv) (void*, int, MPI_Datatype, void*, int *, int *,
- MPI_Datatype, int, MPID_Comm *);
+ MPI_Datatype, int, MPID_Comm *, int *);
int (*Scatter) (void*, int, MPI_Datatype, void*, int, MPI_Datatype,
- int, MPID_Comm *);
+ int, MPID_Comm *, int *);
int (*Scatterv) (void*, int *, int *, MPI_Datatype, void*, int,
- MPI_Datatype, int, MPID_Comm *);
+ MPI_Datatype, int, MPID_Comm *, int *);
int (*Allgather) (void*, int, MPI_Datatype, void*, int,
- MPI_Datatype, MPID_Comm *);
+ MPI_Datatype, MPID_Comm *, int *);
int (*Allgatherv) (void*, int, MPI_Datatype, void*, int *, int *,
- MPI_Datatype, MPID_Comm *);
+ MPI_Datatype, MPID_Comm *, int *);
int (*Alltoall) (void*, int, MPI_Datatype, void*, int, MPI_Datatype,
- MPID_Comm *);
+ MPID_Comm *, int *);
int (*Alltoallv) (void*, int *, int *, MPI_Datatype, void*, int *,
- int *, MPI_Datatype, MPID_Comm *);
+ int *, MPI_Datatype, MPID_Comm *, int *);
int (*Alltoallw) (void*, int *, int *, MPI_Datatype *, void*, int *,
- int *, MPI_Datatype *, MPID_Comm *);
+ int *, MPI_Datatype *, MPID_Comm *, int *);
int (*Reduce) (void*, void*, int, MPI_Datatype, MPI_Op, int,
- MPID_Comm *);
+ MPID_Comm *, int *);
int (*Allreduce) (void*, void*, int, MPI_Datatype, MPI_Op,
- MPID_Comm *);
+ MPID_Comm *, int *);
int (*Reduce_scatter) (void*, void*, int *, MPI_Datatype, MPI_Op,
- MPID_Comm *);
- int (*Scan) (void*, void*, int, MPI_Datatype, MPI_Op, MPID_Comm * );
- int (*Exscan) (void*, void*, int, MPI_Datatype, MPI_Op, MPID_Comm * );
+ MPID_Comm *, int *);
+ int (*Scan) (void*, void*, int, MPI_Datatype, MPI_Op, MPID_Comm *, int * );
+ int (*Exscan) (void*, void*, int, MPI_Datatype, MPI_Op, MPID_Comm *, int * );
int (*Reduce_scatter_block) (void*, void*, int, MPI_Datatype, MPI_Op,
- MPID_Comm *);
+ MPID_Comm *, int *);
/* MPI-3 nonblocking collectives */
int (*Ibarrier)(MPID_Comm *comm_ptr, MPID_Sched_t s);
@@ -3260,6 +3260,7 @@
#define MPIR_TOPO_A_TAG 26
#define MPIR_TOPO_B_TAG 27
#define MPIR_REDUCE_SCATTER_BLOCK_TAG 28
+#define MPIR_ERROR_TAG 29
/* These functions are used in the implementation of collective and
other internal operations. They are wrappers around MPID send/recv
@@ -3288,6 +3289,28 @@
int MPIC_Wait(MPID_Request * request_ptr);
int MPIC_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status);
+/* FT versions of the MPIC_ functions */
+int MPIC_Send_ft(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+ MPI_Comm comm, int *errflag);
+int MPIC_Recv_ft(void *buf, int count, MPI_Datatype datatype, int source, int tag,
+ MPI_Comm comm, MPI_Status *status, int *errflag);
+int MPIC_Ssend_ft(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+ MPI_Comm comm, int *errflag);
+int MPIC_Sendrecv_ft(void *sendbuf, int sendcount, MPI_Datatype sendtype,
+ int dest, int sendtag, void *recvbuf, int recvcount,
+ MPI_Datatype recvtype, int source, int recvtag,
+ MPI_Comm comm, MPI_Status *status, int *errflag);
+int MPIC_Sendrecv_replace_ft(void *buf, int count, MPI_Datatype datatype,
+ int dest, int sendtag,
+ int source, int recvtag,
+ MPI_Comm comm, MPI_Status *status, int *errflag);
+int MPIC_Isend_ft(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+ MPI_Comm comm, MPI_Request *request, int *errflag);
+int MPIC_Irecv_ft(void *buf, int count, MPI_Datatype datatype, int source,
+ int tag, MPI_Comm comm, MPI_Request *request);
+int MPIC_Waitall_ft(int numreq, MPI_Request requests[], MPI_Status statuses[], int *errflag);
+
+
void MPIR_MAXF ( void *, void *, int *, MPI_Datatype * ) ;
void MPIR_MINF ( void *, void *, int *, MPI_Datatype * ) ;
void MPIR_SUM ( void *, void *, int *, MPI_Datatype * ) ;
@@ -3330,160 +3353,160 @@
int MPIR_Allgather_impl(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
- MPID_Comm *comm_ptr );
+ MPID_Comm *comm_ptr, int *errflag );
int MPIR_Allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
- MPID_Comm *comm_ptr );
+ MPID_Comm *comm_ptr, int *errflag );
int MPIR_Allgather_intra(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
- MPID_Comm *comm_ptr );
+ MPID_Comm *comm_ptr, int *errflag );
int MPIR_Allgather_inter(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
- MPID_Comm *comm_ptr );
+ MPID_Comm *comm_ptr, int *errflag );
int MPIR_Allgatherv_impl(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int *recvcounts, int *displs,
- MPI_Datatype recvtype, MPID_Comm *comm_ptr );
+ MPI_Datatype recvtype, MPID_Comm *comm_ptr, int *errflag );
int MPIR_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int *recvcounts, int *displs,
- MPI_Datatype recvtype, MPID_Comm *comm_ptr );
+ MPI_Datatype recvtype, MPID_Comm *comm_ptr, int *errflag );
int MPIR_Allgatherv_intra(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int *recvcounts, int *displs,
- MPI_Datatype recvtype, MPID_Comm *comm_ptr );
+ MPI_Datatype recvtype, MPID_Comm *comm_pt, int *errflag );
int MPIR_Allgatherv_inter(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int *recvcounts, int *displs,
- MPI_Datatype recvtype, MPID_Comm *comm_ptr );
+ MPI_Datatype recvtype, MPID_Comm *comm_ptr, int *errflag );
int MPIR_Allreduce_impl(void *sendbuf, void *recvbuf, int count,
- MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr);
+ MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Allreduce(void *sendbuf, void *recvbuf, int count,
- MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr);
+ MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Allreduce_intra(void *sendbuf, void *recvbuf, int count,
- MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr);
+ MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Allreduce_inter(void *sendbuf, void *recvbuf, int count,
- MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr);
+ MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Alltoall_impl(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
- MPID_Comm *comm_ptr);
+ MPID_Comm *comm_ptr, int *errflag);
int MPIR_Alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
- MPID_Comm *comm_ptr);
+ MPID_Comm *comm_ptr, int *errflag);
int MPIR_Alltoall_intra(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
- MPID_Comm *comm_ptr);
+ MPID_Comm *comm_ptr, int *errflag);
int MPIR_Alltoall_inter(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
- MPID_Comm *comm_ptr);
+ MPID_Comm *comm_ptr, int *errflag);
int MPIR_Alltoallv_impl(void *sendbuf, int *sendcnts, int *sdispls,
MPI_Datatype sendtype, void *recvbuf, int *recvcnts,
- int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr);
+ int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Alltoallv(void *sendbuf, int *sendcnts, int *sdispls,
MPI_Datatype sendtype, void *recvbuf, int *recvcnts,
- int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr);
+ int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Alltoallv_intra(void *sendbuf, int *sendcnts, int *sdispls,
MPI_Datatype sendtype, void *recvbuf, int *recvcnts,
- int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr);
+ int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Alltoallv_inter(void *sendbuf, int *sendcnts, int *sdispls,
MPI_Datatype sendtype, void *recvbuf, int *recvcnts,
int *rdispls, MPI_Datatype recvtype,
- MPID_Comm *comm_ptr);
+ MPID_Comm *comm_ptr, int *errflag);
int MPIR_Alltoallw_impl(void *sendbuf, int *sendcnts, int *sdispls,
MPI_Datatype *sendtypes, void *recvbuf, int *recvcnts,
- int *rdispls, MPI_Datatype *recvtypes, MPID_Comm *comm_ptr);
+ int *rdispls, MPI_Datatype *recvtypes, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Alltoallw(void *sendbuf, int *sendcnts, int *sdispls,
MPI_Datatype *sendtypes, void *recvbuf, int *recvcnts,
- int *rdispls, MPI_Datatype *recvtypes, MPID_Comm *comm_ptr);
+ int *rdispls, MPI_Datatype *recvtypes, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Alltoallw_intra(void *sendbuf, int *sendcnts, int *sdispls,
MPI_Datatype *sendtypes, void *recvbuf, int *recvcnts,
- int *rdispls, MPI_Datatype *recvtypes, MPID_Comm *comm_ptr);
+ int *rdispls, MPI_Datatype *recvtypes, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Alltoallw_inter(void *sendbuf, int *sendcnts, int *sdispls,
MPI_Datatype *sendtypes, void *recvbuf,
int *recvcnts, int *rdispls, MPI_Datatype *recvtypes,
- MPID_Comm *comm_ptr);
+ MPID_Comm *comm_ptr, int *errflag);
int MPIR_Bcast_inter(void *buffer, int count, MPI_Datatype datatype,
- int root, MPID_Comm *comm_ptr);
+ int root, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Bcast_intra (void *buffer, int count, MPI_Datatype datatype, int
- root, MPID_Comm *comm_ptr);
+ root, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Bcast (void *buffer, int count, MPI_Datatype datatype, int
- root, MPID_Comm *comm_ptr);
+ root, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Bcast_impl (void *buffer, int count, MPI_Datatype datatype, int
- root, MPID_Comm *comm_ptr);
+ root, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
- MPI_Op op, MPID_Comm *comm_ptr );
+ MPI_Op op, MPID_Comm *comm_ptr, int *errflag );
int MPIR_Exscan_impl(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
- MPI_Op op, MPID_Comm *comm_ptr );
+ MPI_Op op, MPID_Comm *comm_ptr, int *errflag );
int MPIR_Gather_impl (void *sendbuf, int sendcnt, MPI_Datatype sendtype,
void *recvbuf, int recvcnt, MPI_Datatype recvtype,
- int root, MPID_Comm *comm_ptr);
+ int root, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Gather (void *sendbuf, int sendcnt, MPI_Datatype sendtype,
void *recvbuf, int recvcnt, MPI_Datatype recvtype,
- int root, MPID_Comm *comm_ptr);
+ int root, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Gather_intra (void *sendbuf, int sendcnt, MPI_Datatype sendtype,
void *recvbuf, int recvcnt, MPI_Datatype recvtype,
- int root, MPID_Comm *comm_ptr);
+ int root, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Gather_inter (void *sendbuf, int sendcnt, MPI_Datatype sendtype,
void *recvbuf, int recvcnt, MPI_Datatype recvtype,
- int root, MPID_Comm *comm_ptr );
+ int root, MPID_Comm *comm_ptr, int *errflag );
int MPIR_Gatherv (void *sendbuf, int sendcnt, MPI_Datatype sendtype,
void *recvbuf, int *recvcnts, int *displs,
- MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr);
+ MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Gatherv_impl (void *sendbuf, int sendcnt, MPI_Datatype sendtype,
void *recvbuf, int *recvcnts, int *displs,
- MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr);
+ MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Reduce_scatter_impl(void *sendbuf, void *recvbuf, int *recvcnts,
- MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr);
+ MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Reduce_scatter(void *sendbuf, void *recvbuf, int *recvcnts,
- MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr);
+ MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Reduce_scatter_intra(void *sendbuf, void *recvbuf, int *recvcnts,
- MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr);
+ MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Reduce_scatter_inter(void *sendbuf, void *recvbuf, int *recvcnts,
MPI_Datatype datatype, MPI_Op op,
- MPID_Comm *comm_ptr);
+ MPID_Comm *comm_ptr, int *errflag);
int MPIR_Reduce_scatter_block_impl(void *sendbuf, void *recvbuf, int recvcount,
MPI_Datatype datatype, MPI_Op op, MPID_Comm
- *comm_ptr );
+ *comm_ptr, int *errflag );
int MPIR_Reduce_scatter_block(void *sendbuf, void *recvbuf, int recvcount,
MPI_Datatype datatype, MPI_Op op, MPID_Comm
- *comm_ptr );
+ *comm_ptr, int *errflag );
int MPIR_Reduce_scatter_block_intra(void *sendbuf, void *recvbuf, int recvcount,
MPI_Datatype datatype, MPI_Op op, MPID_Comm
- *comm_ptr );
+ *comm_ptr, int *errflag );
int MPIR_Reduce_scatter_block_inter(void *sendbuf, void *recvbuf, int recvcount,
MPI_Datatype datatype, MPI_Op op, MPID_Comm
- *comm_ptr);
+ *comm_ptr, int *errflag);
int MPIR_Reduce_impl(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
- MPI_Op op, int root, MPID_Comm *comm_ptr );
+ MPI_Op op, int root, MPID_Comm *comm_ptr, int *errflag );
int MPIR_Reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
- MPI_Op op, int root, MPID_Comm *comm_ptr );
+ MPI_Op op, int root, MPID_Comm *comm_ptr, int *errflag );
int MPIR_Reduce_intra(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
- MPI_Op op, int root, MPID_Comm *comm_ptr );
+ MPI_Op op, int root, MPID_Comm *comm_ptr, int *errflag );
int MPIR_Reduce_inter (void *sendbuf, void *recvbuf, int count, MPI_Datatype
- datatype, MPI_Op op, int root, MPID_Comm *comm_ptr);
+ datatype, MPI_Op op, int root, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Scan_impl(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
- MPI_Op op, MPID_Comm *comm_ptr);
+ MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
- MPI_Op op, MPID_Comm *comm_ptr);
+ MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
int MPIR_Scatter_impl(void *sendbuf, int sendcnt, MPI_Datatype sendtype,
void *recvbuf, int recvcnt, MPI_Datatype recvtype,
- int root, MPID_Comm *comm_ptr );
+ int root, MPID_Comm *comm_ptr, int *errflag );
int MPIR_Scatter(void *sendbuf, int sendcnt, MPI_Datatype sendtype,
void *recvbuf, int recvcnt, MPI_Datatype recvtype,
- int root, MPID_Comm *comm_ptr );
+ int root, MPID_Comm *comm_ptr, int *errflag );
int MPIR_Scatter_intra(void *sendbuf, int sendcnt, MPI_Datatype sendtype,
void *recvbuf, int recvcnt, MPI_Datatype recvtype,
- int root, MPID_Comm *comm_ptr );
+ int root, MPID_Comm *comm_ptr, int *errflag );
int MPIR_Scatter_inter(void *sendbuf, int sendcnt, MPI_Datatype sendtype,
void *recvbuf, int recvcnt, MPI_Datatype recvtype,
- int root, MPID_Comm *comm_ptr );
+ int root, MPID_Comm *comm_ptr, int *errflag );
int MPIR_Scatterv_impl (void *sendbuf, int *sendcnts, int *displs,
MPI_Datatype sendtype, void *recvbuf, int recvcnt,
MPI_Datatype recvtype, int root, MPID_Comm
- *comm_ptr);
+ *comm_ptr, int *errflag);
int MPIR_Scatterv (void *sendbuf, int *sendcnts, int *displs,
MPI_Datatype sendtype, void *recvbuf, int recvcnt,
MPI_Datatype recvtype, int root, MPID_Comm
- *comm_ptr);
+ *comm_ptr, int *errflag);
int MPIR_Barrier_impl( MPID_Comm *comm_ptr);
int MPIR_Barrier( MPID_Comm *comm_ptr);
-int MPIR_Barrier_intra( MPID_Comm *comm_ptr );
+int MPIR_Barrier_intra( MPID_Comm *comm_ptr);
int MPIR_Barrier_inter( MPID_Comm *comm_ptr);
int MPIR_Reduce_local_impl(void *inbuf, void *inoutbuf, int count, MPI_Datatype datatype, MPI_Op op);
Modified: mpich2/trunk/src/mpi/coll/allgather.c
===================================================================
--- mpich2/trunk/src/mpi/coll/allgather.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/allgather.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -81,7 +81,8 @@
void *recvbuf,
int recvcount,
MPI_Datatype recvtype,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
int comm_size, rank;
int mpi_errno = MPI_SUCCESS;
@@ -167,15 +168,16 @@
recv_offset = dst_tree_root * recvcount * recvtype_extent;
if (dst < comm_size) {
- mpi_errno = MPIC_Sendrecv(((char *)recvbuf + send_offset),
- curr_cnt, recvtype, dst,
- MPIR_ALLGATHER_TAG,
- ((char *)recvbuf + recv_offset),
- (comm_size-dst_tree_root)*recvcount,
- recvtype, dst,
- MPIR_ALLGATHER_TAG, comm, &status);
+ mpi_errno = MPIC_Sendrecv_ft(((char *)recvbuf + send_offset),
+ curr_cnt, recvtype, dst,
+ MPIR_ALLGATHER_TAG,
+ ((char *)recvbuf + recv_offset),
+ (comm_size-dst_tree_root)*recvcount,
+ recvtype, dst,
+ MPIR_ALLGATHER_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
last_recv_cnt = 0;
@@ -231,15 +233,16 @@
if ((dst > rank) &&
(rank < tree_root + nprocs_completed)
&& (dst >= tree_root + nprocs_completed)) {
- mpi_errno = MPIC_Send(((char *)recvbuf + offset),
- last_recv_cnt,
- recvtype, dst,
- MPIR_ALLGATHER_TAG, comm);
+ mpi_errno = MPIC_Send_ft(((char *)recvbuf + offset),
+ last_recv_cnt,
+ recvtype, dst,
+ MPIR_ALLGATHER_TAG, comm, errflag);
/* last_recv_cnt was set in the previous
receive. that's the amount of data to be
sent now. */
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -249,15 +252,16 @@
else if ((dst < rank) &&
(dst < tree_root + nprocs_completed) &&
(rank >= tree_root + nprocs_completed)) {
- mpi_errno = MPIC_Recv(((char *)recvbuf + offset),
- (comm_size - (my_tree_root + mask))*recvcount,
- recvtype, dst,
- MPIR_ALLGATHER_TAG,
- comm, &status);
+ mpi_errno = MPIC_Recv_ft(((char *)recvbuf + offset),
+ (comm_size - (my_tree_root + mask))*recvcount,
+ recvtype, dst,
+ MPIR_ALLGATHER_TAG,
+ comm, &status, errflag);
/* nprocs_completed is also equal to the
no. of processes whose data we don't have */
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
last_recv_cnt = 0;
@@ -332,15 +336,16 @@
recv_offset = dst_tree_root * nbytes;
if (dst < comm_size) {
- mpi_errno = MPIC_Sendrecv(((char *)tmp_buf + send_offset),
- curr_cnt, MPI_BYTE, dst,
- MPIR_ALLGATHER_TAG,
- ((char *)tmp_buf + recv_offset),
- tmp_buf_size - recv_offset,
- MPI_BYTE, dst,
- MPIR_ALLGATHER_TAG, comm, &status);
+ mpi_errno = MPIC_Sendrecv_ft(((char *)tmp_buf + send_offset),
+ curr_cnt, MPI_BYTE, dst,
+ MPIR_ALLGATHER_TAG,
+ ((char *)tmp_buf + recv_offset),
+ tmp_buf_size - recv_offset,
+ MPI_BYTE, dst,
+ MPIR_ALLGATHER_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
last_recv_cnt = 0;
@@ -389,12 +394,13 @@
(rank < tree_root + nprocs_completed)
&& (dst >= tree_root + nprocs_completed)) {
- mpi_errno = MPIC_Send(((char *)tmp_buf + offset),
- last_recv_cnt, MPI_BYTE,
- dst, MPIR_ALLGATHER_TAG,
- comm);
+ mpi_errno = MPIC_Send_ft(((char *)tmp_buf + offset),
+ last_recv_cnt, MPI_BYTE,
+ dst, MPIR_ALLGATHER_TAG,
+ comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -407,15 +413,16 @@
else if ((dst < rank) &&
(dst < tree_root + nprocs_completed) &&
(rank >= tree_root + nprocs_completed)) {
- mpi_errno = MPIC_Recv(((char *)tmp_buf + offset),
- tmp_buf_size - offset,
- MPI_BYTE, dst,
- MPIR_ALLGATHER_TAG,
- comm, &status);
+ mpi_errno = MPIC_Recv_ft(((char *)tmp_buf + offset),
+ tmp_buf_size - offset,
+ MPI_BYTE, dst,
+ MPIR_ALLGATHER_TAG,
+ comm, &status, errflag);
/* nprocs_completed is also equal to the
no. of processes whose data we don't have */
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
last_recv_cnt = 0;
@@ -482,14 +489,15 @@
src = (rank + pof2) % comm_size;
dst = (rank - pof2 + comm_size) % comm_size;
- mpi_errno = MPIC_Sendrecv(tmp_buf, curr_cnt, recvtype, dst,
- MPIR_ALLGATHER_TAG,
- ((char *)tmp_buf + curr_cnt*recvtype_extent),
- curr_cnt, recvtype,
- src, MPIR_ALLGATHER_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Sendrecv_ft(tmp_buf, curr_cnt, recvtype, dst,
+ MPIR_ALLGATHER_TAG,
+ ((char *)tmp_buf + curr_cnt*recvtype_extent),
+ curr_cnt, recvtype,
+ src, MPIR_ALLGATHER_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -504,14 +512,15 @@
src = (rank + pof2) % comm_size;
dst = (rank - pof2 + comm_size) % comm_size;
- mpi_errno = MPIC_Sendrecv(tmp_buf, rem * recvcount, recvtype,
- dst, MPIR_ALLGATHER_TAG,
- ((char *)tmp_buf + curr_cnt*recvtype_extent),
- rem * recvcount, recvtype,
- src, MPIR_ALLGATHER_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Sendrecv_ft(tmp_buf, rem * recvcount, recvtype,
+ dst, MPIR_ALLGATHER_TAG,
+ ((char *)tmp_buf + curr_cnt*recvtype_extent),
+ rem * recvcount, recvtype,
+ src, MPIR_ALLGATHER_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -562,17 +571,18 @@
j = rank;
jnext = left;
for (i=1; i<comm_size; i++) {
- mpi_errno = MPIC_Sendrecv(((char *)recvbuf +
- j*recvcount*recvtype_extent),
- recvcount, recvtype, right,
- MPIR_ALLGATHER_TAG,
- ((char *)recvbuf +
- jnext*recvcount*recvtype_extent),
- recvcount, recvtype, left,
- MPIR_ALLGATHER_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Sendrecv_ft(((char *)recvbuf +
+ j*recvcount*recvtype_extent),
+ recvcount, recvtype, right,
+ MPIR_ALLGATHER_TAG,
+ ((char *)recvbuf +
+ jnext*recvcount*recvtype_extent),
+ recvcount, recvtype, left,
+ MPIR_ALLGATHER_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -587,6 +597,9 @@
MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
return mpi_errno;
fn_fail:
@@ -608,7 +621,8 @@
void *recvbuf,
int recvcount,
MPI_Datatype recvtype,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag)
{
/* Intercommunicator Allgather.
Each group does a gather to local root with the local
@@ -650,9 +664,10 @@
if (sendcount != 0) {
mpi_errno = MPIR_Gather_impl(sendbuf, sendcount, sendtype, tmp_buf, sendcount,
- sendtype, 0, newcomm_ptr);
+ sendtype, 0, newcomm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -665,9 +680,10 @@
if (sendcount != 0) {
root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
mpi_errno = MPIR_Bcast_inter(tmp_buf, sendcount*local_size,
- sendtype, root, comm_ptr);
+ sendtype, root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -677,9 +693,10 @@
if (recvcount != 0) {
root = 0;
mpi_errno = MPIR_Bcast_inter(recvbuf, recvcount*remote_size,
- recvtype, root, comm_ptr);
+ recvtype, root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -690,9 +707,10 @@
if (recvcount != 0) {
root = 0;
mpi_errno = MPIR_Bcast_inter(recvbuf, recvcount*remote_size,
- recvtype, root, comm_ptr);
+ recvtype, root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -702,9 +720,10 @@
if (sendcount != 0) {
root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
mpi_errno = MPIR_Bcast_inter(tmp_buf, sendcount*local_size,
- sendtype, root, comm_ptr);
+ sendtype, root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -715,6 +734,9 @@
MPIU_CHKLMEM_FREEALL();
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
return mpi_errno;
fn_fail:
@@ -732,7 +754,7 @@
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
- MPID_Comm *comm_ptr)
+ MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
@@ -740,13 +762,13 @@
/* intracommunicator */
mpi_errno = MPIR_Allgather_intra(sendbuf, sendcount, sendtype,
recvbuf, recvcount, recvtype,
- comm_ptr);
+ comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
/* intercommunicator */
mpi_errno = MPIR_Allgather_inter(sendbuf, sendcount, sendtype,
recvbuf, recvcount, recvtype,
- comm_ptr);
+ comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -766,7 +788,7 @@
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Allgather_impl(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
- MPID_Comm *comm_ptr)
+ MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
@@ -774,12 +796,12 @@
{
mpi_errno = comm_ptr->coll_fns->Allgather(sendbuf, sendcount, sendtype,
recvbuf, recvcount, recvtype,
- comm_ptr);
+ comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
mpi_errno = MPIR_Allgather(sendbuf, sendcount, sendtype,
recvbuf, recvcount, recvtype,
- comm_ptr);
+ comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -845,6 +867,7 @@
{
int mpi_errno = MPI_SUCCESS;
MPID_Comm *comm_ptr = NULL;
+ int errflag = FALSE;
MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLGATHER);
MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -917,7 +940,7 @@
mpi_errno = MPIR_Allgather_impl(sendbuf, sendcount, sendtype,
recvbuf, recvcount, recvtype,
- comm_ptr);
+ comm_ptr, &errflag);
if (mpi_errno) goto fn_fail;
/* ... end of body of routine ... */
Modified: mpich2/trunk/src/mpi/coll/allgatherv.c
===================================================================
--- mpich2/trunk/src/mpi/coll/allgatherv.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/allgatherv.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -75,7 +75,8 @@
int *recvcounts,
int *displs,
MPI_Datatype recvtype,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
MPI_Comm comm;
int comm_size, rank, j, i, left, right;
@@ -185,15 +186,16 @@
for (j=0; j<dst_tree_root; j++)
recv_offset += recvcounts[j];
- mpi_errno = MPIC_Sendrecv(((char *)tmp_buf + send_offset * recvtype_extent),
- curr_cnt, recvtype, dst,
- MPIR_ALLGATHERV_TAG,
- ((char *)tmp_buf + recv_offset * recvtype_extent),
- total_count - recv_offset, recvtype, dst,
- MPIR_ALLGATHERV_TAG,
- comm, &status);
+ mpi_errno = MPIC_Sendrecv_ft(((char *)tmp_buf + send_offset * recvtype_extent),
+ curr_cnt, recvtype, dst,
+ MPIR_ALLGATHERV_TAG,
+ ((char *)tmp_buf + recv_offset * recvtype_extent),
+ total_count - recv_offset, recvtype, dst,
+ MPIR_ALLGATHERV_TAG,
+ comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
last_recv_cnt = 0;
@@ -255,12 +257,13 @@
offset += recvcounts[j];
offset *= recvtype_extent;
- mpi_errno = MPIC_Send(((char *)tmp_buf + offset),
- last_recv_cnt,
- recvtype, dst,
- MPIR_ALLGATHERV_TAG, comm);
+ mpi_errno = MPIC_Send_ft(((char *)tmp_buf + offset),
+ last_recv_cnt,
+ recvtype, dst,
+ MPIR_ALLGATHERV_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -278,12 +281,13 @@
for (j=0; j<(my_tree_root+mask); j++)
offset += recvcounts[j];
- mpi_errno = MPIC_Recv(((char *)tmp_buf + offset * recvtype_extent),
- total_count - offset, recvtype,
- dst, MPIR_ALLGATHERV_TAG,
- comm, &status);
+ mpi_errno = MPIC_Recv_ft(((char *)tmp_buf + offset * recvtype_extent),
+ total_count - offset, recvtype,
+ dst, MPIR_ALLGATHERV_TAG,
+ comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
last_recv_cnt = 0;
@@ -384,14 +388,15 @@
recv_offset *= nbytes;
if (dst < comm_size) {
- mpi_errno = MPIC_Sendrecv(((char *)tmp_buf + send_offset),
- curr_cnt, MPI_BYTE, dst,
- MPIR_ALLGATHERV_TAG,
- ((char *)tmp_buf + recv_offset),
- tmp_buf_size-recv_offset, MPI_BYTE, dst,
- MPIR_ALLGATHERV_TAG, comm, &status);
+ mpi_errno = MPIC_Sendrecv_ft(((char *)tmp_buf + send_offset),
+ curr_cnt, MPI_BYTE, dst,
+ MPIR_ALLGATHERV_TAG,
+ ((char *)tmp_buf + recv_offset),
+ tmp_buf_size-recv_offset, MPI_BYTE, dst,
+ MPIR_ALLGATHERV_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
last_recv_cnt = 0;
@@ -445,12 +450,13 @@
(rank < tree_root + nprocs_completed)
&& (dst >= tree_root + nprocs_completed)) {
- mpi_errno = MPIC_Send(((char *)tmp_buf + offset),
- last_recv_cnt, MPI_BYTE,
- dst, MPIR_ALLGATHERV_TAG,
- comm);
+ mpi_errno = MPIC_Send_ft(((char *)tmp_buf + offset),
+ last_recv_cnt, MPI_BYTE,
+ dst, MPIR_ALLGATHERV_TAG,
+ comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -463,13 +469,14 @@
else if ((dst < rank) &&
(dst < tree_root + nprocs_completed) &&
(rank >= tree_root + nprocs_completed)) {
- mpi_errno = MPIC_Recv(((char *)tmp_buf + offset),
- tmp_buf_size-offset, MPI_BYTE,
- dst,
- MPIR_ALLGATHERV_TAG,
- comm, &status);
+ mpi_errno = MPIC_Recv_ft(((char *)tmp_buf + offset),
+ tmp_buf_size-offset, MPI_BYTE,
+ dst,
+ MPIR_ALLGATHERV_TAG,
+ comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
last_recv_cnt = 0;
@@ -544,13 +551,14 @@
src = (rank + pof2) % comm_size;
dst = (rank - pof2 + comm_size) % comm_size;
- mpi_errno = MPIC_Sendrecv(tmp_buf, curr_cnt, recvtype, dst,
- MPIR_ALLGATHERV_TAG,
- ((char *)tmp_buf + curr_cnt*recvtype_extent),
- total_count - curr_cnt, recvtype,
- src, MPIR_ALLGATHERV_TAG, comm, &status);
+ mpi_errno = MPIC_Sendrecv_ft(tmp_buf, curr_cnt, recvtype, dst,
+ MPIR_ALLGATHERV_TAG,
+ ((char *)tmp_buf + curr_cnt*recvtype_extent),
+ total_count - curr_cnt, recvtype,
+ src, MPIR_ALLGATHERV_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
recv_cnt = 0;
@@ -572,14 +580,15 @@
for (i=0; i<rem; i++)
send_cnt += recvcounts[(rank+i)%comm_size];
- mpi_errno = MPIC_Sendrecv(tmp_buf, send_cnt, recvtype,
- dst, MPIR_ALLGATHERV_TAG,
- ((char *)tmp_buf + curr_cnt*recvtype_extent),
- total_count - curr_cnt, recvtype,
- src, MPIR_ALLGATHERV_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Sendrecv_ft(tmp_buf, send_cnt, recvtype,
+ dst, MPIR_ALLGATHERV_TAG,
+ ((char *)tmp_buf + curr_cnt*recvtype_extent),
+ total_count - curr_cnt, recvtype,
+ src, MPIR_ALLGATHERV_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -664,29 +673,32 @@
* consecutive processes contribute 0 bytes each. */
}
else if (!sendnow) { /* If there's no data to send, just do a recv call */
- mpi_errno = MPIC_Recv(rbuf, recvnow, recvtype, left, MPIR_ALLGATHERV_TAG, comm, &status);
+ mpi_errno = MPIC_Recv_ft(rbuf, recvnow, recvtype, left, MPIR_ALLGATHERV_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
torecv -= recvnow;
}
else if (!recvnow) { /* If there's no data to receive, just do a send call */
- mpi_errno = MPIC_Send(sbuf, sendnow, recvtype, right, MPIR_ALLGATHERV_TAG, comm);
+ mpi_errno = MPIC_Send_ft(sbuf, sendnow, recvtype, right, MPIR_ALLGATHERV_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
tosend -= sendnow;
}
else { /* There's data to be sent and received */
- mpi_errno = MPIC_Sendrecv(sbuf, sendnow, recvtype, right, MPIR_ALLGATHERV_TAG,
- rbuf, recvnow, recvtype, left, MPIR_ALLGATHERV_TAG,
- comm, &status);
+ mpi_errno = MPIC_Sendrecv_ft(sbuf, sendnow, recvtype, right, MPIR_ALLGATHERV_TAG,
+ rbuf, recvnow, recvtype, left, MPIR_ALLGATHERV_TAG,
+ comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -713,6 +725,9 @@
MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -733,7 +748,8 @@
int *recvcounts,
int *displs,
MPI_Datatype recvtype,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
/* Intercommunicator Allgatherv.
This is done differently from the intercommunicator allgather
@@ -758,9 +774,10 @@
root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
mpi_errno = MPIR_Gatherv_impl(sendbuf, sendcount, sendtype, recvbuf,
recvcounts, displs, recvtype, root,
- comm_ptr);
+ comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -768,9 +785,10 @@
root = 0;
mpi_errno = MPIR_Gatherv_impl(sendbuf, sendcount, sendtype, recvbuf,
recvcounts, displs, recvtype, root,
- comm_ptr);
+ comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -780,9 +798,10 @@
root = 0;
mpi_errno = MPIR_Gatherv_impl(sendbuf, sendcount, sendtype, recvbuf,
recvcounts, displs, recvtype, root,
- comm_ptr);
+ comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -790,9 +809,10 @@
root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
mpi_errno = MPIR_Gatherv_impl(sendbuf, sendcount, sendtype, recvbuf,
recvcounts, displs, recvtype, root,
- comm_ptr);
+ comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -815,9 +835,10 @@
mpi_errno = MPIR_Type_commit_impl(&newtype);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
- mpi_errno = MPIR_Bcast_intra(recvbuf, 1, newtype, 0, newcomm_ptr);
+ mpi_errno = MPIR_Bcast_intra(recvbuf, 1, newtype, 0, newcomm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -827,6 +848,9 @@
fn_exit:
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
return mpi_errno;
fn_fail:
/* --BEGIN ERROR HANDLING-- */
@@ -848,7 +872,7 @@
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype,
- MPID_Comm *comm_ptr)
+ MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
@@ -856,13 +880,13 @@
/* intracommunicator */
mpi_errno = MPIR_Allgatherv_intra(sendbuf, sendcount, sendtype,
recvbuf, recvcounts, displs, recvtype,
- comm_ptr);
+ comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
/* intercommunicator */
mpi_errno = MPIR_Allgatherv_inter(sendbuf, sendcount, sendtype,
recvbuf, recvcounts, displs, recvtype,
- comm_ptr);
+ comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -884,17 +908,17 @@
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Allgatherv_impl(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype,
- MPID_Comm *comm_ptr)
+ MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Allgatherv != NULL) {
mpi_errno = comm_ptr->coll_fns->Allgatherv(sendbuf, sendcount, sendtype,
- recvbuf, recvcounts, displs, recvtype, comm_ptr);
+ recvbuf, recvcounts, displs, recvtype, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
mpi_errno = MPIR_Allgatherv(sendbuf, sendcount, sendtype,
- recvbuf, recvcounts, displs, recvtype, comm_ptr);
+ recvbuf, recvcounts, displs, recvtype, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -967,6 +991,7 @@
{
int mpi_errno = MPI_SUCCESS;
MPID_Comm *comm_ptr = NULL;
+ int errflag = FALSE;
MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLGATHERV);
MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -1046,7 +1071,7 @@
mpi_errno = MPIR_Allgatherv_impl(sendbuf, sendcount, sendtype,
recvbuf, recvcounts, displs, recvtype,
- comm_ptr);
+ comm_ptr, &errflag);
if (mpi_errno) goto fn_fail;
/* ... end of body of routine ... */
Modified: mpich2/trunk/src/mpi/coll/allreduce.c
===================================================================
--- mpich2/trunk/src/mpi/coll/allreduce.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/allreduce.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -93,15 +93,15 @@
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
static inline int allreduce_intra_or_coll_fn(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
- MPID_Comm *comm_ptr)
+ MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Allreduce != NULL) {
- mpi_errno = comm_ptr->coll_fns->Allreduce(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+ mpi_errno = comm_ptr->coll_fns->Allreduce(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
- mpi_errno = MPIR_Allreduce_intra(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+ mpi_errno = MPIR_Allreduce_intra(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -124,7 +124,8 @@
int count,
MPI_Datatype datatype,
MPI_Op op,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
int is_homogeneous;
#ifdef MPID_HAS_HETERO
@@ -174,16 +175,18 @@
/* IN_PLACE and not root of reduce. Data supplied to this
allreduce is in recvbuf. Pass that as the sendbuf to reduce. */
- mpi_errno = MPIR_Reduce_impl(recvbuf, NULL, count, datatype, op, 0, comm_ptr->node_comm);
+ mpi_errno = MPIR_Reduce_impl(recvbuf, NULL, count, datatype, op, 0, comm_ptr->node_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
} else {
- mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype, op, 0, comm_ptr->node_comm);
+ mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype, op, 0, comm_ptr->node_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -198,9 +201,11 @@
/* now do an IN_PLACE allreduce among the local roots of all nodes */
if (comm_ptr->node_roots_comm != NULL) {
- mpi_errno = allreduce_intra_or_coll_fn(MPI_IN_PLACE, recvbuf, count, datatype, op, comm_ptr->node_roots_comm);
+ mpi_errno = allreduce_intra_or_coll_fn(MPI_IN_PLACE, recvbuf, count, datatype, op, comm_ptr->node_roots_comm,
+ errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -208,9 +213,10 @@
/* now broadcast the result among local processes */
if (comm_ptr->node_comm != NULL) {
- mpi_errno = MPIR_Bcast_impl(recvbuf, count, datatype, 0, comm_ptr->node_comm);
+ mpi_errno = MPIR_Bcast_impl(recvbuf, count, datatype, 0, comm_ptr->node_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -231,16 +237,18 @@
/* heterogeneous. To get the same result on all processes, we
do a reduce to 0 and then broadcast. */
mpi_errno = MPIR_Reduce_impl ( sendbuf, recvbuf, count, datatype,
- op, 0, comm_ptr );
+ op, 0, comm_ptr, errflag );
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
- mpi_errno = MPIR_Bcast_impl( recvbuf, count, datatype, 0, comm_ptr );
+ mpi_errno = MPIR_Bcast_impl( recvbuf, count, datatype, 0, comm_ptr, errflag );
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -314,11 +322,12 @@
if (rank < 2*rem) {
if (rank % 2 == 0) { /* even */
- mpi_errno = MPIC_Send(recvbuf, count,
- datatype, rank+1,
- MPIR_ALLREDUCE_TAG, comm);
+ mpi_errno = MPIC_Send_ft(recvbuf, count,
+ datatype, rank+1,
+ MPIR_ALLREDUCE_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -329,12 +338,13 @@
newrank = -1;
}
else { /* odd */
- mpi_errno = MPIC_Recv(tmp_buf, count,
- datatype, rank-1,
- MPIR_ALLREDUCE_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Recv_ft(tmp_buf, count,
+ datatype, rank-1,
+ MPIR_ALLREDUCE_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -381,13 +391,14 @@
/* Send the most current data, which is in recvbuf. Recv
into tmp_buf */
- mpi_errno = MPIC_Sendrecv(recvbuf, count, datatype,
- dst, MPIR_ALLREDUCE_TAG, tmp_buf,
- count, datatype, dst,
- MPIR_ALLREDUCE_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Sendrecv_ft(recvbuf, count, datatype,
+ dst, MPIR_ALLREDUCE_TAG, tmp_buf,
+ count, datatype, dst,
+ MPIR_ALLREDUCE_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -477,17 +488,18 @@
send_cnt, recv_cnt, last_idx);
*/
/* Send data from recvbuf. Recv into tmp_buf */
- mpi_errno = MPIC_Sendrecv((char *) recvbuf +
- disps[send_idx]*extent,
- send_cnt, datatype,
- dst, MPIR_ALLREDUCE_TAG,
- (char *) tmp_buf +
- disps[recv_idx]*extent,
- recv_cnt, datatype, dst,
- MPIR_ALLREDUCE_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Sendrecv_ft((char *) recvbuf +
+ disps[send_idx]*extent,
+ send_cnt, datatype,
+ dst, MPIR_ALLREDUCE_TAG,
+ (char *) tmp_buf +
+ disps[recv_idx]*extent,
+ recv_cnt, datatype, dst,
+ MPIR_ALLREDUCE_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -541,17 +553,18 @@
recv_cnt += cnts[i];
}
- mpi_errno = MPIC_Sendrecv((char *) recvbuf +
- disps[send_idx]*extent,
- send_cnt, datatype,
- dst, MPIR_ALLREDUCE_TAG,
- (char *) recvbuf +
- disps[recv_idx]*extent,
- recv_cnt, datatype, dst,
- MPIR_ALLREDUCE_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Sendrecv_ft((char *) recvbuf +
+ disps[send_idx]*extent,
+ send_cnt, datatype,
+ dst, MPIR_ALLREDUCE_TAG,
+ (char *) recvbuf +
+ disps[recv_idx]*extent,
+ recv_cnt, datatype, dst,
+ MPIR_ALLREDUCE_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -568,16 +581,17 @@
(rank-1), the ranks who didn't participate above. */
if (rank < 2*rem) {
if (rank % 2) /* odd */
- mpi_errno = MPIC_Send(recvbuf, count,
- datatype, rank-1,
- MPIR_ALLREDUCE_TAG, comm);
+ mpi_errno = MPIC_Send_ft(recvbuf, count,
+ datatype, rank-1,
+ MPIR_ALLREDUCE_TAG, comm, errflag);
else /* even */
- mpi_errno = MPIC_Recv(recvbuf, count,
- datatype, rank+1,
- MPIR_ALLREDUCE_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Recv_ft(recvbuf, count,
+ datatype, rank+1,
+ MPIR_ALLREDUCE_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -613,7 +627,8 @@
int count,
MPI_Datatype datatype,
MPI_Op op,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
/* Intercommunicator Allreduce.
We first do an intercommunicator reduce to rank 0 on left group,
@@ -635,9 +650,10 @@
/* reduce from right group to rank 0*/
root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
mpi_errno = MPIR_Reduce_inter(sendbuf, recvbuf, count, datatype, op,
- root, comm_ptr);
+ root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -645,9 +661,10 @@
/* reduce to rank 0 of right group */
root = 0;
mpi_errno = MPIR_Reduce_inter(sendbuf, recvbuf, count, datatype, op,
- root, comm_ptr);
+ root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -656,9 +673,10 @@
/* reduce to rank 0 of left group */
root = 0;
mpi_errno = MPIR_Reduce_inter(sendbuf, recvbuf, count, datatype, op,
- root, comm_ptr);
+ root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -666,9 +684,10 @@
/* reduce from right group to rank 0 */
root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
mpi_errno = MPIR_Reduce_inter(sendbuf, recvbuf, count, datatype, op,
- root, comm_ptr);
+ root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -680,9 +699,10 @@
newcomm_ptr = comm_ptr->local_comm;
- mpi_errno = MPIR_Bcast_impl(recvbuf, count, datatype, 0, newcomm_ptr);
+ mpi_errno = MPIR_Bcast_impl(recvbuf, count, datatype, 0, newcomm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -690,6 +710,9 @@
fn_exit:
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
return mpi_errno;
fn_fail:
@@ -704,18 +727,19 @@
#define FUNCNAME MPIR_Allreduce
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPIR_Allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr)
+int MPIR_Allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr,
+ int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->comm_kind == MPID_INTRACOMM) {
/* intracommunicator */
- mpi_errno = MPIR_Allreduce_intra(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+ mpi_errno = MPIR_Allreduce_intra(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
else {
/* intercommunicator */
- mpi_errno = MPIR_Allreduce_inter(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+ mpi_errno = MPIR_Allreduce_inter(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -734,25 +758,26 @@
#define FUNCNAME MPIR_Allreduce_impl
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPIR_Allreduce_impl(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr)
+int MPIR_Allreduce_impl(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr,
+ int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Allreduce != NULL)
{
- mpi_errno = comm_ptr->coll_fns->Allreduce(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+ mpi_errno = comm_ptr->coll_fns->Allreduce(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
else
{
if (comm_ptr->comm_kind == MPID_INTRACOMM) {
/* intracommunicator */
- mpi_errno = MPIR_Allreduce_intra(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+ mpi_errno = MPIR_Allreduce_intra(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
else {
/* intercommunicator */
- mpi_errno = MPIR_Allreduce_inter(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+ mpi_errno = MPIR_Allreduce_inter(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
}
@@ -803,6 +828,7 @@
static const char FCNAME[] = "MPI_Allreduce";
int mpi_errno = MPI_SUCCESS;
MPID_Comm *comm_ptr = NULL;
+ int errflag = FALSE;
MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLREDUCE);
MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -875,7 +901,7 @@
/* ... body of routine ... */
- mpi_errno = MPIR_Allreduce_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+ mpi_errno = MPIR_Allreduce_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, &errflag);
if (mpi_errno) goto fn_fail;
/* ... end of body of routine ... */
Modified: mpich2/trunk/src/mpi/coll/alltoall.c
===================================================================
--- mpich2/trunk/src/mpi/coll/alltoall.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/alltoall.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -82,7 +82,8 @@
void *recvbuf,
int recvcount,
MPI_Datatype recvtype,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
int comm_size, i, j, pof2;
MPI_Aint sendtype_extent, recvtype_extent;
@@ -134,26 +135,28 @@
for (j = i; j < comm_size; ++j) {
if (rank == i) {
/* also covers the (rank == i && rank == j) case */
- mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + j*recvcount*recvtype_extent),
- recvcount, recvtype,
- j, MPIR_ALLTOALL_TAG,
- j, MPIR_ALLTOALL_TAG,
- comm, &status);
+ mpi_errno = MPIC_Sendrecv_replace_ft(((char *)recvbuf + j*recvcount*recvtype_extent),
+ recvcount, recvtype,
+ j, MPIR_ALLTOALL_TAG,
+ j, MPIR_ALLTOALL_TAG,
+ comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
else if (rank == j) {
/* same as above with i/j args reversed */
- mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + i*recvcount*recvtype_extent),
- recvcount, recvtype,
- i, MPIR_ALLTOALL_TAG,
- i, MPIR_ALLTOALL_TAG,
- comm, &status);
+ mpi_errno = MPIC_Sendrecv_replace_ft(((char *)recvbuf + i*recvcount*recvtype_extent),
+ recvcount, recvtype,
+ i, MPIR_ALLTOALL_TAG,
+ i, MPIR_ALLTOALL_TAG,
+ comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -221,12 +224,13 @@
mpi_errno = MPIR_Pack_impl(recvbuf, 1, newtype, tmp_buf, pack_size, &position);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
- mpi_errno = MPIC_Sendrecv(tmp_buf, position, MPI_PACKED, dst,
- MPIR_ALLTOALL_TAG, recvbuf, 1, newtype,
- src, MPIR_ALLTOALL_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Sendrecv_ft(tmp_buf, position, MPI_PACKED, dst,
+ MPIR_ALLTOALL_TAG, recvbuf, 1, newtype,
+ src, MPIR_ALLTOALL_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -306,17 +310,18 @@
my_tree_root <<= i;
if (dst < comm_size) {
- mpi_errno = MPIC_Sendrecv(((char *)tmp_buf +
- my_tree_root*sendbuf_extent),
- curr_cnt, sendtype,
- dst, MPIR_ALLTOALL_TAG,
- ((char *)tmp_buf +
- dst_tree_root*sendbuf_extent),
- sendbuf_extent*(comm_size-dst_tree_root),
- sendtype, dst, MPIR_ALLTOALL_TAG,
- comm, &status);
+ mpi_errno = MPIC_Sendrecv_ft(((char *)tmp_buf +
+ my_tree_root*sendbuf_extent),
+ curr_cnt, sendtype,
+ dst, MPIR_ALLTOALL_TAG,
+ ((char *)tmp_buf +
+ dst_tree_root*sendbuf_extent),
+ sendbuf_extent*(comm_size-dst_tree_root),
+ sendtype, dst, MPIR_ALLTOALL_TAG,
+ comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
last_recv_cnt = 0;
@@ -363,13 +368,14 @@
(rank < tree_root + nprocs_completed)
&& (dst >= tree_root + nprocs_completed)) {
/* send the data received in this step above */
- mpi_errno = MPIC_Send(((char *)tmp_buf +
- dst_tree_root*sendbuf_extent),
- last_recv_cnt, sendtype,
- dst, MPIR_ALLTOALL_TAG,
- comm);
+ mpi_errno = MPIC_Send_ft(((char *)tmp_buf +
+ dst_tree_root*sendbuf_extent),
+ last_recv_cnt, sendtype,
+ dst, MPIR_ALLTOALL_TAG,
+ comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -379,14 +385,15 @@
else if ((dst < rank) &&
(dst < tree_root + nprocs_completed) &&
(rank >= tree_root + nprocs_completed)) {
- mpi_errno = MPIC_Recv(((char *)tmp_buf +
- dst_tree_root*sendbuf_extent),
- sendbuf_extent*(comm_size-dst_tree_root),
- sendtype,
- dst, MPIR_ALLTOALL_TAG,
- comm, &status);
+ mpi_errno = MPIC_Recv_ft(((char *)tmp_buf +
+ dst_tree_root*sendbuf_extent),
+ sendbuf_extent*(comm_size-dst_tree_root),
+ sendtype,
+ dst, MPIR_ALLTOALL_TAG,
+ comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
last_recv_cnt = 0;
@@ -451,26 +458,26 @@
/* do the communication -- post ss sends and receives: */
for ( i=0; i<ss; i++ ) {
dst = (rank+i+ii) % comm_size;
- mpi_errno = MPIC_Irecv((char *)recvbuf +
- dst*recvcount*recvtype_extent,
- recvcount, recvtype, dst,
- MPIR_ALLTOALL_TAG, comm,
- &reqarray[i]);
+ mpi_errno = MPIC_Irecv_ft((char *)recvbuf +
+ dst*recvcount*recvtype_extent,
+ recvcount, recvtype, dst,
+ MPIR_ALLTOALL_TAG, comm,
+ &reqarray[i]);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
for ( i=0; i<ss; i++ ) {
dst = (rank-i-ii+comm_size) % comm_size;
- mpi_errno = MPIC_Isend((char *)sendbuf +
- dst*sendcount*sendtype_extent,
- sendcount, sendtype, dst,
- MPIR_ALLTOALL_TAG, comm,
- &reqarray[i+ss]);
+ mpi_errno = MPIC_Isend_ft((char *)sendbuf +
+ dst*sendcount*sendtype_extent,
+ sendcount, sendtype, dst,
+ MPIR_ALLTOALL_TAG, comm,
+ &reqarray[i+ss], errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
/* ... then wait for them to finish: */
- mpi_errno = MPIR_Waitall_impl(2*ss,reqarray,starray);
+ mpi_errno = MPIC_Waitall_ft(2*ss,reqarray,starray, errflag);
if (mpi_errno && mpi_errno != MPI_ERR_IN_STATUS) MPIU_ERR_POP(mpi_errno);
/* --BEGIN ERROR HANDLING-- */
@@ -480,6 +487,7 @@
mpi_errno = starray[j].MPI_ERROR;
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -524,16 +532,17 @@
dst = (rank + i) % comm_size;
}
- mpi_errno = MPIC_Sendrecv(((char *)sendbuf +
- dst*sendcount*sendtype_extent),
- sendcount, sendtype, dst,
- MPIR_ALLTOALL_TAG,
- ((char *)recvbuf +
- src*recvcount*recvtype_extent),
- recvcount, recvtype, src,
- MPIR_ALLTOALL_TAG, comm, &status);
+ mpi_errno = MPIC_Sendrecv_ft(((char *)sendbuf +
+ dst*sendcount*sendtype_extent),
+ sendcount, sendtype, dst,
+ MPIR_ALLTOALL_TAG,
+ ((char *)recvbuf +
+ src*recvcount*recvtype_extent),
+ recvcount, recvtype, src,
+ MPIR_ALLTOALL_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -546,6 +555,9 @@
MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
return mpi_errno;
fn_fail:
if (newtype != MPI_DATATYPE_NULL)
@@ -567,7 +579,8 @@
void *recvbuf,
int recvcount,
MPI_Datatype recvtype,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
/* Intercommunicator alltoall. We use a pairwise exchange algorithm
similar to the one used in intracommunicator alltoall for long
@@ -623,12 +636,13 @@
sendaddr = (char *)sendbuf + dst*sendcount*sendtype_extent;
}
- mpi_errno = MPIC_Sendrecv(sendaddr, sendcount, sendtype, dst,
- MPIR_ALLTOALL_TAG, recvaddr,
- recvcount, recvtype, src,
- MPIR_ALLTOALL_TAG, comm, &status);
+ mpi_errno = MPIC_Sendrecv_ft(sendaddr, sendcount, sendtype, dst,
+ MPIR_ALLTOALL_TAG, recvaddr,
+ recvcount, recvtype, src,
+ MPIR_ALLTOALL_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -639,6 +653,9 @@
MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -651,20 +668,21 @@
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
- MPID_Comm *comm_ptr)
+ MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->comm_kind == MPID_INTRACOMM) {
/* intracommunicator */
mpi_errno = MPIR_Alltoall_intra(sendbuf, sendcount, sendtype,
- recvbuf, recvcount, recvtype, comm_ptr);
+ recvbuf, recvcount, recvtype,
+ comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
/* intercommunicator */
mpi_errno = MPIR_Alltoall_inter(sendbuf, sendcount, sendtype,
recvbuf, recvcount, recvtype,
- comm_ptr);
+ comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -680,17 +698,18 @@
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Alltoall_impl(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
- MPID_Comm *comm_ptr)
+ MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Alltoall != NULL) {
mpi_errno = comm_ptr->coll_fns->Alltoall(sendbuf, sendcount, sendtype,
recvbuf, recvcount, recvtype,
- comm_ptr);
+ comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
mpi_errno = MPIR_Alltoall(sendbuf, sendcount, sendtype,
- recvbuf, recvcount, recvtype, comm_ptr);
+ recvbuf, recvcount, recvtype,
+ comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -737,6 +756,7 @@
{
int mpi_errno = MPI_SUCCESS;
MPID_Comm *comm_ptr = NULL;
+ int errflag = FALSE;
MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLTOALL);
MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -798,7 +818,7 @@
/* ... body of routine ... */
- mpi_errno = MPIR_Alltoall_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm_ptr);
+ mpi_errno = MPIR_Alltoall_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm_ptr, &errflag);
if (mpi_errno) goto fn_fail;
/* ... end of body of routine ... */
Modified: mpich2/trunk/src/mpi/coll/alltoallv.c
===================================================================
--- mpich2/trunk/src/mpi/coll/alltoallv.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/alltoallv.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -64,11 +64,13 @@
int *recvcnts,
int *rdispls,
MPI_Datatype recvtype,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag)
{
int comm_size, i, j;
MPI_Aint send_extent, recv_extent;
int mpi_errno = MPI_SUCCESS;
+ int mpi_errno_ret = MPI_SUCCESS;
MPI_Status *starray;
MPI_Status status;
MPI_Request *reqarray;
@@ -104,21 +106,32 @@
for (j = i; j < comm_size; ++j) {
if (rank == i) {
/* also covers the (rank == i && rank == j) case */
- mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + rdispls[j]*recv_extent),
- recvcnts[j], recvtype,
- j, MPIR_ALLTOALLV_TAG,
- j, MPIR_ALLTOALLV_TAG,
- comm, &status);
- if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ mpi_errno = MPIC_Sendrecv_replace_ft(((char *)recvbuf + rdispls[j]*recv_extent),
+ recvcnts[j], recvtype,
+ j, MPIR_ALLTOALLV_TAG,
+ j, MPIR_ALLTOALLV_TAG,
+ comm, &status, errflag);
+ if (mpi_errno) {
+ /* for communication errors, just record the error but continue */
+ *errflag = TRUE;
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+ MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
+ }
+
}
else if (rank == j) {
/* same as above with i/j args reversed */
- mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + rdispls[i]*recv_extent),
- recvcnts[i], recvtype,
- i, MPIR_ALLTOALLV_TAG,
- i, MPIR_ALLTOALLV_TAG,
- comm, &status);
- if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ mpi_errno = MPIC_Sendrecv_replace_ft(((char *)recvbuf + rdispls[i]*recv_extent),
+ recvcnts[i], recvtype,
+ i, MPIR_ALLTOALLV_TAG,
+ i, MPIR_ALLTOALLV_TAG,
+ comm, &status, errflag);
+ if (mpi_errno) {
+ /* for communication errors, just record the error but continue */
+ *errflag = TRUE;
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+ MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
+ }
}
}
}
@@ -145,11 +158,16 @@
if (type_size) {
MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
rdispls[dst]*recv_extent);
- mpi_errno = MPIC_Irecv((char *)recvbuf+rdispls[dst]*recv_extent,
- recvcnts[dst], recvtype, dst,
- MPIR_ALLTOALLV_TAG, comm,
- &reqarray[req_cnt]);
- if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+ mpi_errno = MPIC_Irecv_ft((char *)recvbuf+rdispls[dst]*recv_extent,
+ recvcnts[dst], recvtype, dst,
+ MPIR_ALLTOALLV_TAG, comm,
+ &reqarray[req_cnt]);
+ if (mpi_errno) {
+ /* for communication errors, just record the error but continue */
+ *errflag = TRUE;
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+ MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
+ }
req_cnt++;
}
}
@@ -162,17 +180,22 @@
if (type_size) {
MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
sdispls[dst]*send_extent);
- mpi_errno = MPIC_Isend((char *)sendbuf+sdispls[dst]*send_extent,
- sendcnts[dst], sendtype, dst,
- MPIR_ALLTOALLV_TAG, comm,
- &reqarray[req_cnt]);
- if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+ mpi_errno = MPIC_Isend_ft((char *)sendbuf+sdispls[dst]*send_extent,
+ sendcnts[dst], sendtype, dst,
+ MPIR_ALLTOALLV_TAG, comm,
+ &reqarray[req_cnt], errflag);
+ if (mpi_errno) {
+ /* for communication errors, just record the error but continue */
+ *errflag = TRUE;
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+ MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
+ }
req_cnt++;
}
}
}
- mpi_errno = MPIR_Waitall_impl(req_cnt, reqarray, starray);
+ mpi_errno = MPIC_Waitall_ft(req_cnt, reqarray, starray, errflag);
if (mpi_errno && mpi_errno != MPI_ERR_IN_STATUS) MPIU_ERR_POP(mpi_errno);
/* --BEGIN ERROR HANDLING-- */
@@ -180,7 +203,12 @@
for (i=0; i<req_cnt; i++) {
if (starray[i].MPI_ERROR != MPI_SUCCESS) {
mpi_errno = starray[i].MPI_ERROR;
- MPIU_ERR_POP(mpi_errno);
+ if (mpi_errno) {
+ /* for communication errors, just record the error but continue */
+ *errflag = TRUE;
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+ MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
+ }
}
}
}
@@ -192,8 +220,14 @@
/* check if multiple threads are calling this collective function */
MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
MPIU_CHKLMEM_FREEALL();
- return (mpi_errno);
+ if (mpi_errno_ret)
+ mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
+ return mpi_errno;
+
fn_fail:
goto fn_exit;
}
@@ -214,7 +248,8 @@
int *recvcnts,
int *rdispls,
MPI_Datatype recvtype,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
/* Intercommunicator alltoallv. We use a pairwise exchange algorithm
similar to the one used in intracommunicator alltoallv. Since the
@@ -231,6 +266,7 @@
int local_size, remote_size, max_size, i;
MPI_Aint send_extent, recv_extent;
int mpi_errno = MPI_SUCCESS;
+ int mpi_errno_ret = MPI_SUCCESS;
MPI_Status status;
int src, dst, rank, sendcount, recvcount;
char *sendaddr, *recvaddr;
@@ -276,17 +312,26 @@
sendcount = sendcnts[dst];
}
- mpi_errno = MPIC_Sendrecv(sendaddr, sendcount, sendtype, dst,
- MPIR_ALLTOALLV_TAG, recvaddr, recvcount,
- recvtype, src, MPIR_ALLTOALLV_TAG,
- comm, &status);
- if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ mpi_errno = MPIC_Sendrecv_ft(sendaddr, sendcount, sendtype, dst,
+ MPIR_ALLTOALLV_TAG, recvaddr, recvcount,
+ recvtype, src, MPIR_ALLTOALLV_TAG,
+ comm, &status, errflag);
+ if (mpi_errno) {
+ /* for communication errors, just record the error but continue */
+ *errflag = TRUE;
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+ MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
+ }
}
fn_exit:
/* check if multiple threads are calling this collective function */
MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
- return (mpi_errno);
+ if (mpi_errno_ret)
+ mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+ return mpi_errno;
fn_fail:
goto fn_exit;
}
@@ -298,7 +343,7 @@
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Alltoallv(void *sendbuf, int *sendcnts, int *sdispls, MPI_Datatype sendtype,
void *recvbuf, int *recvcnts, int *rdispls, MPI_Datatype recvtype,
- MPID_Comm *comm_ptr)
+ MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
@@ -306,13 +351,13 @@
/* intracommunicator */
mpi_errno = MPIR_Alltoallv_intra(sendbuf, sendcnts, sdispls,
sendtype, recvbuf, recvcnts,
- rdispls, recvtype, comm_ptr);
+ rdispls, recvtype, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
/* intercommunicator */
mpi_errno = MPIR_Alltoallv_inter(sendbuf, sendcnts, sdispls,
sendtype, recvbuf, recvcnts,
- rdispls, recvtype, comm_ptr);
+ rdispls, recvtype, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -328,19 +373,19 @@
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Alltoallv_impl(void *sendbuf, int *sendcnts, int *sdispls, MPI_Datatype sendtype,
void *recvbuf, int *recvcnts, int *rdispls, MPI_Datatype recvtype,
- MPID_Comm *comm_ptr)
+ MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Alltoallv != NULL) {
mpi_errno = comm_ptr->coll_fns->Alltoallv(sendbuf, sendcnts, sdispls,
sendtype, recvbuf, recvcnts,
- rdispls, recvtype, comm_ptr);
+ rdispls, recvtype, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
mpi_errno = MPIR_Alltoallv(sendbuf, sendcnts, sdispls,
sendtype, recvbuf, recvcnts,
- rdispls, recvtype, comm_ptr);
+ rdispls, recvtype, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -400,6 +445,7 @@
{
int mpi_errno = MPI_SUCCESS;
MPID_Comm *comm_ptr = NULL;
+ int errflag = FALSE;
MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLTOALLV);
MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -485,7 +531,7 @@
mpi_errno = MPIR_Alltoallv_impl(sendbuf, sendcnts, sdispls,
sendtype, recvbuf, recvcnts,
- rdispls, recvtype, comm_ptr);
+ rdispls, recvtype, comm_ptr, &errflag);
if (mpi_errno) goto fn_fail;
/* ... end of body of routine ... */
Modified: mpich2/trunk/src/mpi/coll/alltoallw.c
===================================================================
--- mpich2/trunk/src/mpi/coll/alltoallw.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/alltoallw.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -59,7 +59,8 @@
int *recvcnts,
int *rdispls,
MPI_Datatype *recvtypes,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
int comm_size, i, j;
int mpi_errno = MPI_SUCCESS;
@@ -96,26 +97,28 @@
for (j = i; j < comm_size; ++j) {
if (rank == i) {
/* also covers the (rank == i && rank == j) case */
- mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + rdispls[j]),
- recvcnts[j], recvtypes[j],
- j, MPIR_ALLTOALLW_TAG,
- j, MPIR_ALLTOALLW_TAG,
- comm, &status);
+ mpi_errno = MPIC_Sendrecv_replace_ft(((char *)recvbuf + rdispls[j]),
+ recvcnts[j], recvtypes[j],
+ j, MPIR_ALLTOALLW_TAG,
+ j, MPIR_ALLTOALLW_TAG,
+ comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
else if (rank == j) {
/* same as above with i/j args reversed */
- mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + rdispls[i]),
- recvcnts[i], recvtypes[i],
- i, MPIR_ALLTOALLW_TAG,
- i, MPIR_ALLTOALLW_TAG,
- comm, &status);
+ mpi_errno = MPIC_Sendrecv_replace_ft(((char *)recvbuf + rdispls[i]),
+ recvcnts[i], recvtypes[i],
+ i, MPIR_ALLTOALLW_TAG,
+ i, MPIR_ALLTOALLW_TAG,
+ comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -141,10 +144,10 @@
if (recvcnts[dst]) {
MPID_Datatype_get_size_macro(recvtypes[dst], type_size);
if (type_size) {
- mpi_errno = MPIC_Irecv((char *)recvbuf+rdispls[dst],
- recvcnts[dst], recvtypes[dst], dst,
- MPIR_ALLTOALLW_TAG, comm,
- &reqarray[outstanding_requests]);
+ mpi_errno = MPIC_Irecv_ft((char *)recvbuf+rdispls[dst],
+ recvcnts[dst], recvtypes[dst], dst,
+ MPIR_ALLTOALLW_TAG, comm,
+ &reqarray[outstanding_requests]);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
outstanding_requests++;
@@ -157,10 +160,10 @@
if (sendcnts[dst]) {
MPID_Datatype_get_size_macro(sendtypes[dst], type_size);
if (type_size) {
- mpi_errno = MPIC_Isend((char *)sendbuf+sdispls[dst],
- sendcnts[dst], sendtypes[dst], dst,
- MPIR_ALLTOALLW_TAG, comm,
- &reqarray[outstanding_requests]);
+ mpi_errno = MPIC_Isend_ft((char *)sendbuf+sdispls[dst],
+ sendcnts[dst], sendtypes[dst], dst,
+ MPIR_ALLTOALLW_TAG, comm,
+ &reqarray[outstanding_requests], errflag);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
outstanding_requests++;
@@ -168,7 +171,7 @@
}
}
- mpi_errno = MPIR_Waitall_impl(outstanding_requests, reqarray, starray);
+ mpi_errno = MPIC_Waitall_ft(outstanding_requests, reqarray, starray, errflag);
if (mpi_errno && mpi_errno != MPI_ERR_IN_STATUS) MPIU_ERR_POP(mpi_errno);
/* --BEGIN ERROR HANDLING-- */
@@ -178,6 +181,7 @@
mpi_errno = starray[i].MPI_ERROR;
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -200,14 +204,15 @@
for (i=1; i<comm_size; i++) {
src = (rank - i + comm_size) % comm_size;
dst = (rank + i) % comm_size;
- mpi_errno = MPIC_Sendrecv(((char *)sendbuf+sdispls[dst]),
- sendcnts[dst], sendtypes[dst], dst,
- MPIR_ALLTOALLW_TAG,
- ((char *)recvbuf+rdispls[src]),
- recvcnts[src], recvtypes[dst], src,
- MPIR_ALLTOALLW_TAG, comm, &status);
+ mpi_errno = MPIC_Sendrecv_ft(((char *)sendbuf+sdispls[dst]),
+ sendcnts[dst], sendtypes[dst], dst,
+ MPIR_ALLTOALLW_TAG,
+ ((char *)recvbuf+rdispls[src]),
+ recvcnts[src], recvtypes[dst], src,
+ MPIR_ALLTOALLW_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -221,6 +226,8 @@
MPIU_CHKLMEM_FREEALL();
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
@@ -242,7 +249,8 @@
int *recvcnts,
int *rdispls,
MPI_Datatype *recvtypes,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
/* Intercommunicator alltoallw. We use a pairwise exchange algorithm
similar to the one used in intracommunicator alltoallw. Since the
@@ -300,12 +308,13 @@
sendtype = sendtypes[dst];
}
- mpi_errno = MPIC_Sendrecv(sendaddr, sendcount, sendtype,
- dst, MPIR_ALLTOALLW_TAG, recvaddr,
- recvcount, recvtype, src,
- MPIR_ALLTOALLW_TAG, comm, &status);
+ mpi_errno = MPIC_Sendrecv_ft(sendaddr, sendcount, sendtype,
+ dst, MPIR_ALLTOALLW_TAG, recvaddr,
+ recvcount, recvtype, src,
+ MPIR_ALLTOALLW_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -316,6 +325,8 @@
MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -327,7 +338,7 @@
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Alltoallw(void *sendbuf, int *sendcnts, int *sdispls, MPI_Datatype *sendtypes,
void *recvbuf, int *recvcnts, int *rdispls, MPI_Datatype *recvtypes,
- MPID_Comm *comm_ptr)
+ MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
@@ -335,13 +346,13 @@
/* intracommunicator */
mpi_errno = MPIR_Alltoallw_intra(sendbuf, sendcnts, sdispls,
sendtypes, recvbuf, recvcnts,
- rdispls, recvtypes, comm_ptr);
+ rdispls, recvtypes, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
/* intercommunicator */
mpi_errno = MPIR_Alltoallw_inter(sendbuf, sendcnts, sdispls,
sendtypes, recvbuf, recvcnts,
- rdispls, recvtypes, comm_ptr);
+ rdispls, recvtypes, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -357,19 +368,19 @@
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Alltoallw_impl(void *sendbuf, int *sendcnts, int *sdispls, MPI_Datatype *sendtypes,
void *recvbuf, int *recvcnts, int *rdispls, MPI_Datatype *recvtypes,
- MPID_Comm *comm_ptr)
+ MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Alltoallw != NULL) {
mpi_errno = comm_ptr->coll_fns->Alltoallw(sendbuf, sendcnts, sdispls,
sendtypes, recvbuf, recvcnts,
- rdispls, recvtypes, comm_ptr);
+ rdispls, recvtypes, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
mpi_errno = MPIR_Alltoallw(sendbuf, sendcnts, sdispls,
sendtypes, recvbuf, recvcnts,
- rdispls, recvtypes, comm_ptr);
+ rdispls, recvtypes, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -429,6 +440,7 @@
{
int mpi_errno = MPI_SUCCESS;
MPID_Comm *comm_ptr = NULL;
+ int errflag = FALSE;
MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLTOALLW);
MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -522,7 +534,7 @@
mpi_errno = MPIR_Alltoallw_impl(sendbuf, sendcnts, sdispls,
sendtypes, recvbuf, recvcnts,
- rdispls, recvtypes, comm_ptr);
+ rdispls, recvtypes, comm_ptr, &errflag);
if (mpi_errno) goto fn_fail;
/* ... end of body of routine ... */
Modified: mpich2/trunk/src/mpi/coll/barrier.c
===================================================================
--- mpich2/trunk/src/mpi/coll/barrier.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/barrier.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -74,9 +74,9 @@
dst = (rank + mask) % size;
src = (rank - mask + size) % size;
mpi_errno = MPIC_Sendrecv(NULL, 0, MPI_BYTE, dst,
- MPIR_BARRIER_TAG, NULL, 0, MPI_BYTE,
- src, MPIR_BARRIER_TAG, comm,
- MPI_STATUS_IGNORE);
+ MPIR_BARRIER_TAG, NULL, 0, MPI_BYTE,
+ src, MPIR_BARRIER_TAG, comm,
+ MPI_STATUS_IGNORE);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
@@ -137,7 +137,8 @@
int mpi_errno_ret = MPI_SUCCESS;
int i = 0;
MPID_Comm *newcomm_ptr = NULL;
-
+ int errflag = FALSE;
+
rank = comm_ptr->rank;
/* Get the local intracommunicator */
@@ -166,38 +167,42 @@
if (comm_ptr->is_low_group) {
/* bcast to right*/
root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
- mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr);
+ mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, &errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
/* receive bcast from right */
root = 0;
- mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr);
+ mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, &errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
}
else {
/* receive bcast from left */
root = 0;
- mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr);
+ mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, &errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
/* bcast to left */
root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
- mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr);
+ mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, &errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
}
fn_exit:
if (mpi_errno_ret)
@@ -207,7 +212,8 @@
goto fn_exit;
}
-/* MPIR_Barrier performs an barrier using point-to-point messages.
+/* MPIR_Barrier performs a barrier using
+point-to-point messages.
This is intended to be used by device-specific implementations of
barrier. In all other cases MPIR_Barrier_impl should be used. */
#undef FUNCNAME
@@ -247,7 +253,7 @@
{
int mpi_errno = MPI_SUCCESS;
int mpi_errno_ret = MPI_SUCCESS;
-
+ int errflag = FALSE;
if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Barrier != NULL)
{
mpi_errno = comm_ptr->coll_fns->Barrier(comm_ptr);
@@ -285,12 +291,13 @@
if (comm_ptr->node_comm != NULL)
{
int i=0;
- mpi_errno = MPIR_Bcast_impl(&i, 1, MPI_BYTE, 0, comm_ptr->node_comm);
+ mpi_errno = MPIR_Bcast_impl(&i, 1, MPI_BYTE, 0, comm_ptr->node_comm, &errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
}
}
else {
Modified: mpich2/trunk/src/mpi/coll/bcast.c
===================================================================
--- mpich2/trunk/src/mpi/coll/bcast.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/bcast.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -35,7 +35,8 @@
int count,
MPI_Datatype datatype,
int root,
- MPID_Comm *comm_ptr)
+ MPID_Comm *comm_ptr,
+ int *errflag)
{
int rank, comm_size, src, dst;
int relative_rank, mask;
@@ -131,13 +132,14 @@
src = rank - mask;
if (src < 0) src += comm_size;
if (!is_contig || !is_homogeneous)
- mpi_errno = MPIC_Recv(tmp_buf,nbytes,MPI_BYTE,src,
- MPIR_BCAST_TAG,comm,MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Recv_ft(tmp_buf,nbytes,MPI_BYTE,src,
+ MPIR_BCAST_TAG,comm,MPI_STATUS_IGNORE, errflag);
else
- mpi_errno = MPIC_Recv(buffer,count,datatype,src,
- MPIR_BCAST_TAG,comm,MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Recv_ft(buffer,count,datatype,src,
+ MPIR_BCAST_TAG,comm,MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -165,13 +167,14 @@
dst = rank + mask;
if (dst >= comm_size) dst -= comm_size;
if (!is_contig || !is_homogeneous)
- mpi_errno = MPIC_Send(tmp_buf,nbytes,MPI_BYTE,dst,
- MPIR_BCAST_TAG,comm);
+ mpi_errno = MPIC_Send_ft(tmp_buf,nbytes,MPI_BYTE,dst,
+ MPIR_BCAST_TAG,comm, errflag);
else
- mpi_errno = MPIC_Send(buffer,count,datatype,dst,
- MPIR_BCAST_TAG,comm);
+ mpi_errno = MPIC_Send_ft(buffer,count,datatype,dst,
+ MPIR_BCAST_TAG,comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -195,6 +198,8 @@
MPIU_CHKLMEM_FREEALL();
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -223,7 +228,8 @@
int nbytes,
void *tmp_buf,
int is_contig,
- int is_homogeneous)
+ int is_homogeneous,
+ int *errflag)
{
MPI_Status status;
int rank, comm_size, src, dst;
@@ -272,12 +278,13 @@
}
else
{
- mpi_errno = MPIC_Recv(((char *)tmp_buf +
- relative_rank*scatter_size),
- recv_size, MPI_BYTE, src,
- MPIR_BCAST_TAG, comm, &status);
+ mpi_errno = MPIC_Recv_ft(((char *)tmp_buf +
+ relative_rank*scatter_size),
+ recv_size, MPI_BYTE, src,
+ MPIR_BCAST_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
curr_size = 0;
@@ -307,12 +314,13 @@
{
dst = rank + mask;
if (dst >= comm_size) dst -= comm_size;
- mpi_errno = MPIC_Send (((char *)tmp_buf +
- scatter_size*(relative_rank+mask)),
- send_size, MPI_BYTE, dst,
- MPIR_BCAST_TAG, comm);
+ mpi_errno = MPIC_Send_ft(((char *)tmp_buf +
+ scatter_size*(relative_rank+mask)),
+ send_size, MPI_BYTE, dst,
+ MPIR_BCAST_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -326,6 +334,8 @@
fn_exit:
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -360,7 +370,8 @@
int count,
MPI_Datatype datatype,
int root,
- MPID_Comm *comm_ptr)
+ MPID_Comm *comm_ptr,
+ int *errflag)
{
MPI_Status status;
int rank, comm_size, dst;
@@ -439,9 +450,10 @@
mpi_errno = scatter_for_bcast(buffer, count, datatype, root, comm_ptr,
- nbytes, tmp_buf, is_contig, is_homogeneous);
+ nbytes, tmp_buf, is_contig, is_homogeneous, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -473,13 +485,14 @@
if (relative_dst < comm_size)
{
- mpi_errno = MPIC_Sendrecv(((char *)tmp_buf + send_offset),
- curr_size, MPI_BYTE, dst, MPIR_BCAST_TAG,
- ((char *)tmp_buf + recv_offset),
- (nbytes-recv_offset < 0 ? 0 : nbytes-recv_offset),
- MPI_BYTE, dst, MPIR_BCAST_TAG, comm, &status);
+ mpi_errno = MPIC_Sendrecv_ft(((char *)tmp_buf + send_offset),
+ curr_size, MPI_BYTE, dst, MPIR_BCAST_TAG,
+ ((char *)tmp_buf + recv_offset),
+ (nbytes-recv_offset < 0 ? 0 : nbytes-recv_offset),
+ MPI_BYTE, dst, MPIR_BCAST_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
recv_size = 0;
@@ -546,14 +559,15 @@
/* printf("Rank %d, send to %d, offset %d, size %d\n", rank, dst, offset, recv_size);
fflush(stdout); */
- mpi_errno = MPIC_Send(((char *)tmp_buf + offset),
- recv_size, MPI_BYTE, dst,
- MPIR_BCAST_TAG, comm);
+ mpi_errno = MPIC_Send_ft(((char *)tmp_buf + offset),
+ recv_size, MPI_BYTE, dst,
+ MPIR_BCAST_TAG, comm, errflag);
/* recv_size was set in the previous
receive. that's the amount of data to be
sent now. */
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -566,14 +580,15 @@
{
/* printf("Rank %d waiting to recv from rank %d\n",
relative_rank, dst); */
- mpi_errno = MPIC_Recv(((char *)tmp_buf + offset),
- nbytes - offset,
- MPI_BYTE, dst, MPIR_BCAST_TAG,
- comm, &status);
+ mpi_errno = MPIC_Recv_ft(((char *)tmp_buf + offset),
+ nbytes - offset,
+ MPI_BYTE, dst, MPIR_BCAST_TAG,
+ comm, &status, errflag);
/* nprocs_completed is also equal to the no. of processes
whose data we don't have */
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
recv_size = 0;
@@ -608,6 +623,8 @@
MPIU_CHKLMEM_FREEALL();
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -638,7 +655,8 @@
int count,
MPI_Datatype datatype,
int root,
- MPID_Comm *comm_ptr)
+ MPID_Comm *comm_ptr,
+ int *errflag)
{
int rank, comm_size;
int relative_rank;
@@ -712,9 +730,10 @@
scatter_size = (nbytes + comm_size - 1)/comm_size; /* ceiling division */
mpi_errno = scatter_for_bcast(buffer, count, datatype, root, comm_ptr,
- nbytes, tmp_buf, is_contig, is_homogeneous);
+ nbytes, tmp_buf, is_contig, is_homogeneous, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -745,17 +764,18 @@
for (i=1; i<comm_size; i++)
{
mpi_errno =
- MPIC_Sendrecv((char *)tmp_buf +
- displs[(j-root+comm_size)%comm_size],
- recvcnts[(j-root+comm_size)%comm_size],
- MPI_BYTE, right, MPIR_BCAST_TAG,
- (char *)tmp_buf +
- displs[(jnext-root+comm_size)%comm_size],
- recvcnts[(jnext-root+comm_size)%comm_size],
- MPI_BYTE, left,
- MPIR_BCAST_TAG, comm, MPI_STATUS_IGNORE);
+ MPIC_Sendrecv_ft((char *)tmp_buf +
+ displs[(j-root+comm_size)%comm_size],
+ recvcnts[(j-root+comm_size)%comm_size],
+ MPI_BYTE, right, MPIR_BCAST_TAG,
+ (char *)tmp_buf +
+ displs[(jnext-root+comm_size)%comm_size],
+ recvcnts[(jnext-root+comm_size)%comm_size],
+ MPI_BYTE, left,
+ MPIR_BCAST_TAG, comm, MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -779,6 +799,8 @@
MPIU_CHKLMEM_FREEALL();
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -789,21 +811,22 @@
Otherwise it invokes bcast_fn_ with the given args.
NOTE: calls MPIU_ERR_POP on any failure, so a fn_fail label is needed. */
-#define MPIR_Bcast_fn_or_override(bcast_fn_,mpi_errno_,buffer_,count_,datatype_,root_,comm_ptr_) \
+#define MPIR_Bcast_fn_or_override(bcast_fn_,mpi_errno_,buffer_,count_,datatype_,root_,comm_ptr_,errflag_) \
do { \
if (comm_ptr_->coll_fns != NULL && comm_ptr_->coll_fns->Bcast != NULL) \
{ \
/* --BEGIN USEREXTENSION-- */ \
mpi_errno_ = comm_ptr->coll_fns->Bcast(buffer_, count_, \
- datatype_, root_, comm_ptr_); \
+ datatype_, root_, comm_ptr_, errflag_); \
/* --END USEREXTENSION-- */ \
} \
else \
{ \
- mpi_errno_ = bcast_fn_(buffer_, count_, datatype_, root_, comm_ptr_); \
+ mpi_errno_ = bcast_fn_(buffer_, count_, datatype_, root_, comm_ptr_, errflag_); \
} \
if (mpi_errno) { \
/* for communication errors, just record the error but continue */ \
+ *(errflag_) = TRUE; \
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail"); \
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno); \
} \
@@ -820,7 +843,8 @@
int count,
MPI_Datatype datatype,
int root,
- MPID_Comm *comm_ptr)
+ MPID_Comm *comm_ptr,
+ int *errflag)
{
int mpi_errno = MPI_SUCCESS;
int mpi_errno_ret = MPI_SUCCESS;
@@ -859,15 +883,16 @@
MPIU_Get_intranode_rank(comm_ptr, root) > 0) /* is not the node root (0) */
{ /* and is on our node (!-1) */
if (root == comm_ptr->rank) {
- mpi_errno = MPIC_Send(buffer,count,datatype,0,
- MPIR_BCAST_TAG,comm_ptr->node_comm->handle);
+ mpi_errno = MPIC_Send_ft(buffer,count,datatype,0,
+ MPIR_BCAST_TAG,comm_ptr->node_comm->handle, errflag);
}
else if (0 == comm_ptr->node_comm->rank) {
- mpi_errno = MPIC_Recv(buffer,count,datatype,MPIU_Get_intranode_rank(comm_ptr, root),
- MPIR_BCAST_TAG,comm_ptr->node_comm->handle,MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Recv_ft(buffer,count,datatype,MPIU_Get_intranode_rank(comm_ptr, root),
+ MPIR_BCAST_TAG,comm_ptr->node_comm->handle,MPI_STATUS_IGNORE, errflag);
}
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -879,14 +904,14 @@
MPIR_Bcast_fn_or_override(MPIR_Bcast_binomial, mpi_errno,
buffer, count, datatype,
MPIU_Get_internode_rank(comm_ptr, root),
- comm_ptr->node_roots_comm);
+ comm_ptr->node_roots_comm, errflag);
}
/* perform the intranode broadcast on all except for the root's node */
if (comm_ptr->node_comm != NULL)
{
MPIR_Bcast_fn_or_override(MPIR_Bcast_binomial, mpi_errno,
- buffer, count, datatype, 0, comm_ptr->node_comm);
+ buffer, count, datatype, 0, comm_ptr->node_comm, errflag);
}
}
else /* (nbytes > MPIR_PARAM_BCAST_SHORT_MSG_SIZE) && (comm_ptr->size >= MPIR_PARAM_BCAST_MIN_PROCS) */
@@ -908,7 +933,7 @@
MPIR_Bcast_fn_or_override(MPIR_Bcast_binomial, mpi_errno,
buffer, count, datatype,
MPIU_Get_intranode_rank(comm_ptr, root),
- comm_ptr->node_comm);
+ comm_ptr->node_comm, errflag);
}
/* perform the internode broadcast */
@@ -919,14 +944,14 @@
MPIR_Bcast_fn_or_override(MPIR_Bcast_scatter_doubling_allgather, mpi_errno,
buffer, count, datatype,
MPIU_Get_internode_rank(comm_ptr, root),
- comm_ptr->node_roots_comm);
+ comm_ptr->node_roots_comm, errflag);
}
else
{
MPIR_Bcast_fn_or_override(MPIR_Bcast_scatter_ring_allgather, mpi_errno,
buffer, count, datatype,
MPIU_Get_internode_rank(comm_ptr, root),
- comm_ptr->node_roots_comm);
+ comm_ptr->node_roots_comm, errflag);
}
}
@@ -938,7 +963,7 @@
bcast. We need a more comprehensive system for selecting the
right algorithms here. */
MPIR_Bcast_fn_or_override(MPIR_Bcast_binomial, mpi_errno,
- buffer, count, datatype, 0, comm_ptr->node_comm);
+ buffer, count, datatype, 0, comm_ptr->node_comm, errflag);
}
}
else /* large msg or non-pof2 */
@@ -946,9 +971,10 @@
/* FIXME It would be good to have an SMP-aware version of this
algorithm that (at least approximately) minimized internode
communication. */
- mpi_errno = MPIR_Bcast_scatter_ring_allgather(buffer, count, datatype, root, comm_ptr);
+ mpi_errno = MPIR_Bcast_scatter_ring_allgather(buffer, count, datatype, root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -958,6 +984,8 @@
fn_exit:
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -1016,7 +1044,8 @@
int count,
MPI_Datatype datatype,
int root,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
int mpi_errno = MPI_SUCCESS;
int mpi_errno_ret = MPI_SUCCESS;
@@ -1035,9 +1064,10 @@
#if defined(USE_SMP_COLLECTIVES)
if (MPIR_Comm_is_node_aware(comm_ptr)) {
- mpi_errno = MPIR_SMP_Bcast(buffer, count, datatype, root, comm_ptr);
+ mpi_errno = MPIR_SMP_Bcast(buffer, count, datatype, root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -1070,9 +1100,10 @@
if ((nbytes < MPIR_PARAM_BCAST_SHORT_MSG_SIZE) || (comm_size < MPIR_PARAM_BCAST_MIN_PROCS))
{
- mpi_errno = MPIR_Bcast_binomial(buffer, count, datatype, root, comm_ptr);
+ mpi_errno = MPIR_Bcast_binomial(buffer, count, datatype, root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -1081,9 +1112,10 @@
{
if ((nbytes < MPIR_PARAM_BCAST_LONG_MSG_SIZE) && (MPIU_is_pof2(comm_size, NULL)))
{
- mpi_errno = MPIR_Bcast_scatter_doubling_allgather(buffer, count, datatype, root, comm_ptr);
+ mpi_errno = MPIR_Bcast_scatter_doubling_allgather(buffer, count, datatype, root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -1093,9 +1125,10 @@
/* We want the ring algorithm whether or not we have a
topologically aware communicator. Doing inter/intra-node
communication phases breaks the pipelining of the algorithm. */
- mpi_errno = MPIR_Bcast_scatter_ring_allgather(buffer, count, datatype, root, comm_ptr);
+ mpi_errno = MPIR_Bcast_scatter_ring_allgather(buffer, count, datatype, root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -1110,6 +1143,8 @@
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -1127,7 +1162,8 @@
int count,
MPI_Datatype datatype,
int root,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag)
{
/* Intercommunicator broadcast.
Root sends to rank 0 in remote group. Remote group does local
@@ -1153,10 +1189,11 @@
{
/* root sends to rank 0 on remote group and returns */
MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
- mpi_errno = MPIC_Send(buffer, count, datatype, 0,
- MPIR_BCAST_TAG, comm);
+ mpi_errno = MPIC_Send_ft(buffer, count, datatype, 0,
+ MPIR_BCAST_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -1170,10 +1207,11 @@
if (rank == 0)
{
- mpi_errno = MPIC_Recv(buffer, count, datatype, root,
- MPIR_BCAST_TAG, comm, &status);
+ mpi_errno = MPIC_Recv_ft(buffer, count, datatype, root,
+ MPIR_BCAST_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -1187,9 +1225,10 @@
/* now do the usual broadcast on this intracommunicator
with rank 0 as root. */
- mpi_errno = MPIR_Bcast_intra(buffer, count, datatype, 0, newcomm_ptr);
+ mpi_errno = MPIR_Bcast_intra(buffer, count, datatype, 0, newcomm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -1199,6 +1238,8 @@
MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_BCAST_INTER);
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
}
@@ -1211,7 +1252,7 @@
#define FUNCNAME MPIR_Bcast_impl
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPIR_Bcast_impl(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr)
+int MPIR_Bcast_impl(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
@@ -1219,7 +1260,7 @@
{
/* --BEGIN USEREXTENSION-- */
mpi_errno = comm_ptr->coll_fns->Bcast(buffer, count,
- datatype, root, comm_ptr);
+ datatype, root, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
/* --END USEREXTENSION-- */
}
@@ -1228,14 +1269,14 @@
if (comm_ptr->comm_kind == MPID_INTRACOMM)
{
/* intracommunicator */
- mpi_errno = MPIR_Bcast_intra( buffer, count, datatype, root, comm_ptr );
+ mpi_errno = MPIR_Bcast_intra( buffer, count, datatype, root, comm_ptr, errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
else
{
/* intercommunicator */
- mpi_errno = MPIR_Bcast_inter( buffer, count, datatype, root, comm_ptr );
+ mpi_errno = MPIR_Bcast_inter( buffer, count, datatype, root, comm_ptr, errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
}
@@ -1254,18 +1295,18 @@
#define FUNCNAME MPIR_Bcast
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPIR_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr)
+int MPIR_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->comm_kind == MPID_INTRACOMM) {
/* intracommunicator */
- mpi_errno = MPIR_Bcast_intra( buffer, count, datatype, root, comm_ptr );
+ mpi_errno = MPIR_Bcast_intra( buffer, count, datatype, root, comm_ptr, errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
/* intercommunicator */
- mpi_errno = MPIR_Bcast_inter( buffer, count, datatype, root, comm_ptr );
+ mpi_errno = MPIR_Bcast_inter( buffer, count, datatype, root, comm_ptr, errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -1313,6 +1354,7 @@
{
int mpi_errno = MPI_SUCCESS;
MPID_Comm *comm_ptr = NULL;
+ int errflag = FALSE;
MPID_MPI_STATE_DECL(MPID_STATE_MPI_BCAST);
MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -1370,7 +1412,7 @@
/* ... body of routine ... */
- mpi_errno = MPIR_Bcast_impl( buffer, count, datatype, root, comm_ptr );
+ mpi_errno = MPIR_Bcast_impl( buffer, count, datatype, root, comm_ptr, &errflag );
if (mpi_errno) goto fn_fail;
/* ... end of body of routine ... */
Modified: mpich2/trunk/src/mpi/coll/exscan.c
===================================================================
--- mpich2/trunk/src/mpi/coll/exscan.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/exscan.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -102,7 +102,8 @@
int count,
MPI_Datatype datatype,
MPI_Op op,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
MPI_Status status;
int rank, comm_size;
@@ -183,13 +184,14 @@
dst = rank ^ mask;
if (dst < comm_size) {
/* Send partial_scan to dst. Recv into tmp_buf */
- mpi_errno = MPIC_Sendrecv(partial_scan, count, datatype,
- dst, MPIR_EXSCAN_TAG, tmp_buf,
- count, datatype, dst,
- MPIR_EXSCAN_TAG, comm,
- &status);
+ mpi_errno = MPIC_Sendrecv_ft(partial_scan, count, datatype,
+ dst, MPIR_EXSCAN_TAG, tmp_buf,
+ count, datatype, dst,
+ MPIR_EXSCAN_TAG, comm,
+ &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -244,6 +246,8 @@
MPIU_CHKLMEM_FREEALL();
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -258,15 +262,15 @@
#define FUNCNAME MPIR_Exscan_impl
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPIR_Exscan_impl(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr)
+int MPIR_Exscan_impl(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Exscan != NULL) {
- mpi_errno = comm_ptr->coll_fns->Exscan(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+ mpi_errno = comm_ptr->coll_fns->Exscan(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
- mpi_errno = MPIR_Exscan(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+ mpi_errno = MPIR_Exscan(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -323,6 +327,7 @@
{
int mpi_errno = MPI_SUCCESS;
MPID_Comm *comm_ptr = NULL;
+ int errflag = FALSE;
MPID_MPI_STATE_DECL(MPID_STATE_MPI_EXSCAN);
MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -394,7 +399,7 @@
/* ... body of routine ... */
- mpi_errno = MPIR_Exscan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+ mpi_errno = MPIR_Exscan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, &errflag);
if (mpi_errno) goto fn_fail;
/* ... end of body of routine ... */
Modified: mpich2/trunk/src/mpi/coll/gather.c
===================================================================
--- mpich2/trunk/src/mpi/coll/gather.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/gather.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -58,7 +58,8 @@
int recvcnt,
MPI_Datatype recvtype,
int root,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
int comm_size, rank;
int mpi_errno = MPI_SUCCESS;
@@ -190,21 +191,27 @@
* receive buffer, place it directly. This
* should cover the case where the root is
* rank 0. */
- mpi_errno = MPIC_Recv(((char *)recvbuf +
- (((rank + mask) % comm_size)*recvcnt*extent)),
- recvblks * recvcnt, recvtype, src,
- MPIR_GATHER_TAG, comm,
- &status);
+ mpi_errno = MPIC_Recv_ft(((char *)recvbuf +
+ (((rank + mask) % comm_size)*recvcnt*extent)),
+ recvblks * recvcnt, recvtype, src,
+ MPIR_GATHER_TAG, comm,
+ &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
else if (nbytes < MPIR_PARAM_GATHER_VSMALL_MSG_SIZE) {
- mpi_errno = MPIC_Recv(tmp_buf, recvblks * nbytes, MPI_BYTE,
- src, MPIR_GATHER_TAG, comm, &status);
- if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ mpi_errno = MPIC_Recv_ft(tmp_buf, recvblks * nbytes, MPI_BYTE,
+ src, MPIR_GATHER_TAG, comm, &status, errflag);
+ if (mpi_errno) {
+ /* for communication errors, just record the error but continue */
+ *errflag = TRUE;
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+ MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
+ }
copy_offset = rank + mask;
copy_blks = recvblks;
}
@@ -220,10 +227,11 @@
mpi_errno = MPIR_Type_commit_impl(&tmp_type);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
- mpi_errno = MPIC_Recv(recvbuf, 1, tmp_type, src,
- MPIR_GATHER_TAG, comm, &status);
+ mpi_errno = MPIC_Recv_ft(recvbuf, 1, tmp_type, src,
+ MPIR_GATHER_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -245,12 +253,13 @@
offset = mask * nbytes;
else
offset = (mask - 1) * nbytes;
- mpi_errno = MPIC_Recv(((char *)tmp_buf + offset),
- recvblks * nbytes, MPI_BYTE, src,
- MPIR_GATHER_TAG, comm,
- &status);
+ mpi_errno = MPIC_Recv_ft(((char *)tmp_buf + offset),
+ recvblks * nbytes, MPI_BYTE, src,
+ MPIR_GATHER_TAG, comm,
+ &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -266,19 +275,21 @@
if (!tmp_buf_size)
{
/* leaf nodes send directly from sendbuf */
- mpi_errno = MPIC_Send(sendbuf, sendcnt, sendtype, dst,
- MPIR_GATHER_TAG, comm);
+ mpi_errno = MPIC_Send_ft(sendbuf, sendcnt, sendtype, dst,
+ MPIR_GATHER_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
else if (nbytes < MPIR_PARAM_GATHER_VSMALL_MSG_SIZE) {
- mpi_errno = MPIC_Send(tmp_buf, curr_cnt, MPI_BYTE, dst,
- MPIR_GATHER_TAG, comm);
+ mpi_errno = MPIC_Send_ft(tmp_buf, curr_cnt, MPI_BYTE, dst,
+ MPIR_GATHER_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -297,10 +308,11 @@
mpi_errno = MPIR_Type_commit_impl(&tmp_type);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
- mpi_errno = MPIC_Send(MPI_BOTTOM, 1, tmp_type, dst,
- MPIR_GATHER_TAG, comm);
+ mpi_errno = MPIC_Send_ft(MPI_BOTTOM, 1, tmp_type, dst,
+ MPIR_GATHER_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -367,12 +379,13 @@
if (src < comm_size)
{
src = (src + root) % comm_size;
- mpi_errno = MPIC_Recv(((char *)tmp_buf + curr_cnt),
- tmp_buf_size-curr_cnt, MPI_BYTE, src,
- MPIR_GATHER_TAG, comm,
- &status);
+ mpi_errno = MPIC_Recv_ft(((char *)tmp_buf + curr_cnt),
+ tmp_buf_size-curr_cnt, MPI_BYTE, src,
+ MPIR_GATHER_TAG, comm,
+ &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
recv_size = 0;
@@ -387,10 +400,11 @@
{
dst = relative_rank ^ mask;
dst = (dst + root) % comm_size;
- mpi_errno = MPIC_Send(tmp_buf, curr_cnt, MPI_BYTE, dst,
- MPIR_GATHER_TAG, comm);
+ mpi_errno = MPIC_Send_ft(tmp_buf, curr_cnt, MPI_BYTE, dst,
+ MPIR_GATHER_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -434,6 +448,8 @@
MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -454,7 +470,8 @@
int recvcnt,
MPI_Datatype recvtype,
int root,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
/* Intercommunicator gather.
For short messages, remote group does a local intracommunicator
@@ -506,11 +523,12 @@
if (root == MPI_ROOT)
{
/* root receives data from rank 0 on remote group */
- mpi_errno = MPIC_Recv(recvbuf, recvcnt*remote_size,
- recvtype, 0, MPIR_GATHER_TAG, comm,
- &status);
+ mpi_errno = MPIC_Recv_ft(recvbuf, recvcnt*remote_size,
+ recvtype, 0, MPIR_GATHER_TAG, comm,
+ &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -546,20 +564,22 @@
/* now do the a local gather on this intracommunicator */
mpi_errno = MPIR_Gather_impl(sendbuf, sendcnt, sendtype,
tmp_buf, sendcnt, sendtype, 0,
- newcomm_ptr);
+ newcomm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
if (rank == 0)
{
- mpi_errno = MPIC_Send(tmp_buf, sendcnt*local_size,
- sendtype, root,
- MPIR_GATHER_TAG, comm);
+ mpi_errno = MPIC_Send_ft(tmp_buf, sendcnt*local_size,
+ sendtype, root,
+ MPIR_GATHER_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -577,11 +597,12 @@
for (i=0; i<remote_size; i++)
{
- mpi_errno = MPIC_Recv(((char *)recvbuf+recvcnt*i*extent),
- recvcnt, recvtype, i,
- MPIR_GATHER_TAG, comm, &status);
+ mpi_errno = MPIC_Recv_ft(((char *)recvbuf+recvcnt*i*extent),
+ recvcnt, recvtype, i,
+ MPIR_GATHER_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -589,10 +610,11 @@
}
else
{
- mpi_errno = MPIC_Send(sendbuf,sendcnt,sendtype,root,
- MPIR_GATHER_TAG,comm);
+ mpi_errno = MPIC_Send_ft(sendbuf,sendcnt,sendtype,root,
+ MPIR_GATHER_TAG,comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -604,6 +626,8 @@
MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -619,7 +643,7 @@
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Gather(void *sendbuf, int sendcnt, MPI_Datatype sendtype,
void *recvbuf, int recvcnt, MPI_Datatype recvtype,
- int root, MPID_Comm *comm_ptr)
+ int root, MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
@@ -627,13 +651,13 @@
/* intracommunicator */
mpi_errno = MPIR_Gather_intra(sendbuf, sendcnt, sendtype,
recvbuf, recvcnt, recvtype, root,
- comm_ptr);
+ comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
/* intercommunicator */
mpi_errno = MPIR_Gather_inter(sendbuf, sendcnt, sendtype,
recvbuf, recvcnt, recvtype, root,
- comm_ptr);
+ comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -653,19 +677,19 @@
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Gather_impl(void *sendbuf, int sendcnt, MPI_Datatype sendtype,
void *recvbuf, int recvcnt, MPI_Datatype recvtype,
- int root, MPID_Comm *comm_ptr)
+ int root, MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Gather != NULL) {
mpi_errno = comm_ptr->coll_fns->Gather(sendbuf, sendcnt,
sendtype, recvbuf, recvcnt,
- recvtype, root, comm_ptr);
+ recvtype, root, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
mpi_errno = MPIR_Gather(sendbuf, sendcnt, sendtype,
recvbuf, recvcnt, recvtype, root,
- comm_ptr);
+ comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -717,6 +741,7 @@
{
int mpi_errno = MPI_SUCCESS;
MPID_Comm *comm_ptr = NULL;
+ int errflag = FALSE;
MPID_MPI_STATE_DECL(MPID_STATE_MPI_GATHER);
MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -820,7 +845,7 @@
/* ... body of routine ... */
- mpi_errno = MPIR_Gather_impl(sendbuf, sendcnt, sendtype, recvbuf, recvcnt, recvtype, root, comm_ptr);
+ mpi_errno = MPIR_Gather_impl(sendbuf, sendcnt, sendtype, recvbuf, recvcnt, recvtype, root, comm_ptr, &errflag);
if (mpi_errno) goto fn_fail;
/* ... end of body of routine ... */
Modified: mpich2/trunk/src/mpi/coll/gatherv.c
===================================================================
--- mpich2/trunk/src/mpi/coll/gatherv.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/gatherv.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -57,7 +57,8 @@
int *displs,
MPI_Datatype recvtype,
int root,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
int comm_size, rank;
int mpi_errno = MPI_SUCCESS;
@@ -104,16 +105,16 @@
}
}
else {
- mpi_errno = MPIC_Irecv(((char *)recvbuf+displs[i]*extent),
- recvcnts[i], recvtype, i,
- MPIR_GATHERV_TAG, comm,
- &reqarray[reqs++]);
+ mpi_errno = MPIC_Irecv_ft(((char *)recvbuf+displs[i]*extent),
+ recvcnts[i], recvtype, i,
+ MPIR_GATHERV_TAG, comm,
+ &reqarray[reqs++]);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
}
}
/* ... then wait for *all* of them to finish: */
- mpi_errno = MPIR_Waitall_impl(reqs, reqarray, starray);
+ mpi_errno = MPIC_Waitall_ft(reqs, reqarray, starray, errflag);
if (mpi_errno&& mpi_errno != MPI_ERR_IN_STATUS) MPIU_ERR_POP(mpi_errno);
/* --BEGIN ERROR HANDLING-- */
@@ -123,6 +124,7 @@
mpi_errno = starray[i].MPI_ERROR;
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -146,19 +148,21 @@
MPIR_PARAM_GET_DEFAULT_INT(GATHERV_INTER_SSEND_MIN_PROCS,&min_procs);
if (comm_size >= min_procs) {
- mpi_errno = MPIC_Ssend(sendbuf, sendcnt, sendtype, root,
- MPIR_GATHERV_TAG, comm);
+ mpi_errno = MPIC_Ssend_ft(sendbuf, sendcnt, sendtype, root,
+ MPIR_GATHERV_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
else {
- mpi_errno = MPIC_Send(sendbuf, sendcnt, sendtype, root,
- MPIR_GATHERV_TAG, comm);
+ mpi_errno = MPIC_Send_ft(sendbuf, sendcnt, sendtype, root,
+ MPIR_GATHERV_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -173,6 +177,8 @@
MPIU_CHKLMEM_FREEALL();
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -188,19 +194,19 @@
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Gatherv_impl(void *sendbuf, int sendcnt, MPI_Datatype sendtype,
void *recvbuf, int *recvcnts, int *displs, MPI_Datatype recvtype,
- int root, MPID_Comm *comm_ptr)
+ int root, MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Gatherv != NULL) {
mpi_errno = comm_ptr->coll_fns->Gatherv(sendbuf, sendcnt, sendtype,
recvbuf, recvcnts, displs, recvtype,
- root, comm_ptr);
+ root, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
mpi_errno = MPIR_Gatherv(sendbuf, sendcnt, sendtype,
recvbuf, recvcnts, displs, recvtype,
- root, comm_ptr);
+ root, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -256,6 +262,7 @@
{
int mpi_errno = MPI_SUCCESS;
MPID_Comm *comm_ptr = NULL;
+ int errflag = FALSE;
MPID_MPI_STATE_DECL(MPID_STATE_MPI_GATHERV);
MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -373,7 +380,7 @@
mpi_errno = MPIR_Gatherv_impl(sendbuf, sendcnt, sendtype,
recvbuf, recvcnts, displs, recvtype,
- root, comm_ptr);
+ root, comm_ptr, &errflag);
if (mpi_errno) goto fn_fail;
/* ... end of body of routine ... */
Modified: mpich2/trunk/src/mpi/coll/helper_fns.c
===================================================================
--- mpich2/trunk/src/mpi/coll/helper_fns.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/helper_fns.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -548,3 +548,300 @@
return mpi_errno;
/* --END ERROR HANDLING-- */
}
+
+
+/* Fault-tolerance versions. When a process fails, collectives will
+ still complete, however the result may be invalid. Processes
+ directly communicating with the failed process can detect the
+ failure, however another mechanism is needed to communicate the
+ failure to other processes receiving the invalid data. To do this
+ we introduce the _ft versions of the MPIC_ helper functions. These
+ functions take a pointer to an error flag. When this is set to
+ TRUE, the send functions will communicate the failure to the
+ receiver. If a function detects a failure, either by getting a
+ failure in the communication operation, or by receiving an error
+ indicator from a remote process, it sets the error flag to TRUE.
+
+ In this implementation, we indicate an error to a remote process by
+ sending the message with tag MPIR_ERROR_TAG instead of the requested
+ tag. Receives are posted with MPI_ANY_TAG; when a process receives
+ a message tagged MPIR_ERROR_TAG, it knows to set the error flag.
+ NOTE(review): this text previously described an empty-message
+ scheme and stated that barrier does not propagate failure
+ information; confirm whether the barrier caveat still holds. */
+
+#undef FUNCNAME
+#define FUNCNAME MPIC_Send_ft
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPIC_Send_ft(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+ MPI_Comm comm, int *errflag)
+{
+ int mpi_errno = MPI_SUCCESS;
+ MPIDI_STATE_DECL(MPID_STATE_MPIC_SEND_FT);
+ /* Wrapper around MPIC_Send; *errflag is only read here, never written. */
+ MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SEND_FT);
+
+ MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
+ /* If an error was already flagged, send under MPIR_ERROR_TAG instead of tag so the receiver can detect it. */
+ if (*errflag)
+ mpi_errno = MPIC_Send(buf, count, datatype, dest, MPIR_ERROR_TAG, comm);
+ else
+ mpi_errno = MPIC_Send(buf, count, datatype, dest, tag, comm);
+ /* The MPIC_Send result is returned to the caller unchanged. */
+ fn_exit:
+ MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SEND_FT);
+ return mpi_errno;
+ fn_fail:
+ goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPIC_Recv_ft
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPIC_Recv_ft(void *buf, int count, MPI_Datatype datatype, int source, int tag,
+ MPI_Comm comm, MPI_Status *status, int *errflag)
+{
+ int mpi_errno = MPI_SUCCESS;
+ MPI_Status mystatus;
+ MPIDI_STATE_DECL(MPID_STATE_MPIC_RECV_FT);
+ /* Wrapper around MPIC_Recv that sets *errflag when the message arrived under MPIR_ERROR_TAG. */
+ MPIDI_FUNC_ENTER(MPID_STATE_MPIC_RECV_FT);
+
+ MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
+ /* The received tag is inspected below, so substitute a local status when the caller passed MPI_STATUS_IGNORE. */
+ if (status == MPI_STATUS_IGNORE)
+ status = &mystatus;
+ /* Receive with MPI_ANY_TAG so that either the expected tag or MPIR_ERROR_TAG can match. */
+ mpi_errno = MPIC_Recv(buf, count, datatype, source, MPI_ANY_TAG, comm, status);
+ if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ /* Flag already TRUE on entry: the tag check below is skipped. */
+ if (*errflag)
+ goto fn_exit;
+ /* The tag is not examined when source is MPI_PROC_NULL. */
+ if (source != MPI_PROC_NULL) {
+ if (status->MPI_TAG == MPIR_ERROR_TAG)
+ *errflag = TRUE;
+ else {
+ MPIU_Assert(status->MPI_TAG == tag);
+ }
+ }
+
+ fn_exit:
+ MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");
+ MPIDI_FUNC_EXIT(MPID_STATE_MPIC_RECV_FT);
+ return mpi_errno;
+ fn_fail:
+ goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPIC_Ssend_ft
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPIC_Ssend_ft(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+ MPI_Comm comm, int *errflag)
+{
+ int mpi_errno = MPI_SUCCESS;
+ MPIDI_STATE_DECL(MPID_STATE_MPIC_SSEND_FT);
+ /* Wrapper around MPIC_Ssend; *errflag is only read here, never written. */
+ MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SSEND_FT);
+
+ MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
+ /* If an error was already flagged, send under MPIR_ERROR_TAG instead of tag so the receiver can detect it. */
+ if (*errflag)
+ mpi_errno = MPIC_Ssend(buf, count, datatype, dest, MPIR_ERROR_TAG, comm);
+ else
+ mpi_errno = MPIC_Ssend(buf, count, datatype, dest, tag, comm);
+ /* The MPIC_Ssend result is returned to the caller unchanged. */
+ fn_exit:
+ MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SSEND_FT);
+ return mpi_errno;
+ fn_fail:
+ goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPIC_Sendrecv_ft
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPIC_Sendrecv_ft(void *sendbuf, int sendcount, MPI_Datatype sendtype,
+ int dest, int sendtag, void *recvbuf, int recvcount,
+ MPI_Datatype recvtype, int source, int recvtag,
+ MPI_Comm comm, MPI_Status *status, int *errflag)
+{
+ int mpi_errno = MPI_SUCCESS;
+ MPI_Status mystatus;
+ MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV_FT);
+ /* Wrapper around MPIC_Sendrecv: forwards an already-set *errflag to dest and sets it if source sent MPIR_ERROR_TAG. */
+ MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SENDRECV_FT);
+
+ MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
+ /* The received tag is inspected below, so substitute a local status when the caller passed MPI_STATUS_IGNORE. */
+ if (status == MPI_STATUS_IGNORE)
+ status = &mystatus;
+ /* Error already flagged: send under MPIR_ERROR_TAG, accept any tag, and return without inspecting the received tag. */
+ if (*errflag) {
+ mpi_errno = MPIC_Sendrecv(sendbuf, sendcount, sendtype, dest, MPIR_ERROR_TAG,
+ recvbuf, recvcount, recvtype, source, MPI_ANY_TAG,
+ comm, status);
+ goto fn_exit;
+ }
+ /* Normal path: send under sendtag; receive with MPI_ANY_TAG so recvtag or MPIR_ERROR_TAG can match. */
+ mpi_errno = MPIC_Sendrecv(sendbuf, sendcount, sendtype, dest, sendtag,
+ recvbuf, recvcount, recvtype, source, MPI_ANY_TAG,
+ comm, status);
+ if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ /* The tag is not examined when source is MPI_PROC_NULL. */
+ if (source != MPI_PROC_NULL) {
+ if (status->MPI_TAG == MPIR_ERROR_TAG)
+ *errflag = TRUE;
+ else {
+ MPIU_Assert(status->MPI_TAG == recvtag);
+ }
+ }
+
+ fn_exit:
+ MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");
+
+ MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SENDRECV_FT);
+ return mpi_errno;
+ fn_fail:
+ goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPIC_Sendrecv_replace_ft
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPIC_Sendrecv_replace_ft(void *buf, int count, MPI_Datatype datatype,
+ int dest, int sendtag,
+ int source, int recvtag,
+ MPI_Comm comm, MPI_Status *status, int *errflag)
+{
+ int mpi_errno = MPI_SUCCESS;
+ MPI_Status mystatus;
+ MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV_REPLACE_FT);
+ /* Wrapper around MPIC_Sendrecv_replace: forwards an already-set *errflag to dest and sets it if source sent MPIR_ERROR_TAG. */
+ MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SENDRECV_REPLACE_FT);
+
+ MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
+ /* The received tag is inspected below, so substitute a local status when the caller passed MPI_STATUS_IGNORE. */
+ if (status == MPI_STATUS_IGNORE)
+ status = &mystatus;
+ /* Error already flagged: send under MPIR_ERROR_TAG. The receive must use MPI_ANY_TAG, as in MPIC_Sendrecv_ft, because the peer may itself be sending under MPIR_ERROR_TAG, which a receive posted with recvtag would never match. */
+ if (*errflag) {
+ mpi_errno = MPIC_Sendrecv_replace(buf, count, datatype,
+ dest, MPIR_ERROR_TAG,
+ source, MPI_ANY_TAG,
+ comm, status);
+ goto fn_exit;
+ }
+ /* Normal path: send under sendtag; receive with MPI_ANY_TAG so recvtag or MPIR_ERROR_TAG can match. */
+ mpi_errno = MPIC_Sendrecv_replace(buf, count, datatype,
+ dest, sendtag,
+ source, MPI_ANY_TAG,
+ comm, status);
+ if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ /* The tag is not examined when source is MPI_PROC_NULL. */
+ if (source != MPI_PROC_NULL) {
+ if (status->MPI_TAG == MPIR_ERROR_TAG)
+ *errflag = TRUE;
+ else {
+ MPIU_Assert(status->MPI_TAG == recvtag);
+ }
+ }
+
+ fn_exit:
+ MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");
+ MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SENDRECV_REPLACE_FT);
+ return mpi_errno;
+ fn_fail:
+ goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPIC_Isend_ft
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPIC_Isend_ft(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+ MPI_Comm comm, MPI_Request *request, int *errflag)
+{
+ int mpi_errno = MPI_SUCCESS;
+ MPIDI_STATE_DECL(MPID_STATE_MPIC_ISEND_FT);
+ /* Wrapper around MPIC_Isend; *errflag is only read here, never written. */
+ MPIDI_FUNC_ENTER(MPID_STATE_MPIC_ISEND_FT);
+
+ MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
+ /* If an error was already flagged, start the send under MPIR_ERROR_TAG instead of tag so the receiver can detect it. */
+ if (*errflag)
+ mpi_errno = MPIC_Isend(buf, count, datatype, dest, MPIR_ERROR_TAG, comm, request);
+ else
+ mpi_errno = MPIC_Isend(buf, count, datatype, dest, tag, comm, request);
+ /* The MPIC_Isend result is returned to the caller unchanged. */
+ fn_exit:
+ MPIDI_FUNC_EXIT(MPID_STATE_MPIC_ISEND_FT);
+ return mpi_errno;
+ fn_fail:
+ goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPIC_Irecv_ft
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPIC_Irecv_ft(void *buf, int count, MPI_Datatype datatype, int source,
+ int tag, MPI_Comm comm, MPI_Request *request)
+{
+ int mpi_errno = MPI_SUCCESS;
+ MPIDI_STATE_DECL(MPID_STATE_MPIC_IRECV_FT);
+ /* Wrapper around MPIC_Irecv. Takes no errflag; this function performs no error-tag check itself. */
+ MPIDI_FUNC_ENTER(MPID_STATE_MPIC_IRECV_FT);
+ /* The tag argument is not used: the receive is posted with MPI_ANY_TAG so an MPIR_ERROR_TAG message can also match. */
+ mpi_errno = MPIC_Irecv(buf, count, datatype, source, MPI_ANY_TAG, comm, request);
+
+ fn_exit:
+ MPIDI_FUNC_EXIT(MPID_STATE_MPIC_IRECV_FT);
+ return mpi_errno;
+ fn_fail:
+ goto fn_exit;
+}
+
+
+#undef FUNCNAME
+#define FUNCNAME MPIC_Waitall_ft
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPIC_Waitall_ft(int numreq, MPI_Request requests[], MPI_Status statuses[], int *errflag)
+{
+ int mpi_errno = MPI_SUCCESS;
+ int i;
+ MPIDI_STATE_DECL(MPID_STATE_MPIC_WAITALL_FT);
+ /* Waits on all requests via MPIR_Waitall_impl, then sets *errflag if any status carries MPIR_ERROR_TAG. */
+ MPIDI_FUNC_ENTER(MPID_STATE_MPIC_WAITALL_FT);
+ /* The statuses are read below, so callers must supply a real array rather than MPI_STATUSES_IGNORE. */
+ MPIU_Assert(statuses != MPI_STATUSES_IGNORE);
+
+ MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
+
+ mpi_errno = MPIR_Waitall_impl(numreq, requests, statuses);
+ if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ /* Flag already TRUE on entry: the tag scan below is skipped. */
+ if (*errflag)
+ goto fn_exit;
+ /* NOTE(review): every status is scanned, including those of send requests; confirm MPI_TAG is meaningful for them. */
+ for (i = 0; i < numreq; ++i) {
+ if (statuses[i].MPI_TAG == MPIR_ERROR_TAG) {
+ *errflag = TRUE;
+ break;
+ }
+ }
+
+ fn_exit:
+ MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");
+ MPIDI_FUNC_EXIT(MPID_STATE_MPIC_WAITALL_FT);
+ return mpi_errno;
+ fn_fail:
+ goto fn_exit;
+}
Modified: mpich2/trunk/src/mpi/coll/red_scat.c
===================================================================
--- mpich2/trunk/src/mpi/coll/red_scat.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/red_scat.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -80,7 +80,8 @@
int *recvcnts,
MPI_Datatype datatype,
MPI_Op op,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
int mpi_errno = MPI_SUCCESS;
int mpi_errno_ret = MPI_SUCCESS;
@@ -187,13 +188,14 @@
send_offset += size;
}
- mpi_errno = MPIC_Sendrecv(outgoing_data + send_offset*true_extent,
- size, datatype, peer, MPIR_REDUCE_SCATTER_TAG,
- incoming_data + recv_offset*true_extent,
- size, datatype, peer, MPIR_REDUCE_SCATTER_TAG,
- comm, MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Sendrecv_ft(outgoing_data + send_offset*true_extent,
+ size, datatype, peer, MPIR_REDUCE_SCATTER_TAG,
+ incoming_data + recv_offset*true_extent,
+ size, datatype, peer, MPIR_REDUCE_SCATTER_TAG,
+ comm, MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -229,6 +231,8 @@
MPIU_CHKLMEM_FREEALL();
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -293,7 +297,8 @@
int *recvcnts,
MPI_Datatype datatype,
MPI_Op op,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
int rank, comm_size, i;
MPI_Aint extent, true_extent, true_lb;
@@ -413,11 +418,12 @@
if (rank < 2*rem) {
if (rank % 2 == 0) { /* even */
- mpi_errno = MPIC_Send(tmp_results, total_count,
- datatype, rank+1,
- MPIR_REDUCE_SCATTER_TAG, comm);
+ mpi_errno = MPIC_Send_ft(tmp_results, total_count,
+ datatype, rank+1,
+ MPIR_REDUCE_SCATTER_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -428,12 +434,13 @@
newrank = -1;
}
else { /* odd */
- mpi_errno = MPIC_Recv(tmp_recvbuf, total_count,
- datatype, rank-1,
- MPIR_REDUCE_SCATTER_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Recv_ft(tmp_recvbuf, total_count,
+ datatype, rank-1,
+ MPIR_REDUCE_SCATTER_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -513,30 +520,31 @@
*/
/* Send data from tmp_results. Recv into tmp_recvbuf */
if ((send_cnt != 0) && (recv_cnt != 0))
- mpi_errno = MPIC_Sendrecv((char *) tmp_results +
- newdisps[send_idx]*extent,
- send_cnt, datatype,
- dst, MPIR_REDUCE_SCATTER_TAG,
- (char *) tmp_recvbuf +
- newdisps[recv_idx]*extent,
- recv_cnt, datatype, dst,
- MPIR_REDUCE_SCATTER_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Sendrecv_ft((char *) tmp_results +
+ newdisps[send_idx]*extent,
+ send_cnt, datatype,
+ dst, MPIR_REDUCE_SCATTER_TAG,
+ (char *) tmp_recvbuf +
+ newdisps[recv_idx]*extent,
+ recv_cnt, datatype, dst,
+ MPIR_REDUCE_SCATTER_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
else if ((send_cnt == 0) && (recv_cnt != 0))
- mpi_errno = MPIC_Recv((char *) tmp_recvbuf +
- newdisps[recv_idx]*extent,
- recv_cnt, datatype, dst,
- MPIR_REDUCE_SCATTER_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Recv_ft((char *) tmp_recvbuf +
+ newdisps[recv_idx]*extent,
+ recv_cnt, datatype, dst,
+ MPIR_REDUCE_SCATTER_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
else if ((recv_cnt == 0) && (send_cnt != 0))
- mpi_errno = MPIC_Send((char *) tmp_results +
- newdisps[send_idx]*extent,
- send_cnt, datatype,
- dst, MPIR_REDUCE_SCATTER_TAG,
- comm);
+ mpi_errno = MPIC_Send_ft((char *) tmp_results +
+ newdisps[send_idx]*extent,
+ send_cnt, datatype,
+ dst, MPIR_REDUCE_SCATTER_TAG,
+ comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -583,12 +591,13 @@
if (rank < 2*rem) {
if (rank % 2) { /* odd */
if (recvcnts[rank-1]) {
- mpi_errno = MPIC_Send((char *) tmp_results +
- disps[rank-1]*extent, recvcnts[rank-1],
- datatype, rank-1,
- MPIR_REDUCE_SCATTER_TAG, comm);
+ mpi_errno = MPIC_Send_ft((char *) tmp_results +
+ disps[rank-1]*extent, recvcnts[rank-1],
+ datatype, rank-1,
+ MPIR_REDUCE_SCATTER_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -596,12 +605,13 @@
}
else { /* even */
if (recvcnts[rank]) {
- mpi_errno = MPIC_Recv(recvbuf, recvcnts[rank],
- datatype, rank+1,
- MPIR_REDUCE_SCATTER_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Recv_ft(recvbuf, recvcnts[rank],
+ datatype, rank+1,
+ MPIR_REDUCE_SCATTER_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -635,22 +645,23 @@
/* send the data that dst needs. recv data that this process
needs from src into tmp_recvbuf */
if (sendbuf != MPI_IN_PLACE)
- mpi_errno = MPIC_Sendrecv(((char *)sendbuf+disps[dst]*extent),
- recvcnts[dst], datatype, dst,
- MPIR_REDUCE_SCATTER_TAG, tmp_recvbuf,
- recvcnts[rank], datatype, src,
- MPIR_REDUCE_SCATTER_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Sendrecv_ft(((char *)sendbuf+disps[dst]*extent),
+ recvcnts[dst], datatype, dst,
+ MPIR_REDUCE_SCATTER_TAG, tmp_recvbuf,
+ recvcnts[rank], datatype, src,
+ MPIR_REDUCE_SCATTER_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
else
- mpi_errno = MPIC_Sendrecv(((char *)recvbuf+disps[dst]*extent),
- recvcnts[dst], datatype, dst,
- MPIR_REDUCE_SCATTER_TAG, tmp_recvbuf,
- recvcnts[rank], datatype, src,
- MPIR_REDUCE_SCATTER_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Sendrecv_ft(((char *)recvbuf+disps[dst]*extent),
+ recvcnts[dst], datatype, dst,
+ MPIR_REDUCE_SCATTER_TAG, tmp_recvbuf,
+ recvcnts[rank], datatype, src,
+ MPIR_REDUCE_SCATTER_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -755,8 +766,13 @@
if (pof2 == comm_size && is_block_regular) {
/* noncommutative, pof2 size, and block regular */
- mpi_errno = MPIR_Reduce_scatter_noncomm(sendbuf, recvbuf, recvcnts, datatype, op, comm_ptr);
- if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ mpi_errno = MPIR_Reduce_scatter_noncomm(sendbuf, recvbuf, recvcnts, datatype, op, comm_ptr, errflag);
+ if (mpi_errno) {
+ /* for communication errors, just record the error but continue */
+ *errflag = TRUE;
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+ MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
+ }
}
else {
/* noncommutative and (non-pof2 or block irregular), use recursive doubling. */
@@ -844,14 +860,15 @@
received in tmp_recvbuf and then accumulated into
tmp_results. accumulation is done later below. */
- mpi_errno = MPIC_Sendrecv(tmp_results, 1, sendtype, dst,
- MPIR_REDUCE_SCATTER_TAG,
- tmp_recvbuf, 1, recvtype, dst,
- MPIR_REDUCE_SCATTER_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Sendrecv_ft(tmp_results, 1, sendtype, dst,
+ MPIR_REDUCE_SCATTER_TAG,
+ tmp_recvbuf, 1, recvtype, dst,
+ MPIR_REDUCE_SCATTER_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
received = 1;
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -893,22 +910,28 @@
(rank < tree_root + nprocs_completed)
&& (dst >= tree_root + nprocs_completed)) {
/* send the current result */
- mpi_errno = MPIC_Send(tmp_recvbuf, 1, recvtype,
- dst, MPIR_REDUCE_SCATTER_TAG,
- comm);
- if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ mpi_errno = MPIC_Send_ft(tmp_recvbuf, 1, recvtype,
+ dst, MPIR_REDUCE_SCATTER_TAG,
+ comm, errflag);
+ if (mpi_errno) {
+ /* for communication errors, just record the error but continue */
+ *errflag = TRUE;
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+ MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
+ }
}
/* recv only if this proc. doesn't have data and sender
has data */
else if ((dst < rank) &&
(dst < tree_root + nprocs_completed) &&
(rank >= tree_root + nprocs_completed)) {
- mpi_errno = MPIC_Recv(tmp_recvbuf, 1, recvtype, dst,
- MPIR_REDUCE_SCATTER_TAG,
- comm, MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Recv_ft(tmp_recvbuf, 1, recvtype, dst,
+ MPIR_REDUCE_SCATTER_TAG,
+ comm, MPI_STATUS_IGNORE, errflag);
received = 1;
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -919,7 +942,7 @@
}
/* The following reduction is done here instead of after
- the MPIC_Sendrecv or MPIC_Recv above. This is
+ the MPIC_Sendrecv_ft or MPIC_Recv_ft above. This is
because to do it above, in the noncommutative
case, we would need an extra temp buffer so as not to
overwrite temp_recvbuf, because temp_recvbuf may have
@@ -1001,6 +1024,8 @@
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -1019,7 +1044,8 @@
int *recvcnts,
MPI_Datatype datatype,
MPI_Op op,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
/* Intercommunicator Reduce_scatter.
We first do an intercommunicator reduce to rank 0 on left group,
@@ -1068,9 +1094,10 @@
/* reduce from right group to rank 0*/
root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
mpi_errno = MPIR_Reduce_inter(sendbuf, tmp_buf, total_count, datatype, op,
- root, comm_ptr);
+ root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -1078,9 +1105,10 @@
/* reduce to rank 0 of right group */
root = 0;
mpi_errno = MPIR_Reduce_inter(sendbuf, tmp_buf, total_count, datatype, op,
- root, comm_ptr);
+ root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -1089,9 +1117,10 @@
/* reduce to rank 0 of left group */
root = 0;
mpi_errno = MPIR_Reduce_inter(sendbuf, tmp_buf, total_count, datatype, op,
- root, comm_ptr);
+ root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -1099,9 +1128,10 @@
/* reduce from right group to rank 0 */
root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
mpi_errno = MPIR_Reduce_inter(sendbuf, tmp_buf, total_count, datatype, op,
- root, comm_ptr);
+ root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -1116,9 +1146,10 @@
newcomm_ptr = comm_ptr->local_comm;
mpi_errno = MPIR_Scatterv(tmp_buf, recvcnts, disps, datatype, recvbuf,
- recvcnts[rank], datatype, 0, newcomm_ptr);
+ recvcnts[rank], datatype, 0, newcomm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -1127,6 +1158,8 @@
MPIU_CHKLMEM_FREEALL();
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -1142,19 +1175,19 @@
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Reduce_scatter(void *sendbuf, void *recvbuf, int *recvcnts,
- MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr)
+ MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->comm_kind == MPID_INTRACOMM) {
/* intracommunicator */
mpi_errno = MPIR_Reduce_scatter_intra(sendbuf, recvbuf, recvcnts,
- datatype, op, comm_ptr);
+ datatype, op, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
/* intercommunicator */
mpi_errno = MPIR_Reduce_scatter_inter(sendbuf, recvbuf, recvcnts,
- datatype, op, comm_ptr);
+ datatype, op, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -1173,17 +1206,17 @@
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Reduce_scatter_impl(void *sendbuf, void *recvbuf, int *recvcnts,
- MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr)
+ MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Reduce_scatter != NULL) {
mpi_errno = comm_ptr->coll_fns->Reduce_scatter(sendbuf, recvbuf, recvcnts,
- datatype, op, comm_ptr);
+ datatype, op, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
mpi_errno = MPIR_Reduce_scatter(sendbuf, recvbuf, recvcnts,
- datatype, op, comm_ptr);
+ datatype, op, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -1235,6 +1268,7 @@
{
int mpi_errno = MPI_SUCCESS;
MPID_Comm *comm_ptr = NULL;
+ int errflag = FALSE;
MPID_MPI_STATE_DECL(MPID_STATE_MPI_REDUCE_SCATTER);
MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -1312,7 +1346,7 @@
/* ... body of routine ... */
mpi_errno = MPIR_Reduce_scatter_impl(sendbuf, recvbuf, recvcnts,
- datatype, op, comm_ptr);
+ datatype, op, comm_ptr, &errflag);
if (mpi_errno) goto fn_fail;
/* ... end of body of routine ... */
Modified: mpich2/trunk/src/mpi/coll/red_scat_block.c
===================================================================
--- mpich2/trunk/src/mpi/coll/red_scat_block.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/red_scat_block.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -86,7 +86,8 @@
int recvcount,
MPI_Datatype datatype,
MPI_Op op,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
int mpi_errno = MPI_SUCCESS;
int mpi_errno_ret = MPI_SUCCESS;
@@ -189,13 +190,14 @@
send_offset += size;
}
- mpi_errno = MPIC_Sendrecv(outgoing_data + send_offset*true_extent,
- size, datatype, peer, MPIR_REDUCE_SCATTER_BLOCK_TAG,
- incoming_data + recv_offset*true_extent,
- size, datatype, peer, MPIR_REDUCE_SCATTER_BLOCK_TAG,
- comm, MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Sendrecv_ft(outgoing_data + send_offset*true_extent,
+ size, datatype, peer, MPIR_REDUCE_SCATTER_BLOCK_TAG,
+ incoming_data + recv_offset*true_extent,
+ size, datatype, peer, MPIR_REDUCE_SCATTER_BLOCK_TAG,
+ comm, MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -233,6 +235,8 @@
MPIU_CHKLMEM_FREEALL();
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -297,7 +301,8 @@
int recvcount,
MPI_Datatype datatype,
MPI_Op op,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
int rank, comm_size, i;
MPI_Aint extent, true_extent, true_lb;
@@ -416,11 +421,12 @@
if (rank < 2*rem) {
if (rank % 2 == 0) { /* even */
- mpi_errno = MPIC_Send(tmp_results, total_count,
- datatype, rank+1,
- MPIR_REDUCE_SCATTER_BLOCK_TAG, comm);
+ mpi_errno = MPIC_Send_ft(tmp_results, total_count,
+ datatype, rank+1,
+ MPIR_REDUCE_SCATTER_BLOCK_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -431,12 +437,13 @@
newrank = -1;
}
else { /* odd */
- mpi_errno = MPIC_Recv(tmp_recvbuf, total_count,
- datatype, rank-1,
- MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Recv_ft(tmp_recvbuf, total_count,
+ datatype, rank-1,
+ MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -516,30 +523,31 @@
*/
/* Send data from tmp_results. Recv into tmp_recvbuf */
if ((send_cnt != 0) && (recv_cnt != 0))
- mpi_errno = MPIC_Sendrecv((char *) tmp_results +
- newdisps[send_idx]*extent,
- send_cnt, datatype,
- dst, MPIR_REDUCE_SCATTER_BLOCK_TAG,
- (char *) tmp_recvbuf +
- newdisps[recv_idx]*extent,
- recv_cnt, datatype, dst,
- MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Sendrecv_ft((char *) tmp_results +
+ newdisps[send_idx]*extent,
+ send_cnt, datatype,
+ dst, MPIR_REDUCE_SCATTER_BLOCK_TAG,
+ (char *) tmp_recvbuf +
+ newdisps[recv_idx]*extent,
+ recv_cnt, datatype, dst,
+ MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
else if ((send_cnt == 0) && (recv_cnt != 0))
- mpi_errno = MPIC_Recv((char *) tmp_recvbuf +
- newdisps[recv_idx]*extent,
- recv_cnt, datatype, dst,
- MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Recv_ft((char *) tmp_recvbuf +
+ newdisps[recv_idx]*extent,
+ recv_cnt, datatype, dst,
+ MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
else if ((recv_cnt == 0) && (send_cnt != 0))
- mpi_errno = MPIC_Send((char *) tmp_results +
- newdisps[send_idx]*extent,
- send_cnt, datatype,
- dst, MPIR_REDUCE_SCATTER_BLOCK_TAG,
- comm);
+ mpi_errno = MPIC_Send_ft((char *) tmp_results +
+ newdisps[send_idx]*extent,
+ send_cnt, datatype,
+ dst, MPIR_REDUCE_SCATTER_BLOCK_TAG,
+ comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -582,19 +590,20 @@
calculated for that process */
if (rank < 2*rem) {
if (rank % 2) { /* odd */
- mpi_errno = MPIC_Send((char *) tmp_results +
- disps[rank-1]*extent, recvcount,
- datatype, rank-1,
- MPIR_REDUCE_SCATTER_BLOCK_TAG, comm);
+ mpi_errno = MPIC_Send_ft((char *) tmp_results +
+ disps[rank-1]*extent, recvcount,
+ datatype, rank-1,
+ MPIR_REDUCE_SCATTER_BLOCK_TAG, comm, errflag);
}
else { /* even */
- mpi_errno = MPIC_Recv(recvbuf, recvcount,
- datatype, rank+1,
- MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Recv_ft(recvbuf, recvcount,
+ datatype, rank+1,
+ MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
}
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -626,22 +635,23 @@
/* send the data that dst needs. recv data that this process
needs from src into tmp_recvbuf */
if (sendbuf != MPI_IN_PLACE)
- mpi_errno = MPIC_Sendrecv(((char *)sendbuf+disps[dst]*extent),
- recvcount, datatype, dst,
- MPIR_REDUCE_SCATTER_BLOCK_TAG, tmp_recvbuf,
- recvcount, datatype, src,
- MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Sendrecv_ft(((char *)sendbuf+disps[dst]*extent),
+ recvcount, datatype, dst,
+ MPIR_REDUCE_SCATTER_BLOCK_TAG, tmp_recvbuf,
+ recvcount, datatype, src,
+ MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
else
- mpi_errno = MPIC_Sendrecv(((char *)recvbuf+disps[dst]*extent),
- recvcount, datatype, dst,
- MPIR_REDUCE_SCATTER_BLOCK_TAG, tmp_recvbuf,
- recvcount, datatype, src,
- MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Sendrecv_ft(((char *)recvbuf+disps[dst]*extent),
+ recvcount, datatype, dst,
+ MPIR_REDUCE_SCATTER_BLOCK_TAG, tmp_recvbuf,
+ recvcount, datatype, src,
+ MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -736,8 +746,13 @@
/* power of two check */
if (!(comm_size & (comm_size - 1))) {
/* noncommutative, pof2 size */
- mpi_errno = MPIR_Reduce_scatter_block_noncomm(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr);
- if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ mpi_errno = MPIR_Reduce_scatter_block_noncomm(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, errflag);
+ if (mpi_errno) {
+ /* for communication errors, just record the error but continue */
+ *errflag = TRUE;
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+ MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
+ }
}
else {
/* noncommutative and non-pof2, use recursive doubling. */
@@ -825,14 +840,15 @@
received in tmp_recvbuf and then accumulated into
tmp_results. accumulation is done later below. */
- mpi_errno = MPIC_Sendrecv(tmp_results, 1, sendtype, dst,
- MPIR_REDUCE_SCATTER_BLOCK_TAG,
- tmp_recvbuf, 1, recvtype, dst,
- MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Sendrecv_ft(tmp_results, 1, sendtype, dst,
+ MPIR_REDUCE_SCATTER_BLOCK_TAG,
+ tmp_recvbuf, 1, recvtype, dst,
+ MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
received = 1;
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -874,11 +890,12 @@
(rank < tree_root + nprocs_completed)
&& (dst >= tree_root + nprocs_completed)) {
/* send the current result */
- mpi_errno = MPIC_Send(tmp_recvbuf, 1, recvtype,
- dst, MPIR_REDUCE_SCATTER_BLOCK_TAG,
- comm);
+ mpi_errno = MPIC_Send_ft(tmp_recvbuf, 1, recvtype,
+ dst, MPIR_REDUCE_SCATTER_BLOCK_TAG,
+ comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -888,12 +905,13 @@
else if ((dst < rank) &&
(dst < tree_root + nprocs_completed) &&
(rank >= tree_root + nprocs_completed)) {
- mpi_errno = MPIC_Recv(tmp_recvbuf, 1, recvtype, dst,
- MPIR_REDUCE_SCATTER_BLOCK_TAG,
- comm, MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Recv_ft(tmp_recvbuf, 1, recvtype, dst,
+ MPIR_REDUCE_SCATTER_BLOCK_TAG,
+ comm, MPI_STATUS_IGNORE, errflag);
received = 1;
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -904,7 +922,7 @@
}
/* The following reduction is done here instead of after
- the MPIC_Sendrecv or MPIC_Recv above. This is
+ the MPIC_Sendrecv_ft or MPIC_Recv_ft above. This is
because to do it above, in the noncommutative
case, we would need an extra temp buffer so as not to
overwrite temp_recvbuf, because temp_recvbuf may have
@@ -986,6 +1004,8 @@
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -1004,7 +1024,7 @@
int recvcount,
MPI_Datatype datatype,
MPI_Op op,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr, int *errflag )
{
/* Intercommunicator Reduce_scatter_block.
We first do an intercommunicator reduce to rank 0 on left group,
@@ -1043,9 +1063,10 @@
/* reduce from right group to rank 0*/
root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
mpi_errno = MPIR_Reduce_inter(sendbuf, tmp_buf, total_count, datatype, op,
- root, comm_ptr);
+ root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -1053,9 +1074,10 @@
/* reduce to rank 0 of right group */
root = 0;
mpi_errno = MPIR_Reduce_inter(sendbuf, tmp_buf, total_count, datatype, op,
- root, comm_ptr);
+ root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -1064,9 +1086,10 @@
/* reduce to rank 0 of left group */
root = 0;
mpi_errno = MPIR_Reduce_inter(sendbuf, tmp_buf, total_count, datatype, op,
- root, comm_ptr);
+ root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -1074,9 +1097,10 @@
/* reduce from right group to rank 0 */
root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
mpi_errno = MPIR_Reduce_inter(sendbuf, tmp_buf, total_count, datatype, op,
- root, comm_ptr);
+ root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -1089,9 +1113,10 @@
newcomm_ptr = comm_ptr->local_comm;
mpi_errno = MPIR_Scatter_impl(tmp_buf, recvcount, datatype, recvbuf,
- recvcount, datatype, 0, newcomm_ptr);
+ recvcount, datatype, 0, newcomm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -1100,6 +1125,8 @@
MPIU_CHKLMEM_FREEALL();
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -1115,17 +1142,17 @@
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Reduce_scatter_block(void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype,
- MPI_Op op, MPID_Comm *comm_ptr)
+ MPI_Op op, MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->comm_kind == MPID_INTRACOMM) {
/* intracommunicator */
- mpi_errno = MPIR_Reduce_scatter_block_intra(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr);
+ mpi_errno = MPIR_Reduce_scatter_block_intra(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
/* intercommunicator */
- mpi_errno = MPIR_Reduce_scatter_block_inter(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr);
+ mpi_errno = MPIR_Reduce_scatter_block_inter(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -1145,21 +1172,21 @@
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Reduce_scatter_block_impl(void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype,
- MPI_Op op, MPID_Comm *comm_ptr)
+ MPI_Op op, MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Reduce_scatter_block != NULL) {
- mpi_errno = comm_ptr->coll_fns->Reduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr);
+ mpi_errno = comm_ptr->coll_fns->Reduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
if (comm_ptr->comm_kind == MPID_INTRACOMM) {
/* intracommunicator */
- mpi_errno = MPIR_Reduce_scatter_block_intra(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr);
+ mpi_errno = MPIR_Reduce_scatter_block_intra(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
/* intercommunicator */
- mpi_errno = MPIR_Reduce_scatter_block_inter(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr);
+ mpi_errno = MPIR_Reduce_scatter_block_inter(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
}
@@ -1211,6 +1238,7 @@
{
int mpi_errno = MPI_SUCCESS;
MPID_Comm *comm_ptr = NULL;
+ int errflag = FALSE;
MPID_MPI_STATE_DECL(MPID_STATE_MPI_REDUCE_SCATTER_BLOCK);
MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -1279,7 +1307,7 @@
/* ... body of routine ... */
- mpi_errno = MPIR_Reduce_scatter_block_impl(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr);
+ mpi_errno = MPIR_Reduce_scatter_block_impl(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, &errflag);
if (mpi_errno) goto fn_fail;
/* ... end of body of routine ... */
Modified: mpich2/trunk/src/mpi/coll/reduce.c
===================================================================
--- mpich2/trunk/src/mpi/coll/reduce.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/reduce.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -38,7 +38,8 @@
MPI_Datatype datatype,
MPI_Op op,
int root,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
int mpi_errno = MPI_SUCCESS;
int mpi_errno_ret = MPI_SUCCESS;
@@ -167,10 +168,11 @@
source = (relrank | mask);
if (source < comm_size) {
source = (source + lroot) % comm_size;
- mpi_errno = MPIC_Recv (tmp_buf, count, datatype, source,
- MPIR_REDUCE_TAG, comm, &status);
+ mpi_errno = MPIC_Recv_ft(tmp_buf, count, datatype, source,
+ MPIR_REDUCE_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -206,10 +208,11 @@
/* I've received all that I'm going to. Send my result to
my parent */
source = ((relrank & (~ mask)) + lroot) % comm_size;
- mpi_errno = MPIC_Send( recvbuf, count, datatype,
- source, MPIR_REDUCE_TAG, comm );
+ mpi_errno = MPIC_Send_ft(recvbuf, count, datatype,
+ source, MPIR_REDUCE_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -222,16 +225,17 @@
{
if (rank == 0)
{
- mpi_errno = MPIC_Send( recvbuf, count, datatype, root,
- MPIR_REDUCE_TAG, comm );
+ mpi_errno = MPIC_Send_ft(recvbuf, count, datatype, root,
+ MPIR_REDUCE_TAG, comm, errflag);
}
else if (rank == root)
{
- mpi_errno = MPIC_Recv ( recvbuf, count, datatype, 0,
- MPIR_REDUCE_TAG, comm, &status);
+ mpi_errno = MPIC_Recv_ft(recvbuf, count, datatype, 0,
+ MPIR_REDUCE_TAG, comm, &status, errflag);
}
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -250,6 +254,8 @@
MPIU_CHKLMEM_FREEALL();
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -293,7 +299,8 @@
MPI_Datatype datatype,
MPI_Op op,
int root,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
int mpi_errno = MPI_SUCCESS;
int mpi_errno_ret = MPI_SUCCESS;
@@ -402,11 +409,12 @@
if (rank < 2*rem) {
if (rank % 2 != 0) { /* odd */
- mpi_errno = MPIC_Send(recvbuf, count,
- datatype, rank-1,
- MPIR_REDUCE_TAG, comm);
+ mpi_errno = MPIC_Send_ft(recvbuf, count,
+ datatype, rank-1,
+ MPIR_REDUCE_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -417,12 +425,13 @@
newrank = -1;
}
else { /* even */
- mpi_errno = MPIC_Recv(tmp_buf, count,
- datatype, rank+1,
- MPIR_REDUCE_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Recv_ft(tmp_buf, count,
+ datatype, rank+1,
+ MPIR_REDUCE_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -495,17 +504,18 @@
send_cnt, recv_cnt, last_idx);
*/
/* Send data from recvbuf. Recv into tmp_buf */
- mpi_errno = MPIC_Sendrecv((char *) recvbuf +
- disps[send_idx]*extent,
- send_cnt, datatype,
- dst, MPIR_REDUCE_TAG,
- (char *) tmp_buf +
- disps[recv_idx]*extent,
- recv_cnt, datatype, dst,
- MPIR_REDUCE_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Sendrecv_ft((char *) recvbuf +
+ disps[send_idx]*extent,
+ send_cnt, datatype,
+ dst, MPIR_REDUCE_TAG,
+ (char *) tmp_buf +
+ disps[recv_idx]*extent,
+ recv_cnt, datatype, dst,
+ MPIR_REDUCE_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -559,11 +569,12 @@
for (i=1; i<pof2; i++)
disps[i] = disps[i-1] + cnts[i-1];
- mpi_errno = MPIC_Recv(recvbuf, cnts[0], datatype,
- 0, MPIR_REDUCE_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Recv_ft(recvbuf, cnts[0], datatype,
+ 0, MPIR_REDUCE_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -572,10 +583,11 @@
last_idx = 2;
}
else if (newrank == 0) { /* send */
- mpi_errno = MPIC_Send(recvbuf, cnts[0], datatype,
- root, MPIR_REDUCE_TAG, comm);
+ mpi_errno = MPIC_Send_ft(recvbuf, cnts[0], datatype,
+ root, MPIR_REDUCE_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -642,13 +654,14 @@
/* printf("Rank %d, send_idx %d, send_cnt %d, last_idx %d\n", newrank, send_idx, send_cnt, last_idx);
fflush(stdout); */
/* Send data from recvbuf. Recv into tmp_buf */
- mpi_errno = MPIC_Send((char *) recvbuf +
- disps[send_idx]*extent,
- send_cnt, datatype,
- dst, MPIR_REDUCE_TAG,
- comm);
+ mpi_errno = MPIC_Send_ft((char *) recvbuf +
+ disps[send_idx]*extent,
+ send_cnt, datatype,
+ dst, MPIR_REDUCE_TAG,
+ comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -658,13 +671,14 @@
/* recv and continue */
/* printf("Rank %d, recv_idx %d, recv_cnt %d, last_idx %d\n", newrank, recv_idx, recv_cnt, last_idx);
fflush(stdout); */
- mpi_errno = MPIC_Recv((char *) recvbuf +
- disps[recv_idx]*extent,
- recv_cnt, datatype, dst,
- MPIR_REDUCE_TAG, comm,
- MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Recv_ft((char *) recvbuf +
+ disps[recv_idx]*extent,
+ recv_cnt, datatype, dst,
+ MPIR_REDUCE_TAG, comm,
+ MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -690,6 +704,8 @@
MPIU_CHKLMEM_FREEALL();
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -763,7 +779,8 @@
MPI_Datatype datatype,
MPI_Op op,
int root,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
int mpi_errno = MPI_SUCCESS;
int mpi_errno_ret = MPI_SUCCESS;
@@ -808,9 +825,10 @@
if (comm_ptr->node_comm != NULL &&
MPIU_Get_intranode_rank(comm_ptr, root) == -1) {
mpi_errno = MPIR_Reduce_impl(sendbuf, tmp_buf, count, datatype,
- op, 0, comm_ptr->node_comm);
+ op, 0, comm_ptr->node_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -824,9 +842,10 @@
void *buf = (comm_ptr->node_comm == NULL ? sendbuf : tmp_buf);
mpi_errno = MPIR_Reduce_impl(buf, NULL, count, datatype,
op, MPIU_Get_internode_rank(comm_ptr, root),
- comm_ptr->node_roots_comm);
+ comm_ptr->node_roots_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -838,9 +857,10 @@
mpi_errno = MPIR_Reduce_impl(sendbuf, tmp_buf, count, datatype,
op, MPIU_Get_internode_rank(comm_ptr, root),
- comm_ptr->node_roots_comm);
+ comm_ptr->node_roots_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -853,9 +873,10 @@
mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype,
op, MPIU_Get_internode_rank(comm_ptr, root),
- comm_ptr->node_roots_comm);
+ comm_ptr->node_roots_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -872,9 +893,10 @@
MPIU_Get_intranode_rank(comm_ptr, root) != -1) {
mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype,
op, MPIU_Get_intranode_rank(comm_ptr, root),
- comm_ptr->node_comm);
+ comm_ptr->node_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -908,18 +930,20 @@
if ((count*type_size > MPIR_PARAM_REDUCE_SHORT_MSG_SIZE) &&
(HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN) && (count >= pof2)) {
/* do a reduce-scatter followed by gather to root. */
- mpi_errno = MPIR_Reduce_redscat_gather(sendbuf, recvbuf, count, datatype, op, root, comm_ptr);
+ mpi_errno = MPIR_Reduce_redscat_gather(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
else {
/* use a binomial tree algorithm */
- mpi_errno = MPIR_Reduce_binomial(sendbuf, recvbuf, count, datatype, op, root, comm_ptr);
+ mpi_errno = MPIR_Reduce_binomial(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -934,6 +958,8 @@
#endif
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -953,7 +979,8 @@
MPI_Datatype datatype,
MPI_Op op,
int root,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
/* Intercommunicator reduce.
Remote group does a local intracommunicator
@@ -982,10 +1009,11 @@
if (root == MPI_ROOT) {
/* root receives data from rank 0 on remote group */
- mpi_errno = MPIC_Recv(recvbuf, count, datatype, 0,
- MPIR_REDUCE_TAG, comm, &status);
+ mpi_errno = MPIC_Recv_ft(recvbuf, count, datatype, 0,
+ MPIR_REDUCE_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -1020,19 +1048,21 @@
/* now do a local reduce on this intracommunicator */
mpi_errno = MPIR_Reduce_intra(sendbuf, tmp_buf, count, datatype,
- op, 0, newcomm_ptr);
+ op, 0, newcomm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
if (rank == 0)
{
- mpi_errno = MPIC_Send(tmp_buf, count, datatype, root,
- MPIR_REDUCE_TAG, comm);
+ mpi_errno = MPIC_Send_ft(tmp_buf, count, datatype, root,
+ MPIR_REDUCE_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -1044,6 +1074,8 @@
MPIU_CHKLMEM_FREEALL();
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
@@ -1060,19 +1092,19 @@
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
- MPI_Op op, int root, MPID_Comm *comm_ptr)
+ MPI_Op op, int root, MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->comm_kind == MPID_INTRACOMM) {
/* intracommunicator */
mpi_errno = MPIR_Reduce_intra(sendbuf, recvbuf, count, datatype,
- op, root, comm_ptr);
+ op, root, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
/* intercommunicator */
mpi_errno = MPIR_Reduce_inter(sendbuf, recvbuf, count, datatype,
- op, root, comm_ptr);
+ op, root, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -1091,24 +1123,24 @@
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Reduce_impl(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
- MPI_Op op, int root, MPID_Comm *comm_ptr)
+ MPI_Op op, int root, MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Reduce != NULL) {
mpi_errno = comm_ptr->coll_fns->Reduce(sendbuf, recvbuf, count,
- datatype, op, root, comm_ptr);
+ datatype, op, root, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
if (comm_ptr->comm_kind == MPID_INTRACOMM) {
/* intracommunicator */
mpi_errno = MPIR_Reduce_intra(sendbuf, recvbuf, count, datatype,
- op, root, comm_ptr);
+ op, root, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
/* intercommunicator */
mpi_errno = MPIR_Reduce_inter(sendbuf, recvbuf, count, datatype,
- op, root, comm_ptr);
+ op, root, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
}
@@ -1164,6 +1196,7 @@
{
int mpi_errno = MPI_SUCCESS;
MPID_Comm *comm_ptr = NULL;
+ int errflag = FALSE;
MPID_MPI_STATE_DECL(MPID_STATE_MPI_REDUCE);
MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -1271,7 +1304,7 @@
/* ... body of routine ... */
- mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype, op, root, comm_ptr);
+ mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, &errflag);
if (mpi_errno) goto fn_fail;
/* ... end of body of routine ... */
Modified: mpich2/trunk/src/mpi/coll/scan.c
===================================================================
--- mpich2/trunk/src/mpi/coll/scan.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/scan.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -71,7 +71,8 @@
int count,
MPI_Datatype datatype,
MPI_Op op,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
MPI_Status status;
int rank, comm_size;
@@ -167,13 +168,14 @@
dst = rank ^ mask;
if (dst < comm_size) {
/* Send partial_scan to dst. Recv into tmp_buf */
- mpi_errno = MPIC_Sendrecv(partial_scan, count, datatype,
- dst, MPIR_SCAN_TAG, tmp_buf,
- count, datatype, dst,
- MPIR_SCAN_TAG, comm,
- &status);
+ mpi_errno = MPIC_Sendrecv_ft(partial_scan, count, datatype,
+ dst, MPIR_SCAN_TAG, tmp_buf,
+ count, datatype, dst,
+ MPIR_SCAN_TAG, comm,
+ &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -235,6 +237,8 @@
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -256,7 +260,8 @@
int count,
MPI_Datatype datatype,
MPI_Op op,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
int mpi_errno = MPI_SUCCESS;
int mpi_errno_ret = MPI_SUCCESS;
@@ -278,7 +283,7 @@
if (!MPIR_Comm_is_node_consecutive(comm_ptr)) {
/* We can't use the SMP-aware algorithm, use the generic one */
- return MPIR_Scan_generic(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+ return MPIR_Scan_generic(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
}
MPIU_THREADPRIV_GET;
@@ -310,9 +315,10 @@
if (comm_ptr->node_comm != NULL)
{
mpi_errno = MPIR_Scan_impl(sendbuf, recvbuf, count, datatype,
- op, comm_ptr->node_comm);
+ op, comm_ptr->node_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -330,11 +336,12 @@
reduced data of rank 1,2,3. */
if (comm_ptr->node_roots_comm != NULL && comm_ptr->node_comm != NULL)
{
- mpi_errno = MPIC_Recv(localfulldata, count, datatype,
- comm_ptr->node_comm->local_size - 1, MPIR_SCAN_TAG,
- comm_ptr->node_comm->handle, &status);
+ mpi_errno = MPIC_Recv_ft(localfulldata, count, datatype,
+ comm_ptr->node_comm->local_size - 1, MPIR_SCAN_TAG,
+ comm_ptr->node_comm->handle, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -343,10 +350,11 @@
comm_ptr->node_comm != NULL &&
MPIU_Get_intranode_rank(comm_ptr, rank) == comm_ptr->node_comm->local_size - 1)
{
- mpi_errno = MPIC_Send(recvbuf, count, datatype,
- 0, MPIR_SCAN_TAG, comm_ptr->node_comm->handle);
+ mpi_errno = MPIC_Send_ft(recvbuf, count, datatype,
+ 0, MPIR_SCAN_TAG, comm_ptr->node_comm->handle, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -363,9 +371,10 @@
if (comm_ptr->node_roots_comm != NULL)
{
mpi_errno = MPIR_Scan_impl(localfulldata, prefulldata, count, datatype,
- op, comm_ptr->node_roots_comm);
+ op, comm_ptr->node_roots_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -373,24 +382,26 @@
if (MPIU_Get_internode_rank(comm_ptr, rank) !=
comm_ptr->node_roots_comm->local_size-1)
{
- mpi_errno = MPIC_Send(prefulldata, count, datatype,
- MPIU_Get_internode_rank(comm_ptr, rank) + 1,
- MPIR_SCAN_TAG, comm_ptr->node_roots_comm->handle);
+ mpi_errno = MPIC_Send_ft(prefulldata, count, datatype,
+ MPIU_Get_internode_rank(comm_ptr, rank) + 1,
+ MPIR_SCAN_TAG, comm_ptr->node_roots_comm->handle, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
if (MPIU_Get_internode_rank(comm_ptr, rank) != 0)
{
- mpi_errno = MPIC_Recv(tempbuf, count, datatype,
- MPIU_Get_internode_rank(comm_ptr, rank) - 1,
- MPIR_SCAN_TAG, comm_ptr->node_roots_comm->handle,
- &status);
+ mpi_errno = MPIC_Recv_ft(tempbuf, count, datatype,
+ MPIU_Get_internode_rank(comm_ptr, rank) - 1,
+ MPIR_SCAN_TAG, comm_ptr->node_roots_comm->handle,
+ &status, errflag);
noneed = 0;
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -404,9 +415,10 @@
reduce it with recvbuf to get final result if necessary. */
if (comm_ptr->node_comm != NULL) {
- mpi_errno = MPIR_Bcast_impl(&noneed, 1, MPI_INT, 0, comm_ptr->node_comm);
+ mpi_errno = MPIR_Bcast_impl(&noneed, 1, MPI_INT, 0, comm_ptr->node_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -417,9 +429,10 @@
int is_cxx_uop = 0;
#endif
if (comm_ptr->node_comm != NULL) {
- mpi_errno = MPIR_Bcast_impl(tempbuf, count, datatype, 0, comm_ptr->node_comm);
+ mpi_errno = MPIR_Bcast_impl(tempbuf, count, datatype, 0, comm_ptr->node_comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -462,6 +475,8 @@
MPIU_CHKLMEM_FREEALL();
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
@@ -477,17 +492,17 @@
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Scan_impl(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
- MPI_Op op, MPID_Comm *comm_ptr)
+ MPI_Op op, MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Scan != NULL) {
mpi_errno = comm_ptr->coll_fns->Scan(sendbuf, recvbuf, count,
- datatype, op, comm_ptr);
+ datatype, op, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
mpi_errno = MPIR_Scan(sendbuf, recvbuf, count, datatype,
- op, comm_ptr);
+ op, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -538,6 +553,7 @@
{
int mpi_errno = MPI_SUCCESS;
MPID_Comm *comm_ptr = NULL;
+ int errflag = FALSE;
MPID_MPI_STATE_DECL(MPID_STATE_MPI_SCAN);
MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -605,7 +621,7 @@
/* ... body of routine ... */
- mpi_errno = MPIR_Scan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+ mpi_errno = MPIR_Scan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, &errflag);
if (mpi_errno) goto fn_fail;
/* ... end of body of routine ... */
Modified: mpich2/trunk/src/mpi/coll/scatter.c
===================================================================
--- mpich2/trunk/src/mpi/coll/scatter.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/scatter.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -58,7 +58,8 @@
int recvcnt,
MPI_Datatype recvtype,
int root,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
MPI_Status status;
MPI_Aint extent=0;
@@ -169,20 +170,22 @@
they don't have to forward data to anyone. Others
receive data into a temporary buffer. */
if (relative_rank % 2) {
- mpi_errno = MPIC_Recv(recvbuf, recvcnt, recvtype,
- src, MPIR_SCATTER_TAG, comm,
- &status);
+ mpi_errno = MPIC_Recv_ft(recvbuf, recvcnt, recvtype,
+ src, MPIR_SCATTER_TAG, comm,
+ &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
else {
- mpi_errno = MPIC_Recv(tmp_buf, tmp_buf_size, MPI_BYTE, src,
- MPIR_SCATTER_TAG, comm, &status);
+ mpi_errno = MPIC_Recv_ft(tmp_buf, tmp_buf_size, MPI_BYTE, src,
+ MPIR_SCATTER_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
curr_cnt = 0;
@@ -211,24 +214,25 @@
{
send_subtree_cnt = curr_cnt - sendcnt * mask;
/* mask is also the size of this process's subtree */
- mpi_errno = MPIC_Send (((char *)sendbuf +
- extent * sendcnt * mask),
- send_subtree_cnt,
- sendtype, dst,
- MPIR_SCATTER_TAG, comm);
+ mpi_errno = MPIC_Send_ft(((char *)sendbuf +
+ extent * sendcnt * mask),
+ send_subtree_cnt,
+ sendtype, dst,
+ MPIR_SCATTER_TAG, comm, errflag);
}
else
{
/* non-zero root and others */
send_subtree_cnt = curr_cnt - nbytes*mask;
/* mask is also the size of this process's subtree */
- mpi_errno = MPIC_Send (((char *)tmp_buf + nbytes*mask),
- send_subtree_cnt,
- MPI_BYTE, dst,
- MPIR_SCATTER_TAG, comm);
+ mpi_errno = MPIC_Send_ft(((char *)tmp_buf + nbytes*mask),
+ send_subtree_cnt,
+ MPI_BYTE, dst,
+ MPIR_SCATTER_TAG, comm, errflag);
}
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -330,10 +334,11 @@
src = rank - mask;
if (src < 0) src += comm_size;
- mpi_errno = MPIC_Recv(tmp_buf, tmp_buf_size, MPI_BYTE, src,
- MPIR_SCATTER_TAG, comm, &status);
+ mpi_errno = MPIC_Recv_ft(tmp_buf, tmp_buf_size, MPI_BYTE, src,
+ MPIR_SCATTER_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
curr_cnt = 0;
@@ -359,11 +364,12 @@
send_subtree_cnt = curr_cnt - nbytes * mask;
/* mask is also the size of this process's subtree */
- mpi_errno = MPIC_Send (((char *)tmp_buf + nbytes*mask),
- send_subtree_cnt, MPI_BYTE, dst,
- MPIR_SCATTER_TAG, comm);
+ mpi_errno = MPIC_Send_ft(((char *)tmp_buf + nbytes*mask),
+ send_subtree_cnt, MPI_BYTE, dst,
+ MPIR_SCATTER_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -388,6 +394,8 @@
MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -408,7 +416,8 @@
int recvcnt,
MPI_Datatype recvtype,
int root,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
/* Intercommunicator scatter.
For short messages, root sends to rank 0 in remote group. rank 0
@@ -452,10 +461,11 @@
if (nbytes < MPIR_PARAM_SCATTER_INTER_SHORT_MSG_SIZE) {
if (root == MPI_ROOT) {
/* root sends all data to rank 0 on remote group and returns */
- mpi_errno = MPIC_Send(sendbuf, sendcnt*remote_size,
- sendtype, 0, MPIR_SCATTER_TAG, comm);
+ mpi_errno = MPIC_Send_ft(sendbuf, sendcnt*remote_size,
+ sendtype, 0, MPIR_SCATTER_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -480,11 +490,12 @@
/* adjust for potential negative lower bound in datatype */
tmp_buf = (void *)((char*)tmp_buf - true_lb);
- mpi_errno = MPIC_Recv(tmp_buf, recvcnt*local_size,
- recvtype, root,
- MPIR_SCATTER_TAG, comm, &status);
+ mpi_errno = MPIC_Recv_ft(tmp_buf, recvcnt*local_size,
+ recvtype, root,
+ MPIR_SCATTER_TAG, comm, &status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -499,9 +510,10 @@
/* now do the usual scatter on this intracommunicator */
mpi_errno = MPIR_Scatter_impl(tmp_buf, recvcnt, recvtype,
recvbuf, recvcnt, recvtype, 0,
- newcomm_ptr);
+ newcomm_ptr, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -512,21 +524,23 @@
if (root == MPI_ROOT) {
MPID_Datatype_get_extent_macro(sendtype, extent);
for (i=0; i<remote_size; i++) {
- mpi_errno = MPIC_Send(((char *)sendbuf+sendcnt*i*extent),
- sendcnt, sendtype, i,
- MPIR_SCATTER_TAG, comm);
+ mpi_errno = MPIC_Send_ft(((char *)sendbuf+sendcnt*i*extent),
+ sendcnt, sendtype, i,
+ MPIR_SCATTER_TAG, comm, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
}
}
else {
- mpi_errno = MPIC_Recv(recvbuf,recvcnt,recvtype,root,
- MPIR_SCATTER_TAG,comm,&status);
+ mpi_errno = MPIC_Recv_ft(recvbuf,recvcnt,recvtype,root,
+ MPIR_SCATTER_TAG,comm,&status, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -539,6 +553,8 @@
MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -554,7 +570,7 @@
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Scatter(void *sendbuf, int sendcnt, MPI_Datatype sendtype,
void *recvbuf, int recvcnt, MPI_Datatype recvtype,
- int root, MPID_Comm *comm_ptr)
+ int root, MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
@@ -562,13 +578,13 @@
/* intracommunicator */
mpi_errno = MPIR_Scatter_intra(sendbuf, sendcnt, sendtype,
recvbuf, recvcnt, recvtype, root,
- comm_ptr);
+ comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
/* intercommunicator */
mpi_errno = MPIR_Scatter_inter(sendbuf, sendcnt, sendtype,
recvbuf, recvcnt, recvtype, root,
- comm_ptr);
+ comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -589,17 +605,17 @@
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Scatter_impl(void *sendbuf, int sendcnt, MPI_Datatype sendtype,
void *recvbuf, int recvcnt, MPI_Datatype recvtype,
- int root, MPID_Comm *comm_ptr)
+ int root, MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Scatter != NULL) {
mpi_errno = comm_ptr->coll_fns->Scatter(sendbuf, sendcnt, sendtype,
- recvbuf, recvcnt, recvtype, root, comm_ptr);
+ recvbuf, recvcnt, recvtype, root, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
mpi_errno = MPIR_Scatter(sendbuf, sendcnt, sendtype,
- recvbuf, recvcnt, recvtype, root, comm_ptr);
+ recvbuf, recvcnt, recvtype, root, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -652,6 +668,7 @@
{
int mpi_errno = MPI_SUCCESS;
MPID_Comm *comm_ptr = NULL;
+ int errflag = FALSE;
MPID_MPI_STATE_DECL(MPID_STATE_MPI_SCATTER);
MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -756,7 +773,7 @@
mpi_errno = MPIR_Scatter_impl(sendbuf, sendcnt, sendtype,
recvbuf, recvcnt, recvtype, root,
- comm_ptr);
+ comm_ptr, &errflag);
if (mpi_errno) goto fn_fail;
/* ... end of body of routine ... */
Modified: mpich2/trunk/src/mpi/coll/scatterv.c
===================================================================
--- mpich2/trunk/src/mpi/coll/scatterv.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/scatterv.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -57,7 +57,8 @@
int recvcnt,
MPI_Datatype recvtype,
int root,
- MPID_Comm *comm_ptr )
+ MPID_Comm *comm_ptr,
+ int *errflag )
{
int rank, comm_size, mpi_errno = MPI_SUCCESS;
int mpi_errno_ret = MPI_SUCCESS;
@@ -106,15 +107,15 @@
}
}
else {
- mpi_errno = MPIC_Isend(((char *)sendbuf+displs[i]*extent),
- sendcnts[i], sendtype, i,
- MPIR_SCATTERV_TAG, comm, &reqarray[reqs++]);
+ mpi_errno = MPIC_Isend_ft(((char *)sendbuf+displs[i]*extent),
+ sendcnts[i], sendtype, i,
+ MPIR_SCATTERV_TAG, comm, &reqarray[reqs++], errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
}
}
/* ... then wait for *all* of them to finish: */
- mpi_errno = MPIR_Waitall_impl(reqs, reqarray, starray);
+ mpi_errno = MPIC_Waitall_ft(reqs, reqarray, starray, errflag);
if (mpi_errno && mpi_errno != MPI_ERR_IN_STATUS) MPIU_ERR_POP(mpi_errno);
/* --BEGIN ERROR HANDLING-- */
if (mpi_errno == MPI_ERR_IN_STATUS) {
@@ -123,6 +124,7 @@
mpi_errno = starray[i].MPI_ERROR;
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -134,10 +136,11 @@
else if (root != MPI_PROC_NULL) { /* non-root nodes, and in the intercomm. case, non-root nodes on remote side */
if (recvcnt) {
- mpi_errno = MPIC_Recv(recvbuf,recvcnt,recvtype,root,
- MPIR_SCATTERV_TAG,comm,MPI_STATUS_IGNORE);
+ mpi_errno = MPIC_Recv_ft(recvbuf,recvcnt,recvtype,root,
+ MPIR_SCATTERV_TAG,comm,MPI_STATUS_IGNORE, errflag);
if (mpi_errno) {
/* for communication errors, just record the error but continue */
+ *errflag = TRUE;
MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
}
@@ -151,6 +154,8 @@
MPIU_CHKLMEM_FREEALL();
if (mpi_errno_ret)
mpi_errno = mpi_errno_ret;
+ else if (*errflag)
+ MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
return mpi_errno;
fn_fail:
goto fn_exit;
@@ -166,19 +171,19 @@
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Scatterv_impl(void *sendbuf, int *sendcnts, int *displs, MPI_Datatype sendtype,
void *recvbuf, int recvcnt, MPI_Datatype recvtype,
- int root, MPID_Comm *comm_ptr)
+ int root, MPID_Comm *comm_ptr, int *errflag)
{
int mpi_errno = MPI_SUCCESS;
if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Scatter != NULL) {
mpi_errno = comm_ptr->coll_fns->Scatterv(sendbuf, sendcnts, displs,
sendtype, recvbuf, recvcnt,
- recvtype, root, comm_ptr);
+ recvtype, root, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
} else {
mpi_errno = MPIR_Scatterv(sendbuf, sendcnts, displs, sendtype,
recvbuf, recvcnt, recvtype,
- root, comm_ptr);
+ root, comm_ptr, errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
}
@@ -232,6 +237,7 @@
{
int mpi_errno = MPI_SUCCESS;
MPID_Comm *comm_ptr = NULL;
+ int errflag = FALSE;
MPID_MPI_STATE_DECL(MPID_STATE_MPI_SCATTERV);
MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -352,7 +358,7 @@
mpi_errno = MPIR_Scatterv_impl(sendbuf, sendcnts, displs, sendtype,
recvbuf, recvcnt, recvtype,
- root, comm_ptr);
+ root, comm_ptr, &errflag);
if (mpi_errno) goto fn_fail;
/* ... end of body of routine ... */
Modified: mpich2/trunk/src/mpi/comm/comm_create.c
===================================================================
--- mpich2/trunk/src/mpi/comm/comm_create.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/comm/comm_create.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -334,7 +334,7 @@
int rinfo[2];
MPID_VCR *mapping_vcr = NULL;
MPID_VCR *remote_mapping_vcr = NULL;
-
+ int errflag = FALSE;
MPIU_CHKLMEM_DECL(1);
MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE_INTER);
@@ -418,19 +418,20 @@
/* Broadcast to the other members of the local group */
mpi_errno = MPIR_Bcast_impl( rinfo, 2, MPI_INT, 0,
- comm_ptr->local_comm);
+ comm_ptr->local_comm, &errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
mpi_errno = MPIR_Bcast_impl( remote_mapping, remote_size, MPI_INT, 0,
- comm_ptr->local_comm);
+ comm_ptr->local_comm, &errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
}
else {
/* The other processes */
/* Broadcast to the other members of the local group */
mpi_errno = MPIR_Bcast_impl( rinfo, 2, MPI_INT, 0,
- comm_ptr->local_comm);
+ comm_ptr->local_comm, &errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
if (newcomm_ptr != NULL) {
newcomm_ptr->context_id = rinfo[0];
}
@@ -439,8 +440,9 @@
remote_size*sizeof(int),
mpi_errno,"remote_mapping");
mpi_errno = MPIR_Bcast_impl( remote_mapping, remote_size, MPI_INT, 0,
- comm_ptr->local_comm);
+ comm_ptr->local_comm, &errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
}
if (group_ptr->rank != MPI_UNDEFINED) {
Modified: mpich2/trunk/src/mpi/comm/comm_split.c
===================================================================
--- mpich2/trunk/src/mpi/comm/comm_split.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/comm/comm_split.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -101,6 +101,7 @@
first_entry = 0, first_remote_entry = 0, *last_ptr;
int in_newcomm; /* TRUE iff *newcomm should be populated */
MPIR_Context_id_t new_context_id, remote_context_id;
+ int errflag = FALSE;
MPIU_CHKLMEM_DECL(4);
rank = comm_ptr->rank;
@@ -125,8 +126,9 @@
local_comm_ptr = comm_ptr;
}
/* Gather information on the local group of processes */
- mpi_errno = MPIR_Allgather_impl( MPI_IN_PLACE, 2, MPI_INT, table, 2, MPI_INT, local_comm_ptr );
+ mpi_errno = MPIR_Allgather_impl( MPI_IN_PLACE, 2, MPI_INT, table, 2, MPI_INT, local_comm_ptr, &errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
/* Step 2: How many processes have our same color? */
new_size = 0;
@@ -172,8 +174,9 @@
mypair.color = color;
mypair.key = key;
mpi_errno = MPIR_Allgather_impl( &mypair, 2, MPI_INT, remotetable, 2, MPI_INT,
- comm_ptr );
+ comm_ptr, &errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
/* Each process can now match its color with the entries in the table */
new_remote_size = 0;
@@ -219,13 +222,15 @@
&remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE,
0, 0, comm_ptr->handle, MPI_STATUS_IGNORE );
if (mpi_errno) { MPIU_ERR_POP( mpi_errno ); }
- mpi_errno = MPIR_Bcast_impl( &remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, local_comm_ptr );
+ mpi_errno = MPIR_Bcast_impl( &remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, local_comm_ptr, &errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
}
else {
/* Broadcast to the other members of the local group */
- mpi_errno = MPIR_Bcast_impl( &remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, local_comm_ptr );
+ mpi_errno = MPIR_Bcast_impl( &remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, local_comm_ptr, &errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
}
}
Modified: mpich2/trunk/src/mpi/comm/commutil.c
===================================================================
--- mpich2/trunk/src/mpi/comm/commutil.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/comm/commutil.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -764,7 +764,7 @@
int own_mask = 0;
int testCount = 10; /* if you change this value, you need to also change
it below where it is reinitialized */
-
+ int errflag = FALSE;
MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GET_CONTEXTID);
MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GET_CONTEXTID);
@@ -829,9 +829,9 @@
other processes to enter the global or brief global critical section.
*/
mpi_errno = MPIR_Allreduce_impl( MPI_IN_PLACE, local_mask, MPIR_MAX_CONTEXT_MASK,
- MPI_INT, MPI_BAND, comm_ptr );
+ MPI_INT, MPI_BAND, comm_ptr, &errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
/* MT FIXME 2/3 cases don't seem to need the CONTEXTID CS, check and
* narrow this region */
MPIU_THREAD_CS_ENTER(CONTEXTID,);
@@ -889,8 +889,9 @@
/* we _must_ release the lock above in order to avoid deadlocking on
* this blocking allreduce operation */
mpi_errno = MPIR_Allreduce_impl( &hasNoId, &totalHasNoId, 1, MPI_INT,
- MPI_MAX, comm_ptr );
+ MPI_MAX, comm_ptr, &errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
if (totalHasNoId == 1) {
/* Release the mask for use by other threads */
if (own_mask) {
@@ -956,6 +957,7 @@
context instead?. Or can we use the tag
provided in the intercomm routine? (not on a dup,
but in that case it can use the collective context) */
+ int errflag = FALSE;
MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID);
MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID);
@@ -983,9 +985,9 @@
/* Make sure that all of the local processes now have this
id */
mpi_errno = MPIR_Bcast_impl( &remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE,
- 0, comm_ptr->local_comm );
+ 0, comm_ptr->local_comm, &errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
/* The recvcontext_id must be the one that was allocated out of the local
* group, not the remote group. Otherwise we could end up posting two
* MPI_ANY_SOURCE,MPI_ANY_TAG recvs on the same context IDs even though we
Modified: mpich2/trunk/src/mpi/comm/intercomm_create.c
===================================================================
--- mpich2/trunk/src/mpi/comm/intercomm_create.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/comm/intercomm_create.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -238,6 +238,7 @@
int is_low_group = 0;
int i;
MPID_Comm *newcomm_ptr;
+ int errflag = FALSE;
MPIU_CHKLMEM_DECL(4);
MPID_MPI_STATE_DECL(MPID_STATE_MPI_INTERCOMM_CREATE);
@@ -457,11 +458,13 @@
comm_info[1] = final_context_id;
comm_info[2] = is_low_group;
MPIU_DBG_MSG(COMM,VERBOSE,"About to bcast on local_comm");
- mpi_errno = MPIR_Bcast_impl( comm_info, 3, MPI_INT, local_leader, comm_ptr );
+ mpi_errno = MPIR_Bcast_impl( comm_info, 3, MPI_INT, local_leader, comm_ptr, &errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
mpi_errno = MPIR_Bcast_impl( remote_gpids, 2*remote_size, MPI_INT, local_leader,
- comm_ptr );
+ comm_ptr, &errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
MPIU_DBG_MSG_D(COMM,VERBOSE,"end of bcast on local_comm of size %d",
comm_ptr->local_size );
}
@@ -469,16 +472,18 @@
{
/* we're the other processes */
MPIU_DBG_MSG(COMM,VERBOSE,"About to receive bcast on local_comm");
- mpi_errno = MPIR_Bcast_impl( comm_info, 3, MPI_INT, local_leader, comm_ptr );
+ mpi_errno = MPIR_Bcast_impl( comm_info, 3, MPI_INT, local_leader, comm_ptr, &errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
remote_size = comm_info[0];
MPIU_CHKLMEM_MALLOC(remote_gpids,int*,2*remote_size*sizeof(int),
mpi_errno,"remote_gpids");
MPIU_CHKLMEM_MALLOC(remote_lpids,int*,remote_size*sizeof(int),
mpi_errno,"remote_lpids");
mpi_errno = MPIR_Bcast_impl( remote_gpids, 2*remote_size, MPI_INT, local_leader,
- comm_ptr );
+ comm_ptr, &errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
/* Extract the context and group sign information */
final_context_id = comm_info[1];
Modified: mpich2/trunk/src/mpi/comm/intercomm_merge.c
===================================================================
--- mpich2/trunk/src/mpi/comm/intercomm_merge.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/comm/intercomm_merge.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -75,6 +75,7 @@
MPID_Comm *newcomm_ptr;
int local_high, remote_high, i, j, new_size;
MPIR_Context_id_t new_context_id;
+ int errflag = FALSE;
MPIU_THREADPRIV_DECL;
MPID_MPI_STATE_DECL(MPID_STATE_MPI_INTERCOMM_MERGE);
@@ -137,8 +138,9 @@
error to make */
acthigh = high ? 1 : 0; /* Clamp high into 1 or 0 */
mpi_errno = MPIR_Allreduce_impl( MPI_IN_PLACE, &acthigh, 1, MPI_INT,
- MPI_SUM, comm_ptr->local_comm );
- if (mpi_errno) goto fn_fail;
+ MPI_SUM, comm_ptr->local_comm, &errflag );
+ if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
/* acthigh must either == 0 or the size of the local comm */
if (acthigh != 0 && acthigh != comm_ptr->local_size) {
mpi_errno = MPIR_Err_create_code( MPI_SUCCESS,
@@ -196,9 +198,10 @@
value of local_high, which may have changed if both groups
of processes had the same value for high
*/
- mpi_errno = MPIR_Bcast_impl( &local_high, 1, MPI_INT, 0,
- comm_ptr->local_comm );
- if (mpi_errno) goto fn_fail;
+ mpi_errno = MPIR_Bcast_impl( &local_high, 1, MPI_INT, 0,
+ comm_ptr->local_comm, &errflag );
+ if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
mpi_errno = MPIR_Comm_create( &newcomm_ptr );
if (mpi_errno) goto fn_fail;
Modified: mpich2/trunk/src/mpi/errhan/errnames.txt
===================================================================
--- mpich2/trunk/src/mpi/errhan/errnames.txt 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/errhan/errnames.txt 2011-01-21 18:32:12 UTC (rev 7803)
@@ -876,6 +876,8 @@
**tcp_cleanup_fail:Error while cleaning up failed connection
**tmpvc_connect_fail:Failure during connection protocol
+**coll_fail:Failure during collective
+
**blcr_mod:BLCR kernel module not present
**envvarparse:Unable to parse environment variable
Modified: mpich2/trunk/src/mpi/topo/dist_gr_create.c
===================================================================
--- mpich2/trunk/src/mpi/topo/dist_gr_create.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/topo/dist_gr_create.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -81,6 +81,7 @@
int *rout_idx;
int *rs;
int in_out_peers[2] = {-1, -1};
+ int errflag = FALSE;
MPIU_CHKLMEM_DECL(9);
MPIU_CHKPMEM_DECL(1);
MPID_MPI_STATE_DECL(MPID_STATE_MPI_DIST_GRAPH_CREATE);
@@ -252,8 +253,9 @@
}
/* compute the number of peers I will recv from */
- mpi_errno = MPIR_Reduce_scatter_block_impl(rs, in_out_peers, 2, MPI_INT, MPI_SUM, comm_ptr);
+ mpi_errno = MPIR_Reduce_scatter_block_impl(rs, in_out_peers, 2, MPI_INT, MPI_SUM, comm_ptr, &errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
MPIU_Assert(in_out_peers[0] <= comm_size && in_out_peers[0] >= 0);
MPIU_Assert(in_out_peers[1] <= comm_size && in_out_peers[1] >= 0);
Property changes on: mpich2/trunk/src/mpid
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/error-return/src/mpid:7405-7603,7662-7670
+ /mpich2/branches/dev/coll-err-ret/src/mpid:7771-7802
/mpich2/branches/dev/error-return/src/mpid:7405-7603,7662-7670
Property changes on: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/ckpt/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5050
/mpich2/branches/dev/ckpt2/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5057-6537
/mpich2/branches/dev/error-return/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:7405-7603,7662-7670
/mpich2/branches/dev/ftb/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5661-5730
/mpich2/branches/dev/lapi/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5817
/mpich2/branches/dev/win_rrvm/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:6416,6428
/mpich2/branches/dev/wintcp_async_progress/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5406
+ /mpich2/branches/dev/ckpt/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5050
/mpich2/branches/dev/ckpt2/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5057-6537
/mpich2/branches/dev/coll-err-ret/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:7771-7802
/mpich2/branches/dev/error-return/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:7405-7603,7662-7670
/mpich2/branches/dev/ftb/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5661-5730
/mpich2/branches/dev/lapi/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5817
/mpich2/branches/dev/win_rrvm/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:6416,6428
/mpich2/branches/dev/wintcp_async_progress/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5406
Modified: mpich2/trunk/src/mpid/ch3/src/ch3u_comm_spawn_multiple.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/src/ch3u_comm_spawn_multiple.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpid/ch3/src/ch3u_comm_spawn_multiple.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -256,14 +256,17 @@
}
if (errcodes != MPI_ERRCODES_IGNORE) {
- mpi_errno = MPIR_Bcast_impl(&should_accept, 1, MPI_INT, root, comm_ptr);
+ int errflag = FALSE;
+ mpi_errno = MPIR_Bcast_impl(&should_accept, 1, MPI_INT, root, comm_ptr, &errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
- mpi_errno = MPIR_Bcast_impl(&total_num_processes, 1, MPI_INT, root, comm_ptr);
+ mpi_errno = MPIR_Bcast_impl(&total_num_processes, 1, MPI_INT, root, comm_ptr, &errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
- mpi_errno = MPIR_Bcast_impl(errcodes, total_num_processes, MPI_INT, root, comm_ptr);
+ mpi_errno = MPIR_Bcast_impl(errcodes, total_num_processes, MPI_INT, root, comm_ptr, &errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
}
if (should_accept) {
Modified: mpich2/trunk/src/mpid/ch3/src/ch3u_port.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/src/ch3u_port.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpid/ch3/src/ch3u_port.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -343,6 +343,7 @@
pg_node *pg_list = NULL;
MPIDI_PG_t **remote_pg = NULL;
MPIR_Context_id_t recvcontext_id = MPIR_INVALID_CONTEXT_ID;
+ int errflag = FALSE;
MPIU_CHKLMEM_DECL(3);
MPIDI_STATE_DECL(MPID_STATE_MPIDI_COMM_CONNECT);
@@ -403,10 +404,9 @@
/* broadcast the received info to local processes */
MPIU_DBG_MSG(CH3_CONNECT,VERBOSE,"broadcasting the received 3 ints");
- mpi_errno = MPIR_Bcast_intra(recv_ints, 3, MPI_INT, root, comm_ptr);
- if (mpi_errno) {
- MPIU_ERR_POP(mpi_errno);
- }
+ mpi_errno = MPIR_Bcast_intra(recv_ints, 3, MPI_INT, root, comm_ptr, &errflag);
+ if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
/* check if root was unable to connect to the port */
MPIU_ERR_CHKANDJUMP1(recv_ints[0] == -1, mpi_errno, MPI_ERR_PORT, "**portexist", "**portexist %s", port_name);
@@ -461,10 +461,10 @@
/* Broadcast out the remote rank translation array */
MPIU_DBG_MSG(CH3_CONNECT,VERBOSE,"Broadcasting remote translation");
mpi_errno = MPIR_Bcast_intra(remote_translation, remote_comm_size * 2, MPI_INT,
- root, comm_ptr);
- if (mpi_errno) {
- MPIU_ERR_POP(mpi_errno);
- }
+ root, comm_ptr, &errflag);
+ if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
#ifdef MPICH_DBG_OUTPUT
MPIU_DBG_PRINTF(("[%d]connect:Received remote_translation after broadcast:\n", rank));
for (i=0; i<remote_comm_size; i++)
@@ -547,8 +547,12 @@
/* notify other processes to return an error */
MPIU_DBG_MSG(CH3_CONNECT,VERBOSE,"broadcasting 3 ints: error case");
- mpi_errno2 = MPIR_Bcast_intra(recv_ints, 3, MPI_INT, root, comm_ptr);
+ mpi_errno2 = MPIR_Bcast_intra(recv_ints, 3, MPI_INT, root, comm_ptr, &errflag);
if (mpi_errno2) MPIU_ERR_ADD(mpi_errno, mpi_errno2);
+ if (errflag) {
+ MPIU_ERR_SET(mpi_errno2, MPI_ERR_OTHER, "**coll_fail");
+ MPIU_ERR_ADD(mpi_errno, mpi_errno2);
+ }
goto fn_fail;
}
}
@@ -685,6 +689,7 @@
int rank = comm_ptr->rank;
int mpi_errno = 0;
int recvtag = *recvtag_p;
+ int errflag = FALSE;
MPIDI_STATE_DECL(MPID_STATE_RECEIVEPGANDDISTRIBUTE);
MPIDI_FUNC_ENTER(MPID_STATE_RECEIVEPGANDDISTRIBUTE);
@@ -713,10 +718,9 @@
/* Broadcast the size and data to the local communicator */
/*printf("accept:broadcasting 1 int\n");fflush(stdout);*/
- mpi_errno = MPIR_Bcast_intra(&j, 1, MPI_INT, root, comm_ptr);
- if (mpi_errno != MPI_SUCCESS) {
- MPIU_ERR_POP(mpi_errno);
- }
+ mpi_errno = MPIR_Bcast_intra(&j, 1, MPI_INT, root, comm_ptr, &errflag);
+ if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
if (rank != root) {
/* The root has already allocated this string */
@@ -726,10 +730,9 @@
}
}
/*printf("accept:broadcasting string of length %d\n", j);fflush(stdout);*/
- mpi_errno = MPIR_Bcast_intra(pg_str, j, MPI_CHAR, root, comm_ptr);
- if (mpi_errno != MPI_SUCCESS) {
- MPIU_ERR_POP(mpi_errno);
- }
+ mpi_errno = MPIR_Bcast_intra(pg_str, j, MPI_CHAR, root, comm_ptr, &errflag);
+ if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
/* Then reconstruct the received process group. This step
also initializes the created process group */
@@ -762,6 +765,7 @@
pg_translation *local_translation = 0;
pg_node *pg_list, *pg_next, *pg_head = 0;
int rank, i, peer_comm_size;
+ int errflag = FALSE;
MPIU_CHKLMEM_DECL(1);
peer_comm_size = comm_p->local_size;
@@ -778,9 +782,9 @@
}
/* Now, broadcast the number of local pgs */
- mpi_errno = MPIR_Bcast_impl( &n_local_pgs, 1, MPI_INT, root, comm_p);
+ mpi_errno = MPIR_Bcast_impl( &n_local_pgs, 1, MPI_INT, root, comm_p, &errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
pg_list = pg_head;
for (i=0; i<n_local_pgs; i++) {
@@ -799,8 +803,9 @@
len = pg_list->lenStr;
pg_list = pg_list->next;
}
- mpi_errno = MPIR_Bcast_impl( &len, 1, MPI_INT, root, comm_p);
+ mpi_errno = MPIR_Bcast_impl( &len, 1, MPI_INT, root, comm_p, &errflag);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
if (rank != root) {
pg_str = (char *)MPIU_Malloc(len);
if (!pg_str) {
@@ -808,12 +813,14 @@
goto fn_exit;
}
}
- mpi_errno = MPIR_Bcast_impl( pg_str, len, MPI_CHAR, root, comm_p);
+ mpi_errno = MPIR_Bcast_impl( pg_str, len, MPI_CHAR, root, comm_p, &errflag);
if (mpi_errno) {
if (rank != root)
MPIU_Free( pg_str );
MPIU_ERR_POP(mpi_errno);
}
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
if (rank != root) {
/* flag is true if the pg was created, false if it
already existed. This step
@@ -930,6 +937,7 @@
pg_translation *local_translation = NULL, *remote_translation = NULL;
pg_node *pg_list = NULL;
MPIDI_PG_t **remote_pg = NULL;
+ int errflag = FALSE;
MPIU_CHKLMEM_DECL(3);
MPIDI_STATE_DECL(MPID_STATE_MPIDI_COMM_ACCEPT);
@@ -991,11 +999,11 @@
/* broadcast the received info to local processes */
/*printf("accept:broadcasting 2 ints - %d and %d\n", recv_ints[0], recv_ints[1]);fflush(stdout);*/
- mpi_errno = MPIR_Bcast_intra(recv_ints, 3, MPI_INT, root, comm_ptr);
- if (mpi_errno) {
- MPIU_ERR_POP(mpi_errno);
- }
+ mpi_errno = MPIR_Bcast_intra(recv_ints, 3, MPI_INT, root, comm_ptr, &errflag);
+ if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
n_remote_pgs = recv_ints[0];
remote_comm_size = recv_ints[1];
context_id = recv_ints[2];
@@ -1042,7 +1050,9 @@
/* Broadcast out the remote rank translation array */
MPIU_DBG_MSG(CH3_CONNECT,VERBOSE,"Broadcast remote_translation");
mpi_errno = MPIR_Bcast_intra(remote_translation, remote_comm_size * 2, MPI_INT,
- root, comm_ptr);
+ root, comm_ptr, &errflag);
+ if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
#ifdef MPICH_DBG_OUTPUT
MPIU_DBG_PRINTF(("[%d]accept:Received remote_translation after broadcast:\n", rank));
for (i=0; i<remote_comm_size; i++)
Modified: mpich2/trunk/src/mpid/ch3/src/ch3u_rma_ops.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/src/ch3u_rma_ops.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpid/ch3/src/ch3u_rma_ops.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -33,6 +33,7 @@
int mpi_errno=MPI_SUCCESS, i, k, comm_size, rank;
MPI_Aint *tmp_buf;
MPID_Comm *win_comm_ptr;
+ int errflag = FALSE;
MPIU_CHKPMEM_DECL(4);
MPIU_CHKLMEM_DECL(1);
MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_CREATE);
@@ -123,10 +124,12 @@
mpi_errno = MPIR_Allgather_impl(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
tmp_buf, 3 * sizeof(MPI_Aint), MPI_BYTE,
- comm_ptr);
+ comm_ptr, &errflag);
MPIU_INSTR_DURATION_END(wincreate_allgather);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
k = 0;
for (i=0; i<comm_size; i++)
{
@@ -158,7 +161,7 @@
int mpi_errno=MPI_SUCCESS, total_pt_rma_puts_accs;
int in_use;
MPID_Comm *comm_ptr;
-
+ int errflag = FALSE;
MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_FREE);
MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_WIN_FREE);
@@ -167,8 +170,9 @@
MPIU_INSTR_DURATION_START(winfree_rs);
mpi_errno = MPIR_Reduce_scatter_block_impl((*win_ptr)->pt_rma_puts_accs,
&total_pt_rma_puts_accs, 1,
- MPI_INT, MPI_SUM, comm_ptr);
+ MPI_INT, MPI_SUM, comm_ptr, &errflag);
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
MPIU_INSTR_DURATION_END(winfree_rs);
if (total_pt_rma_puts_accs != (*win_ptr)->my_pt_rma_puts_accs)
Modified: mpich2/trunk/src/mpid/ch3/src/ch3u_rma_sync.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/src/ch3u_rma_sync.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpid/ch3/src/ch3u_rma_sync.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -116,6 +116,7 @@
MPID_Comm *comm_ptr;
MPI_Win source_win_handle, target_win_handle;
MPID_Progress_state progress_state;
+ int errflag = FALSE;
MPIU_CHKLMEM_DECL(3);
MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_FENCE);
@@ -213,10 +214,11 @@
win_ptr->my_counter = comm_size;
mpi_errno = MPIR_Reduce_scatter_block_impl(MPI_IN_PLACE, rma_target_proc, 1,
- MPI_INT, MPI_SUM, comm_ptr);
+ MPI_INT, MPI_SUM, comm_ptr, &errflag);
MPIU_INSTR_DURATION_END(winfence_rs);
/* result is stored in rma_target_proc[0] */
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
/* Set the completion counter */
/* FIXME: MT: this needs to be done atomically because other
Modified: mpich2/trunk/src/mpid/ch3/src/mpid_vc.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/src/mpid_vc.c 2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpid/ch3/src/mpid_vc.c 2011-01-21 18:32:12 UTC (rev 7803)
@@ -573,7 +573,8 @@
int i, allfound = 1, pgid, pgidWorld;
MPIDI_PG_t *pg = 0;
MPIDI_PG_iterator iter;
-
+ int errflag = FALSE;
+
/* Get the pgid for CommWorld (always attached to the first process
group) */
MPIDI_PG_Get_iterator(&iter);
@@ -600,9 +601,10 @@
}
/* See if everyone is happy */
- mpi_errno = MPIR_Allreduce_impl( MPI_IN_PLACE, &allfound, 1, MPI_INT, MPI_LAND, comm_ptr );
+ mpi_errno = MPIR_Allreduce_impl( MPI_IN_PLACE, &allfound, 1, MPI_INT, MPI_LAND, comm_ptr, &errflag );
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
if (allfound) return MPI_SUCCESS;
/* FIXME: We need a cleaner way to handle this case than using an ifdef.
Property changes on: mpich2/trunk/src/mpl/src/mplstr.c
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/ckpt2/src/mpl/src/string/mplstr.c:5182,5196,5198
/mpich2/branches/dev/error-return/src/mpl/src/mplstr.c:7662-7670
/mpich2/branches/dev/ftb/src/mpl/src/mplstr.c:5661-5730
/mpich2/branches/dev/lapi/src/mpl/src/mplstr.c:5817
/mpich2/branches/release/mpich2-1.1.1/src/mpl/src/string/mplstr.c:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/mpl/src/string/mplstr.c:5406
+ /mpich2/branches/dev/ckpt2/src/mpl/src/string/mplstr.c:5182,5196,5198
/mpich2/branches/dev/coll-err-ret/src/mpl/src/mplstr.c:7771-7802
/mpich2/branches/dev/error-return/src/mpl/src/mplstr.c:7662-7670
/mpich2/branches/dev/ftb/src/mpl/src/mplstr.c:5661-5730
/mpich2/branches/dev/lapi/src/mpl/src/mplstr.c:5817
/mpich2/branches/release/mpich2-1.1.1/src/mpl/src/string/mplstr.c:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/mpl/src/string/mplstr.c:5406
Property changes on: mpich2/trunk/src/pm/hydra
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/ckpt/src/pm/hydra:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra:7662-7670*
/mpich2/branches/dev/ftb/src/pm/hydra:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra:5406
+ /mpich2/branches/dev/ckpt/src/pm/hydra:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra:7771-7802*
/mpich2/branches/dev/error-return/src/pm/hydra:7662-7670*
/mpich2/branches/dev/ftb/src/pm/hydra:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra:5406
Property changes on: mpich2/trunk/src/pm/hydra/Makefile.am
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/ckpt/src/pm/hydra/Makefile.am:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/Makefile.am:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/Makefile.am:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/Makefile.am:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/Makefile.am:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/Makefile.am:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/Makefile.am:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/Makefile.am:5406
+ /mpich2/branches/dev/ckpt/src/pm/hydra/Makefile.am:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/Makefile.am:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/Makefile.am:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/Makefile.am:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/Makefile.am:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/Makefile.am:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/Makefile.am:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/Makefile.am:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/Makefile.am:5406
Property changes on: mpich2/trunk/src/pm/hydra/README
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/ckpt/src/pm/hydra/README:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/README:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/README:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/README:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/README:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/README:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/README:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/README:5406
+ /mpich2/branches/dev/ckpt/src/pm/hydra/README:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/README:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/README:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/README:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/README:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/README:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/README:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/README:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/README:5406
Property changes on: mpich2/trunk/src/pm/hydra/autogen.sh
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/ckpt/src/pm/hydra/autogen.sh:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/autogen.sh:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/autogen.sh:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/autogen.sh:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/autogen.sh:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/autogen.sh:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/autogen.sh:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/autogen.sh:5406
+ /mpich2/branches/dev/ckpt/src/pm/hydra/autogen.sh:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/autogen.sh:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/autogen.sh:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/autogen.sh:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/autogen.sh:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/autogen.sh:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/autogen.sh:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/autogen.sh:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/autogen.sh:5406
Property changes on: mpich2/trunk/src/pm/hydra/configure.in
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/ckpt/src/pm/hydra/configure.in:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/configure.in:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/configure.in:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/configure.in:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/configure.in:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/configure.in:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/configure.in:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/configure.in:5406
+ /mpich2/branches/dev/ckpt/src/pm/hydra/configure.in:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/configure.in:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/configure.in:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/configure.in:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/configure.in:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/configure.in:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/configure.in:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/configure.in:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/configure.in:5406
Property changes on: mpich2/trunk/src/pm/hydra/examples
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/ckpt/src/pm/hydra/examples:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/examples:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/examples:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/examples:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/examples:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/examples:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/examples:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/examples:5406
+ /mpich2/branches/dev/ckpt/src/pm/hydra/examples:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/examples:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/examples:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/examples:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/examples:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/examples:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/examples:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/examples:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/examples:5406
Property changes on: mpich2/trunk/src/pm/hydra/hydra-doxygen.cfg.in
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/ckpt/src/pm/hydra/hydra-doxygen.cfg.in:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/hydra-doxygen.cfg.in:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/hydra-doxygen.cfg.in:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/hydra-doxygen.cfg.in:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/hydra-doxygen.cfg.in:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/hydra-doxygen.cfg.in:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/hydra-doxygen.cfg.in:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/hydra-doxygen.cfg.in:5406
+ /mpich2/branches/dev/ckpt/src/pm/hydra/hydra-doxygen.cfg.in:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/hydra-doxygen.cfg.in:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/hydra-doxygen.cfg.in:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/hydra-doxygen.cfg.in:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/hydra-doxygen.cfg.in:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/hydra-doxygen.cfg.in:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/hydra-doxygen.cfg.in:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/hydra-doxygen.cfg.in:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/hydra-doxygen.cfg.in:5406
Property changes on: mpich2/trunk/src/pm/hydra/include
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/ckpt/src/pm/hydra/include:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/include:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/include:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/include:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/include:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/include:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/include:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/include:5406
+ /mpich2/branches/dev/ckpt/src/pm/hydra/include:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/include:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/include:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/include:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/include:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/include:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/include:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/include:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/include:5406
Property changes on: mpich2/trunk/src/pm/hydra/mpich2prereq
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/ckpt/src/pm/hydra/mpich2prereq:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/mpich2prereq:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/mpich2prereq:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/mpich2prereq:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/mpich2prereq:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/mpich2prereq:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/mpich2prereq:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/mpich2prereq:5406
+ /mpich2/branches/dev/ckpt/src/pm/hydra/mpich2prereq:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/mpich2prereq:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/mpich2prereq:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/mpich2prereq:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/mpich2prereq:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/mpich2prereq:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/mpich2prereq:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/mpich2prereq:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/mpich2prereq:5406
Property changes on: mpich2/trunk/src/pm/hydra/pm
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/ckpt/src/pm/hydra/pm:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/pm:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/pm:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/pm:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/pm:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/pm:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/pm:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/pm:5406
+ /mpich2/branches/dev/ckpt/src/pm/hydra/pm:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/pm:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/pm:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/pm:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/pm:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/pm:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/pm:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/pm:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/pm:5406
Property changes on: mpich2/trunk/src/pm/hydra/tools
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/ckpt/src/pm/hydra/tools:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/tools:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/tools:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/tools:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/tools:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/tools:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/tools:5406
+ /mpich2/branches/dev/ckpt/src/pm/hydra/tools:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/tools:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/tools:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/tools:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/tools:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/tools:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/tools:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/tools:5406
Property changes on: mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm_query_proxy_id.c
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/error-return/src/pm/hydra/tools/bootstrap/external/slurm_query_proxy_id.c:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/tools/bootstrap/slurm/slurm_query_proxy_id.c:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools/bootstrap/slurm/slurm_query_proxy_id.c:5817
+ /mpich2/branches/dev/coll-err-ret/src/pm/hydra/tools/bootstrap/external/slurm_query_proxy_id.c:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/tools/bootstrap/external/slurm_query_proxy_id.c:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/tools/bootstrap/slurm/slurm_query_proxy_id.c:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools/bootstrap/slurm/slurm_query_proxy_id.c:5817
Property changes on: mpich2/trunk/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/error-return/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c:5817
+ /mpich2/branches/dev/coll-err-ret/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c:5817
Property changes on: mpich2/trunk/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/error-return/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c:5817
+ /mpich2/branches/dev/coll-err-ret/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c:5817
Property changes on: mpich2/trunk/src/pm/hydra/ui
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/ckpt/src/pm/hydra/ui:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/ui:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/ui:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/ui:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/ui:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/ui:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/ui:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/ui:5406
+ /mpich2/branches/dev/ckpt/src/pm/hydra/ui:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/ui:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/ui:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/ui:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/ui:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/ui:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/ui:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/ui:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/ui:5406
Property changes on: mpich2/trunk/src/pm/hydra/utils
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/ckpt/src/pm/hydra/utils:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/utils:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/utils:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/utils:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/utils:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/utils:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/utils:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/utils:5406
+ /mpich2/branches/dev/ckpt/src/pm/hydra/utils:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/utils:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/utils:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/utils:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/utils:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/utils:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/utils:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/utils:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/utils:5406
Property changes on: mpich2/trunk/winconfigure.wsf
___________________________________________________________________
Modified: svn:mergeinfo
- /mpich2/branches/dev/ckpt/winconfigure.wsf:5050
/mpich2/branches/dev/ckpt2/winconfigure.wsf:5057-6537
/mpich2/branches/dev/error-return/winconfigure.wsf:7662-7670
/mpich2/branches/dev/ftb/winconfigure.wsf:5661-5730
/mpich2/branches/dev/lapi/winconfigure.wsf:5817
/mpich2/branches/dev/win_rrvm/winconfigure.wsf:6404,6407-6408,6420,6422-6423
/mpich2/branches/dev/wintcp_async_progress/winconfigure.wsf:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/winconfigure.wsf:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/winconfigure.wsf:5406
+ /mpich2/branches/dev/ckpt/winconfigure.wsf:5050
/mpich2/branches/dev/ckpt2/winconfigure.wsf:5057-6537
/mpich2/branches/dev/coll-err-ret/winconfigure.wsf:7771-7802
/mpich2/branches/dev/error-return/winconfigure.wsf:7662-7670
/mpich2/branches/dev/ftb/winconfigure.wsf:5661-5730
/mpich2/branches/dev/lapi/winconfigure.wsf:5817
/mpich2/branches/dev/win_rrvm/winconfigure.wsf:6404,6407-6408,6420,6422-6423
/mpich2/branches/dev/wintcp_async_progress/winconfigure.wsf:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/winconfigure.wsf:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/winconfigure.wsf:5406
More information about the mpich2-commits mailing list