[mpich2-commits] r7803 - in mpich2/trunk: . confdb maint src/include src/mpi/coll src/mpi/comm src/mpi/errhan src/mpi/topo src/mpid src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp src/mpid/ch3/src src/mpl/src src/pm/hydra src/pm/hydra/examples src/pm/hydra/include src/pm/hydra/pm src/pm/hydra/tools src/pm/hydra/tools/bootstrap/external src/pm/hydra/tools/bootstrap/src src/pm/hydra/tools/bootstrap/utils src/pm/hydra/ui src/pm/hydra/utils

buntinas at mcs.anl.gov buntinas at mcs.anl.gov
Fri Jan 21 12:32:12 CST 2011


Author: buntinas
Date: 2011-01-21 12:32:12 -0600 (Fri, 21 Jan 2011)
New Revision: 7803

Modified:
   mpich2/trunk/
   mpich2/trunk/confdb/
   mpich2/trunk/maint/Version
   mpich2/trunk/src/include/mpiimpl.h
   mpich2/trunk/src/mpi/coll/allgather.c
   mpich2/trunk/src/mpi/coll/allgatherv.c
   mpich2/trunk/src/mpi/coll/allreduce.c
   mpich2/trunk/src/mpi/coll/alltoall.c
   mpich2/trunk/src/mpi/coll/alltoallv.c
   mpich2/trunk/src/mpi/coll/alltoallw.c
   mpich2/trunk/src/mpi/coll/barrier.c
   mpich2/trunk/src/mpi/coll/bcast.c
   mpich2/trunk/src/mpi/coll/exscan.c
   mpich2/trunk/src/mpi/coll/gather.c
   mpich2/trunk/src/mpi/coll/gatherv.c
   mpich2/trunk/src/mpi/coll/helper_fns.c
   mpich2/trunk/src/mpi/coll/red_scat.c
   mpich2/trunk/src/mpi/coll/red_scat_block.c
   mpich2/trunk/src/mpi/coll/reduce.c
   mpich2/trunk/src/mpi/coll/scan.c
   mpich2/trunk/src/mpi/coll/scatter.c
   mpich2/trunk/src/mpi/coll/scatterv.c
   mpich2/trunk/src/mpi/comm/comm_create.c
   mpich2/trunk/src/mpi/comm/comm_split.c
   mpich2/trunk/src/mpi/comm/commutil.c
   mpich2/trunk/src/mpi/comm/intercomm_create.c
   mpich2/trunk/src/mpi/comm/intercomm_merge.c
   mpich2/trunk/src/mpi/errhan/errnames.txt
   mpich2/trunk/src/mpi/topo/dist_gr_create.c
   mpich2/trunk/src/mpid/
   mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c
   mpich2/trunk/src/mpid/ch3/src/ch3u_comm_spawn_multiple.c
   mpich2/trunk/src/mpid/ch3/src/ch3u_port.c
   mpich2/trunk/src/mpid/ch3/src/ch3u_rma_ops.c
   mpich2/trunk/src/mpid/ch3/src/ch3u_rma_sync.c
   mpich2/trunk/src/mpid/ch3/src/mpid_vc.c
   mpich2/trunk/src/mpl/src/mplstr.c
   mpich2/trunk/src/pm/hydra/
   mpich2/trunk/src/pm/hydra/Makefile.am
   mpich2/trunk/src/pm/hydra/README
   mpich2/trunk/src/pm/hydra/autogen.sh
   mpich2/trunk/src/pm/hydra/configure.in
   mpich2/trunk/src/pm/hydra/examples/
   mpich2/trunk/src/pm/hydra/hydra-doxygen.cfg.in
   mpich2/trunk/src/pm/hydra/include/
   mpich2/trunk/src/pm/hydra/mpich2prereq
   mpich2/trunk/src/pm/hydra/pm/
   mpich2/trunk/src/pm/hydra/tools/
   mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm_query_proxy_id.c
   mpich2/trunk/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c
   mpich2/trunk/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c
   mpich2/trunk/src/pm/hydra/ui/
   mpich2/trunk/src/pm/hydra/utils/
   mpich2/trunk/winconfigure.wsf
Log:
merging error returns for collective branch into trunk


Property changes on: mpich2/trunk
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt:5050
/mpich2/branches/dev/ckpt2:5057-6537
/mpich2/branches/dev/error-return:7662-7670
/mpich2/branches/dev/ftb:5661-5730
/mpich2/branches/dev/lapi:5817
/mpich2/branches/dev/wintcp_async_progress:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2:5406
   + /mpich2/branches/dev/ckpt:5050
/mpich2/branches/dev/ckpt2:5057-6537
/mpich2/branches/dev/coll-err-ret:7771-7802
/mpich2/branches/dev/error-return:7662-7670
/mpich2/branches/dev/ftb:5661-5730
/mpich2/branches/dev/lapi:5817
/mpich2/branches/dev/wintcp_async_progress:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2:5406


Property changes on: mpich2/trunk/confdb
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt2/confdb:5180,5182,5196,5198
/mpich2/branches/dev/error-return/confdb:7662-7670
/mpich2/branches/dev/ftb/confdb:5661-5730
/mpich2/branches/dev/lapi/confdb:5817
/mpich2/branches/dev/wintcp_async_progress/confdb:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/confdb:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/confdb:5406
   + /mpich2/branches/dev/ckpt2/confdb:5180,5182,5196,5198
/mpich2/branches/dev/coll-err-ret/confdb:7771-7802
/mpich2/branches/dev/error-return/confdb:7662-7670
/mpich2/branches/dev/ftb/confdb:5661-5730
/mpich2/branches/dev/lapi/confdb:5817
/mpich2/branches/dev/wintcp_async_progress/confdb:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/confdb:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/confdb:5406


Property changes on: mpich2/trunk/maint/Version
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt/maint/Version:5050
/mpich2/branches/dev/ckpt2/maint/Version:5057-6537
/mpich2/branches/dev/error-return/maint/Version:7662-7670
/mpich2/branches/dev/ftb/maint/Version:5661-5730
/mpich2/branches/dev/lapi/maint/Version:5817
/mpich2/branches/dev/wintcp_async_progress/maint/Version:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/maint/Version:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/maint/Version:5406
/mpich2/trunk/src/pm/hydra/VERSION:7662-7666
   + /mpich2/branches/dev/ckpt/maint/Version:5050
/mpich2/branches/dev/ckpt2/maint/Version:5057-6537
/mpich2/branches/dev/coll-err-ret/maint/Version:7771-7802
/mpich2/branches/dev/error-return/maint/Version:7662-7670
/mpich2/branches/dev/ftb/maint/Version:5661-5730
/mpich2/branches/dev/lapi/maint/Version:5817
/mpich2/branches/dev/wintcp_async_progress/maint/Version:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/maint/Version:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/maint/Version:5406
/mpich2/trunk/src/pm/hydra/VERSION:7662-7666

Modified: mpich2/trunk/src/include/mpiimpl.h
===================================================================
--- mpich2/trunk/src/include/mpiimpl.h	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/include/mpiimpl.h	2011-01-21 18:32:12 UTC (rev 7803)
@@ -1739,35 +1739,35 @@
     int ref_count;   /* Supports lazy copies */
     /* Contains pointers to the functions for the MPI collectives */
     int (*Barrier) (MPID_Comm *);
-    int (*Bcast) (void*, int, MPI_Datatype, int, MPID_Comm * );
+    int (*Bcast) (void*, int, MPI_Datatype, int, MPID_Comm *, int *);
     int (*Gather) (void*, int, MPI_Datatype, void*, int, MPI_Datatype, 
-                   int, MPID_Comm *); 
+                   int, MPID_Comm *, int *); 
     int (*Gatherv) (void*, int, MPI_Datatype, void*, int *, int *, 
-                    MPI_Datatype, int, MPID_Comm *); 
+                    MPI_Datatype, int, MPID_Comm *, int *); 
     int (*Scatter) (void*, int, MPI_Datatype, void*, int, MPI_Datatype, 
-                    int, MPID_Comm *);
+                    int, MPID_Comm *, int *);
     int (*Scatterv) (void*, int *, int *, MPI_Datatype, void*, int, 
-                    MPI_Datatype, int, MPID_Comm *);
+                    MPI_Datatype, int, MPID_Comm *, int *);
     int (*Allgather) (void*, int, MPI_Datatype, void*, int, 
-                      MPI_Datatype, MPID_Comm *);
+                      MPI_Datatype, MPID_Comm *, int *);
     int (*Allgatherv) (void*, int, MPI_Datatype, void*, int *, int *, 
-                       MPI_Datatype, MPID_Comm *);
+                       MPI_Datatype, MPID_Comm *, int *);
     int (*Alltoall) (void*, int, MPI_Datatype, void*, int, MPI_Datatype, 
-                               MPID_Comm *);
+                               MPID_Comm *, int *);
     int (*Alltoallv) (void*, int *, int *, MPI_Datatype, void*, int *, 
-                     int *, MPI_Datatype, MPID_Comm *);
+                     int *, MPI_Datatype, MPID_Comm *, int *);
     int (*Alltoallw) (void*, int *, int *, MPI_Datatype *, void*, int *, 
-                     int *, MPI_Datatype *, MPID_Comm *);
+                     int *, MPI_Datatype *, MPID_Comm *, int *);
     int (*Reduce) (void*, void*, int, MPI_Datatype, MPI_Op, int, 
-                   MPID_Comm *);
+                   MPID_Comm *, int *);
     int (*Allreduce) (void*, void*, int, MPI_Datatype, MPI_Op, 
-                      MPID_Comm *);
+                      MPID_Comm *, int *);
     int (*Reduce_scatter) (void*, void*, int *, MPI_Datatype, MPI_Op, 
-                           MPID_Comm *);
-    int (*Scan) (void*, void*, int, MPI_Datatype, MPI_Op, MPID_Comm * );
-    int (*Exscan) (void*, void*, int, MPI_Datatype, MPI_Op, MPID_Comm * );
+                           MPID_Comm *, int *);
+    int (*Scan) (void*, void*, int, MPI_Datatype, MPI_Op, MPID_Comm *, int * );
+    int (*Exscan) (void*, void*, int, MPI_Datatype, MPI_Op, MPID_Comm *, int * );
     int (*Reduce_scatter_block) (void*, void*, int, MPI_Datatype, MPI_Op, 
-                           MPID_Comm *);
+                           MPID_Comm *, int *);
 
     /* MPI-3 nonblocking collectives */
     int (*Ibarrier)(MPID_Comm *comm_ptr, MPID_Sched_t s);
@@ -3260,6 +3260,7 @@
 #define MPIR_TOPO_A_TAG               26
 #define MPIR_TOPO_B_TAG               27
 #define MPIR_REDUCE_SCATTER_BLOCK_TAG 28
+#define MPIR_ERROR_TAG                29
 
 /* These functions are used in the implementation of collective and
    other internal operations. They are wrappers around MPID send/recv
@@ -3288,6 +3289,28 @@
 int MPIC_Wait(MPID_Request * request_ptr);
 int MPIC_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status);
 
+/* FT versions of the MPIC_ functions */
+int MPIC_Send_ft(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+                 MPI_Comm comm, int *errflag);
+int MPIC_Recv_ft(void *buf, int count, MPI_Datatype datatype, int source, int tag,
+                 MPI_Comm comm, MPI_Status *status, int *errflag);
+int MPIC_Ssend_ft(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+                  MPI_Comm comm, int *errflag);
+int MPIC_Sendrecv_ft(void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                     int dest, int sendtag, void *recvbuf, int recvcount,
+                     MPI_Datatype recvtype, int source, int recvtag,
+                     MPI_Comm comm, MPI_Status *status, int *errflag);
+int MPIC_Sendrecv_replace_ft(void *buf, int count, MPI_Datatype datatype,
+                             int dest, int sendtag,
+                             int source, int recvtag,
+                             MPI_Comm comm, MPI_Status *status, int *errflag);
+int MPIC_Isend_ft(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+                  MPI_Comm comm, MPI_Request *request, int *errflag);
+int MPIC_Irecv_ft(void *buf, int count, MPI_Datatype datatype, int source,
+                  int tag, MPI_Comm comm, MPI_Request *request);
+int MPIC_Waitall_ft(int numreq, MPI_Request requests[], MPI_Status statuses[], int *errflag);
+
+
 void MPIR_MAXF  ( void *, void *, int *, MPI_Datatype * ) ;
 void MPIR_MINF  ( void *, void *, int *, MPI_Datatype * ) ;
 void MPIR_SUM  ( void *, void *, int *, MPI_Datatype * ) ;
@@ -3330,160 +3353,160 @@
 
 int MPIR_Allgather_impl(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                         void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                        MPID_Comm *comm_ptr );
+                        MPID_Comm *comm_ptr, int *errflag );
 int MPIR_Allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                    void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                   MPID_Comm *comm_ptr );
+                   MPID_Comm *comm_ptr, int *errflag );
 int MPIR_Allgather_intra(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                          void *recvbuf, int recvcount, MPI_Datatype recvtype, 
-                         MPID_Comm *comm_ptr );
+                         MPID_Comm *comm_ptr, int *errflag );
 int MPIR_Allgather_inter(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                          void *recvbuf, int recvcount, MPI_Datatype recvtype, 
-                         MPID_Comm *comm_ptr );
+                         MPID_Comm *comm_ptr, int *errflag );
 int MPIR_Allgatherv_impl(void *sendbuf, int sendcount, MPI_Datatype sendtype, 
                          void *recvbuf, int *recvcounts, int *displs,   
-                         MPI_Datatype recvtype, MPID_Comm *comm_ptr );
+                         MPI_Datatype recvtype, MPID_Comm *comm_ptr, int *errflag );
 int MPIR_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, 
                     void *recvbuf, int *recvcounts, int *displs,   
-                    MPI_Datatype recvtype, MPID_Comm *comm_ptr );
+                    MPI_Datatype recvtype, MPID_Comm *comm_ptr, int *errflag );
 int MPIR_Allgatherv_intra(void *sendbuf, int sendcount, MPI_Datatype sendtype, 
                           void *recvbuf, int *recvcounts, int *displs,   
-                          MPI_Datatype recvtype, MPID_Comm *comm_ptr );
+                          MPI_Datatype recvtype, MPID_Comm *comm_pt, int *errflag );
 int MPIR_Allgatherv_inter(void *sendbuf, int sendcount, MPI_Datatype sendtype, 
                           void *recvbuf, int *recvcounts, int *displs,   
-                          MPI_Datatype recvtype, MPID_Comm *comm_ptr );
+                          MPI_Datatype recvtype, MPID_Comm *comm_ptr, int *errflag );
 int MPIR_Allreduce_impl(void *sendbuf, void *recvbuf, int count, 
-                        MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr);
+                        MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Allreduce(void *sendbuf, void *recvbuf, int count, 
-                   MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr);
+                   MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Allreduce_intra(void *sendbuf, void *recvbuf, int count, 
-                         MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr);
+                         MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Allreduce_inter(void *sendbuf, void *recvbuf, int count, 
-                        MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr);
+                        MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Alltoall_impl(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                        void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                       MPID_Comm *comm_ptr);
+                       MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                   void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                  MPID_Comm *comm_ptr);
+                  MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Alltoall_intra(void *sendbuf, int sendcount, MPI_Datatype sendtype, 
                         void *recvbuf, int recvcount, MPI_Datatype recvtype, 
-                        MPID_Comm *comm_ptr);
+                        MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Alltoall_inter(void *sendbuf, int sendcount, MPI_Datatype sendtype, 
                         void *recvbuf, int recvcount, MPI_Datatype recvtype, 
-                        MPID_Comm *comm_ptr);
+                        MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Alltoallv_impl(void *sendbuf, int *sendcnts, int *sdispls, 
                         MPI_Datatype sendtype, void *recvbuf, int *recvcnts, 
-                        int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr);
+                        int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Alltoallv(void *sendbuf, int *sendcnts, int *sdispls, 
                    MPI_Datatype sendtype, void *recvbuf, int *recvcnts, 
-                   int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr);
+                   int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Alltoallv_intra(void *sendbuf, int *sendcnts, int *sdispls, 
                          MPI_Datatype sendtype, void *recvbuf, int *recvcnts, 
-                         int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr);
+                         int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Alltoallv_inter(void *sendbuf, int *sendcnts, int *sdispls, 
                          MPI_Datatype sendtype, void *recvbuf, int *recvcnts, 
                          int *rdispls, MPI_Datatype recvtype, 
-                         MPID_Comm *comm_ptr);
+                         MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Alltoallw_impl(void *sendbuf, int *sendcnts, int *sdispls, 
                         MPI_Datatype *sendtypes, void *recvbuf, int *recvcnts, 
-                        int *rdispls, MPI_Datatype *recvtypes, MPID_Comm *comm_ptr);
+                        int *rdispls, MPI_Datatype *recvtypes, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Alltoallw(void *sendbuf, int *sendcnts, int *sdispls, 
                    MPI_Datatype *sendtypes, void *recvbuf, int *recvcnts, 
-                   int *rdispls, MPI_Datatype *recvtypes, MPID_Comm *comm_ptr);
+                   int *rdispls, MPI_Datatype *recvtypes, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Alltoallw_intra(void *sendbuf, int *sendcnts, int *sdispls, 
                          MPI_Datatype *sendtypes, void *recvbuf, int *recvcnts, 
-                         int *rdispls, MPI_Datatype *recvtypes, MPID_Comm *comm_ptr);
+                         int *rdispls, MPI_Datatype *recvtypes, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Alltoallw_inter(void *sendbuf, int *sendcnts, int *sdispls, 
                          MPI_Datatype *sendtypes, void *recvbuf, 
                          int *recvcnts, int *rdispls, MPI_Datatype *recvtypes, 
-                         MPID_Comm *comm_ptr);
+                         MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Bcast_inter(void *buffer, int count, MPI_Datatype datatype, 
-		     int root, MPID_Comm *comm_ptr);
+		     int root, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Bcast_intra (void *buffer, int count, MPI_Datatype datatype, int
-                      root, MPID_Comm *comm_ptr);
+                      root, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Bcast (void *buffer, int count, MPI_Datatype datatype, int
-                root, MPID_Comm *comm_ptr);
+                root, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Bcast_impl (void *buffer, int count, MPI_Datatype datatype, int
-                root, MPID_Comm *comm_ptr);
+                root, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
-                MPI_Op op, MPID_Comm *comm_ptr );
+                MPI_Op op, MPID_Comm *comm_ptr, int *errflag );
 int MPIR_Exscan_impl(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
-                     MPI_Op op, MPID_Comm *comm_ptr );
+                     MPI_Op op, MPID_Comm *comm_ptr, int *errflag );
 int MPIR_Gather_impl (void *sendbuf, int sendcnt, MPI_Datatype sendtype,
                       void *recvbuf, int recvcnt, MPI_Datatype recvtype,
-                      int root, MPID_Comm *comm_ptr);
+                      int root, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Gather (void *sendbuf, int sendcnt, MPI_Datatype sendtype,
                  void *recvbuf, int recvcnt, MPI_Datatype recvtype,
-                 int root, MPID_Comm *comm_ptr);
+                 int root, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Gather_intra (void *sendbuf, int sendcnt, MPI_Datatype sendtype,
                        void *recvbuf, int recvcnt, MPI_Datatype recvtype,
-                       int root, MPID_Comm *comm_ptr);
+                       int root, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Gather_inter (void *sendbuf, int sendcnt, MPI_Datatype sendtype, 
                        void *recvbuf, int recvcnt, MPI_Datatype recvtype, 
-                       int root, MPID_Comm *comm_ptr );
+                       int root, MPID_Comm *comm_ptr, int *errflag );
 int MPIR_Gatherv (void *sendbuf, int sendcnt, MPI_Datatype sendtype, 
                   void *recvbuf, int *recvcnts, int *displs,
-                  MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr); 
+                  MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, int *errflag); 
 int MPIR_Gatherv_impl (void *sendbuf, int sendcnt, MPI_Datatype sendtype, 
                        void *recvbuf, int *recvcnts, int *displs,
-                       MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr); 
+                       MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, int *errflag); 
 int MPIR_Reduce_scatter_impl(void *sendbuf, void *recvbuf, int *recvcnts, 
-                             MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr);
+                             MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Reduce_scatter(void *sendbuf, void *recvbuf, int *recvcnts, 
-                        MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr);
+                        MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Reduce_scatter_intra(void *sendbuf, void *recvbuf, int *recvcnts, 
-                              MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr);
+                              MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Reduce_scatter_inter(void *sendbuf, void *recvbuf, int *recvcnts, 
                               MPI_Datatype datatype, MPI_Op op, 
-                              MPID_Comm *comm_ptr);
+                              MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Reduce_scatter_block_impl(void *sendbuf, void *recvbuf, int recvcount,
                                    MPI_Datatype datatype, MPI_Op op, MPID_Comm
-                                   *comm_ptr );
+                                   *comm_ptr, int *errflag );
 int MPIR_Reduce_scatter_block(void *sendbuf, void *recvbuf, int recvcount,
                               MPI_Datatype datatype, MPI_Op op, MPID_Comm
-                              *comm_ptr );
+                              *comm_ptr, int *errflag );
 int MPIR_Reduce_scatter_block_intra(void *sendbuf, void *recvbuf, int recvcount,
                                     MPI_Datatype datatype, MPI_Op op, MPID_Comm
-                                    *comm_ptr );
+                                    *comm_ptr, int *errflag );
 int MPIR_Reduce_scatter_block_inter(void *sendbuf, void *recvbuf, int recvcount,
                                     MPI_Datatype datatype, MPI_Op op, MPID_Comm
-                                    *comm_ptr);
+                                    *comm_ptr, int *errflag);
 int MPIR_Reduce_impl(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
-                     MPI_Op op, int root, MPID_Comm *comm_ptr );
+                     MPI_Op op, int root, MPID_Comm *comm_ptr, int *errflag );
 int MPIR_Reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
-                MPI_Op op, int root, MPID_Comm *comm_ptr );
+                MPI_Op op, int root, MPID_Comm *comm_ptr, int *errflag );
 int MPIR_Reduce_intra(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
-                      MPI_Op op, int root, MPID_Comm *comm_ptr );
+                      MPI_Op op, int root, MPID_Comm *comm_ptr, int *errflag );
 int MPIR_Reduce_inter (void *sendbuf, void *recvbuf, int count, MPI_Datatype
-                       datatype, MPI_Op op, int root, MPID_Comm *comm_ptr); 
+                       datatype, MPI_Op op, int root, MPID_Comm *comm_ptr, int *errflag); 
 int MPIR_Scan_impl(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, 
-                   MPI_Op op, MPID_Comm *comm_ptr);
+                   MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, 
-              MPI_Op op, MPID_Comm *comm_ptr);
+              MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
 int MPIR_Scatter_impl(void *sendbuf, int sendcnt, MPI_Datatype sendtype, 
                       void *recvbuf, int recvcnt, MPI_Datatype recvtype, 
-                      int root, MPID_Comm *comm_ptr );
+                      int root, MPID_Comm *comm_ptr, int *errflag );
 int MPIR_Scatter(void *sendbuf, int sendcnt, MPI_Datatype sendtype, 
                  void *recvbuf, int recvcnt, MPI_Datatype recvtype, 
-                 int root, MPID_Comm *comm_ptr );
+                 int root, MPID_Comm *comm_ptr, int *errflag );
 int MPIR_Scatter_intra(void *sendbuf, int sendcnt, MPI_Datatype sendtype, 
                        void *recvbuf, int recvcnt, MPI_Datatype recvtype, 
-                       int root, MPID_Comm *comm_ptr );
+                       int root, MPID_Comm *comm_ptr, int *errflag );
 int MPIR_Scatter_inter(void *sendbuf, int sendcnt, MPI_Datatype sendtype, 
                        void *recvbuf, int recvcnt, MPI_Datatype recvtype, 
-                       int root, MPID_Comm *comm_ptr );
+                       int root, MPID_Comm *comm_ptr, int *errflag );
 int MPIR_Scatterv_impl (void *sendbuf, int *sendcnts, int *displs,
                         MPI_Datatype sendtype, void *recvbuf, int recvcnt,
                         MPI_Datatype recvtype, int root, MPID_Comm
-                        *comm_ptr);
+                        *comm_ptr, int *errflag);
 int MPIR_Scatterv (void *sendbuf, int *sendcnts, int *displs,
                    MPI_Datatype sendtype, void *recvbuf, int recvcnt,
                    MPI_Datatype recvtype, int root, MPID_Comm
-                   *comm_ptr);
+                   *comm_ptr, int *errflag);
 int MPIR_Barrier_impl( MPID_Comm *comm_ptr);
 int MPIR_Barrier( MPID_Comm *comm_ptr);
-int MPIR_Barrier_intra( MPID_Comm *comm_ptr );
+int MPIR_Barrier_intra( MPID_Comm *comm_ptr);
 int MPIR_Barrier_inter( MPID_Comm *comm_ptr);
 
 int MPIR_Reduce_local_impl(void *inbuf, void *inoutbuf, int count, MPI_Datatype datatype, MPI_Op op);

Modified: mpich2/trunk/src/mpi/coll/allgather.c
===================================================================
--- mpich2/trunk/src/mpi/coll/allgather.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/allgather.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -81,7 +81,8 @@
     void *recvbuf, 
     int recvcount, 
     MPI_Datatype recvtype, 
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag )
 {
     int comm_size, rank;
     int mpi_errno = MPI_SUCCESS;
@@ -167,15 +168,16 @@
                 recv_offset = dst_tree_root * recvcount * recvtype_extent;
                 
                 if (dst < comm_size) {
-                    mpi_errno = MPIC_Sendrecv(((char *)recvbuf + send_offset),
-                                              curr_cnt, recvtype, dst,
-                                              MPIR_ALLGATHER_TAG,  
-                                              ((char *)recvbuf + recv_offset),
-					      (comm_size-dst_tree_root)*recvcount,
-                                              recvtype, dst,
-                                              MPIR_ALLGATHER_TAG, comm, &status);
+                    mpi_errno = MPIC_Sendrecv_ft(((char *)recvbuf + send_offset),
+                                                 curr_cnt, recvtype, dst,
+                                                 MPIR_ALLGATHER_TAG,  
+                                                 ((char *)recvbuf + recv_offset),
+                                                 (comm_size-dst_tree_root)*recvcount,
+                                                 recvtype, dst,
+                                                 MPIR_ALLGATHER_TAG, comm, &status, errflag);
 		    if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                         last_recv_cnt = 0;
@@ -231,15 +233,16 @@
                         if ((dst > rank) && 
                             (rank < tree_root + nprocs_completed)
                             && (dst >= tree_root + nprocs_completed)) {
-                            mpi_errno = MPIC_Send(((char *)recvbuf + offset),
-                                                  last_recv_cnt,
-                                                  recvtype, dst,
-                                                  MPIR_ALLGATHER_TAG, comm); 
+                            mpi_errno = MPIC_Send_ft(((char *)recvbuf + offset),
+                                                     last_recv_cnt,
+                                                     recvtype, dst,
+                                                     MPIR_ALLGATHER_TAG, comm, errflag); 
                             /* last_recv_cnt was set in the previous
                                receive. that's the amount of data to be
                                sent now. */
                             if (mpi_errno) {
                                 /* for communication errors, just record the error but continue */
+                                *errflag = TRUE;
                                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                             }
@@ -249,15 +252,16 @@
                         else if ((dst < rank) && 
                                  (dst < tree_root + nprocs_completed) &&
                                  (rank >= tree_root + nprocs_completed)) {
-                            mpi_errno = MPIC_Recv(((char *)recvbuf + offset),  
-						  (comm_size - (my_tree_root + mask))*recvcount,
-                                                  recvtype, dst,
-                                                  MPIR_ALLGATHER_TAG,
-                                                  comm, &status); 
+                            mpi_errno = MPIC_Recv_ft(((char *)recvbuf + offset),  
+                                                     (comm_size - (my_tree_root + mask))*recvcount,
+                                                     recvtype, dst,
+                                                     MPIR_ALLGATHER_TAG,
+                                                     comm, &status, errflag); 
                             /* nprocs_completed is also equal to the
                                no. of processes whose data we don't have */
                             if (mpi_errno) {
                                 /* for communication errors, just record the error but continue */
+                                *errflag = TRUE;
                                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                                 last_recv_cnt = 0;
@@ -332,15 +336,16 @@
                 recv_offset = dst_tree_root * nbytes;
                 
                 if (dst < comm_size) {
-                    mpi_errno = MPIC_Sendrecv(((char *)tmp_buf + send_offset),
-                                              curr_cnt, MPI_BYTE, dst,
-                                              MPIR_ALLGATHER_TAG,  
-                                              ((char *)tmp_buf + recv_offset),
-					      tmp_buf_size - recv_offset,
-                                              MPI_BYTE, dst,
-                                              MPIR_ALLGATHER_TAG, comm, &status);
+                    mpi_errno = MPIC_Sendrecv_ft(((char *)tmp_buf + send_offset),
+                                                 curr_cnt, MPI_BYTE, dst,
+                                                 MPIR_ALLGATHER_TAG,  
+                                                 ((char *)tmp_buf + recv_offset),
+                                                 tmp_buf_size - recv_offset,
+                                                 MPI_BYTE, dst,
+                                                 MPIR_ALLGATHER_TAG, comm, &status, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                         last_recv_cnt = 0;
@@ -389,12 +394,13 @@
                             (rank < tree_root + nprocs_completed)
                             && (dst >= tree_root + nprocs_completed)) {
                             
-                            mpi_errno = MPIC_Send(((char *)tmp_buf + offset),
-                                                  last_recv_cnt, MPI_BYTE,
-                                                  dst, MPIR_ALLGATHER_TAG,
-                                                  comm);
+                            mpi_errno = MPIC_Send_ft(((char *)tmp_buf + offset),
+                                                     last_recv_cnt, MPI_BYTE,
+                                                     dst, MPIR_ALLGATHER_TAG,
+                                                     comm, errflag);
                             if (mpi_errno) {
                                 /* for communication errors, just record the error but continue */
+                                *errflag = TRUE;
                                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                             }
@@ -407,15 +413,16 @@
                         else if ((dst < rank) && 
                                  (dst < tree_root + nprocs_completed) &&
                                  (rank >= tree_root + nprocs_completed)) {
-                            mpi_errno = MPIC_Recv(((char *)tmp_buf + offset),
-                                                  tmp_buf_size - offset,
-                                                  MPI_BYTE, dst,
-                                                  MPIR_ALLGATHER_TAG,
-                                                  comm, &status); 
+                            mpi_errno = MPIC_Recv_ft(((char *)tmp_buf + offset),
+                                                     tmp_buf_size - offset,
+                                                     MPI_BYTE, dst,
+                                                     MPIR_ALLGATHER_TAG,
+                                                     comm, &status, errflag); 
                             /* nprocs_completed is also equal to the
                                no. of processes whose data we don't have */
                             if (mpi_errno) {
                                 /* for communication errors, just record the error but continue */
+                                *errflag = TRUE;
                                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                                 last_recv_cnt = 0;
@@ -482,14 +489,15 @@
             src = (rank + pof2) % comm_size;
             dst = (rank - pof2 + comm_size) % comm_size;
             
-            mpi_errno = MPIC_Sendrecv(tmp_buf, curr_cnt, recvtype, dst,
-                                      MPIR_ALLGATHER_TAG,
-                                  ((char *)tmp_buf + curr_cnt*recvtype_extent),
-                                      curr_cnt, recvtype,
-                                      src, MPIR_ALLGATHER_TAG, comm,
-                                      MPI_STATUS_IGNORE);
+            mpi_errno = MPIC_Sendrecv_ft(tmp_buf, curr_cnt, recvtype, dst,
+                                         MPIR_ALLGATHER_TAG,
+                                         ((char *)tmp_buf + curr_cnt*recvtype_extent),
+                                         curr_cnt, recvtype,
+                                         src, MPIR_ALLGATHER_TAG, comm,
+                                         MPI_STATUS_IGNORE, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -504,14 +512,15 @@
             src = (rank + pof2) % comm_size;
             dst = (rank - pof2 + comm_size) % comm_size;
             
-            mpi_errno = MPIC_Sendrecv(tmp_buf, rem * recvcount, recvtype,
-                                      dst, MPIR_ALLGATHER_TAG,
-                                  ((char *)tmp_buf + curr_cnt*recvtype_extent),
-                                      rem * recvcount, recvtype,
-                                      src, MPIR_ALLGATHER_TAG, comm,
-                                      MPI_STATUS_IGNORE);
+            mpi_errno = MPIC_Sendrecv_ft(tmp_buf, rem * recvcount, recvtype,
+                                         dst, MPIR_ALLGATHER_TAG,
+                                         ((char *)tmp_buf + curr_cnt*recvtype_extent),
+                                         rem * recvcount, recvtype,
+                                         src, MPIR_ALLGATHER_TAG, comm,
+                                         MPI_STATUS_IGNORE, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -562,17 +571,18 @@
         j     = rank;
         jnext = left;
         for (i=1; i<comm_size; i++) {
-            mpi_errno = MPIC_Sendrecv(((char *)recvbuf +
-                                       j*recvcount*recvtype_extent), 
-                                      recvcount, recvtype, right,
-                                      MPIR_ALLGATHER_TAG, 
-                                      ((char *)recvbuf +
-                                       jnext*recvcount*recvtype_extent), 
-                                      recvcount, recvtype, left, 
-                                      MPIR_ALLGATHER_TAG, comm,
-                                      MPI_STATUS_IGNORE);
+            mpi_errno = MPIC_Sendrecv_ft(((char *)recvbuf +
+                                          j*recvcount*recvtype_extent), 
+                                         recvcount, recvtype, right,
+                                         MPIR_ALLGATHER_TAG, 
+                                         ((char *)recvbuf +
+                                          jnext*recvcount*recvtype_extent), 
+                                         recvcount, recvtype, left, 
+                                         MPIR_ALLGATHER_TAG, comm,
+                                         MPI_STATUS_IGNORE, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -587,6 +597,9 @@
     MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
     return mpi_errno;
 
  fn_fail:
@@ -608,7 +621,8 @@
     void *recvbuf, 
     int recvcount, 
     MPI_Datatype recvtype, 
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag)
 {
     /* Intercommunicator Allgather.
        Each group does a gather to local root with the local
@@ -650,9 +664,10 @@
 
     if (sendcount != 0) {
         mpi_errno = MPIR_Gather_impl(sendbuf, sendcount, sendtype, tmp_buf, sendcount,
-                                     sendtype, 0, newcomm_ptr);
+                                     sendtype, 0, newcomm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -665,9 +680,10 @@
         if (sendcount != 0) {
             root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
             mpi_errno = MPIR_Bcast_inter(tmp_buf, sendcount*local_size,
-                                         sendtype, root, comm_ptr);
+                                         sendtype, root, comm_ptr, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -677,9 +693,10 @@
         if (recvcount != 0) {
             root = 0;
             mpi_errno = MPIR_Bcast_inter(recvbuf, recvcount*remote_size,
-                                         recvtype, root, comm_ptr);
+                                         recvtype, root, comm_ptr, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -690,9 +707,10 @@
         if (recvcount != 0) {
             root = 0;
             mpi_errno = MPIR_Bcast_inter(recvbuf, recvcount*remote_size,
-                                         recvtype, root, comm_ptr);
+                                         recvtype, root, comm_ptr, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -702,9 +720,10 @@
         if (sendcount != 0) {
             root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
             mpi_errno = MPIR_Bcast_inter(tmp_buf, sendcount*local_size,
-                                         sendtype, root, comm_ptr);
+                                         sendtype, root, comm_ptr, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -715,6 +734,9 @@
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
     return mpi_errno;
 
   fn_fail:
@@ -732,7 +754,7 @@
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                    void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                   MPID_Comm *comm_ptr)
+                   MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
 
@@ -740,13 +762,13 @@
         /* intracommunicator */
         mpi_errno = MPIR_Allgather_intra(sendbuf, sendcount, sendtype,
                                          recvbuf, recvcount, recvtype,
-                                         comm_ptr);
+                                         comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         /* intercommunicator */
         mpi_errno = MPIR_Allgather_inter(sendbuf, sendcount, sendtype,
                                          recvbuf, recvcount, recvtype,
-                                         comm_ptr);
+                                         comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -766,7 +788,7 @@
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Allgather_impl(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                         void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                        MPID_Comm *comm_ptr)
+                        MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
 
@@ -774,12 +796,12 @@
     {
 	mpi_errno = comm_ptr->coll_fns->Allgather(sendbuf, sendcount, sendtype,
                                                   recvbuf, recvcount, recvtype,
-                                                  comm_ptr);
+                                                  comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         mpi_errno = MPIR_Allgather(sendbuf, sendcount, sendtype,
                                    recvbuf, recvcount, recvtype,
-                                   comm_ptr);
+                                   comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -845,6 +867,7 @@
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Comm *comm_ptr = NULL;
+    int errflag = FALSE;
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLGATHER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -917,7 +940,7 @@
 
     mpi_errno = MPIR_Allgather_impl(sendbuf, sendcount, sendtype,
                                     recvbuf, recvcount, recvtype,
-                                    comm_ptr);
+                                    comm_ptr, &errflag);
     if (mpi_errno) goto fn_fail;
     
     /* ... end of body of routine ... */

Modified: mpich2/trunk/src/mpi/coll/allgatherv.c
===================================================================
--- mpich2/trunk/src/mpi/coll/allgatherv.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/allgatherv.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -75,7 +75,8 @@
     int *recvcounts, 
     int *displs,   
     MPI_Datatype recvtype, 
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag )
 {
     MPI_Comm comm;
     int        comm_size, rank, j, i, left, right;
@@ -185,15 +186,16 @@
                     for (j=0; j<dst_tree_root; j++)
                         recv_offset += recvcounts[j];
 
-                    mpi_errno = MPIC_Sendrecv(((char *)tmp_buf + send_offset * recvtype_extent),
-                                              curr_cnt, recvtype, dst,
-                                              MPIR_ALLGATHERV_TAG,  
-                                              ((char *)tmp_buf + recv_offset * recvtype_extent),
-                                              total_count - recv_offset, recvtype, dst,
-                                              MPIR_ALLGATHERV_TAG,
-                                              comm, &status);
+                    mpi_errno = MPIC_Sendrecv_ft(((char *)tmp_buf + send_offset * recvtype_extent),
+                                                 curr_cnt, recvtype, dst,
+                                                 MPIR_ALLGATHERV_TAG,  
+                                                 ((char *)tmp_buf + recv_offset * recvtype_extent),
+                                                 total_count - recv_offset, recvtype, dst,
+                                                 MPIR_ALLGATHERV_TAG,
+                                                 comm, &status, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                         last_recv_cnt = 0;
@@ -255,12 +257,13 @@
                                 offset += recvcounts[j];
                             offset *= recvtype_extent;
 
-                            mpi_errno = MPIC_Send(((char *)tmp_buf + offset),
-                                                  last_recv_cnt,
-                                                  recvtype, dst,
-                                                  MPIR_ALLGATHERV_TAG, comm);
+                            mpi_errno = MPIC_Send_ft(((char *)tmp_buf + offset),
+                                                     last_recv_cnt,
+                                                     recvtype, dst,
+                                                     MPIR_ALLGATHERV_TAG, comm, errflag);
                             if (mpi_errno) {
                                 /* for communication errors, just record the error but continue */
+                                *errflag = TRUE;
                                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                             }
@@ -278,12 +281,13 @@
                             for (j=0; j<(my_tree_root+mask); j++)
                                 offset += recvcounts[j];
 
-                            mpi_errno = MPIC_Recv(((char *)tmp_buf + offset * recvtype_extent),
-                                                  total_count - offset, recvtype,
-                                                  dst, MPIR_ALLGATHERV_TAG,
-                                                  comm, &status);
+                            mpi_errno = MPIC_Recv_ft(((char *)tmp_buf + offset * recvtype_extent),
+                                                     total_count - offset, recvtype,
+                                                     dst, MPIR_ALLGATHERV_TAG,
+                                                     comm, &status, errflag);
                             if (mpi_errno) {
                                 /* for communication errors, just record the error but continue */
+                                *errflag = TRUE;
                                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                                 last_recv_cnt = 0;
@@ -384,14 +388,15 @@
                 recv_offset *= nbytes;
                 
                 if (dst < comm_size) {
-                    mpi_errno = MPIC_Sendrecv(((char *)tmp_buf + send_offset),
-                                              curr_cnt, MPI_BYTE, dst,
-                                              MPIR_ALLGATHERV_TAG,  
-                                              ((char *)tmp_buf + recv_offset),
-                                              tmp_buf_size-recv_offset, MPI_BYTE, dst,
-                                              MPIR_ALLGATHERV_TAG, comm, &status);
+                    mpi_errno = MPIC_Sendrecv_ft(((char *)tmp_buf + send_offset),
+                                                 curr_cnt, MPI_BYTE, dst,
+                                                 MPIR_ALLGATHERV_TAG,  
+                                                 ((char *)tmp_buf + recv_offset),
+                                                 tmp_buf_size-recv_offset, MPI_BYTE, dst,
+                                                 MPIR_ALLGATHERV_TAG, comm, &status, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                         last_recv_cnt = 0;
@@ -445,12 +450,13 @@
                             (rank < tree_root + nprocs_completed)
                             && (dst >= tree_root + nprocs_completed)) {
                             
-                            mpi_errno = MPIC_Send(((char *)tmp_buf + offset),
-                                                  last_recv_cnt, MPI_BYTE,
-                                                  dst, MPIR_ALLGATHERV_TAG,
-                                                  comm);
+                            mpi_errno = MPIC_Send_ft(((char *)tmp_buf + offset),
+                                                     last_recv_cnt, MPI_BYTE,
+                                                     dst, MPIR_ALLGATHERV_TAG,
+                                                     comm, errflag);
                             if (mpi_errno) {
                                 /* for communication errors, just record the error but continue */
+                                *errflag = TRUE;
                                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                             }
@@ -463,13 +469,14 @@
                         else if ((dst < rank) && 
                                  (dst < tree_root + nprocs_completed) &&
                                  (rank >= tree_root + nprocs_completed)) {
-                            mpi_errno = MPIC_Recv(((char *)tmp_buf + offset),
-                                                  tmp_buf_size-offset, MPI_BYTE,
-                                                  dst,
-                                                  MPIR_ALLGATHERV_TAG,
-                                                  comm, &status);
+                            mpi_errno = MPIC_Recv_ft(((char *)tmp_buf + offset),
+                                                     tmp_buf_size-offset, MPI_BYTE,
+                                                     dst,
+                                                     MPIR_ALLGATHERV_TAG,
+                                                     comm, &status, errflag);
                             if (mpi_errno) {
                                 /* for communication errors, just record the error but continue */
+                                *errflag = TRUE;
                                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                                 last_recv_cnt = 0;
@@ -544,13 +551,14 @@
             src = (rank + pof2) % comm_size;
             dst = (rank - pof2 + comm_size) % comm_size;
             
-            mpi_errno = MPIC_Sendrecv(tmp_buf, curr_cnt, recvtype, dst,
-                                      MPIR_ALLGATHERV_TAG,
-                                  ((char *)tmp_buf + curr_cnt*recvtype_extent),
-                                      total_count - curr_cnt, recvtype,
-                                      src, MPIR_ALLGATHERV_TAG, comm, &status);
+            mpi_errno = MPIC_Sendrecv_ft(tmp_buf, curr_cnt, recvtype, dst,
+                                         MPIR_ALLGATHERV_TAG,
+                                         ((char *)tmp_buf + curr_cnt*recvtype_extent),
+                                         total_count - curr_cnt, recvtype,
+                                         src, MPIR_ALLGATHERV_TAG, comm, &status, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 recv_cnt = 0;
@@ -572,14 +580,15 @@
             for (i=0; i<rem; i++)
                 send_cnt += recvcounts[(rank+i)%comm_size];
 
-            mpi_errno = MPIC_Sendrecv(tmp_buf, send_cnt, recvtype,
-                                      dst, MPIR_ALLGATHERV_TAG,
-                                  ((char *)tmp_buf + curr_cnt*recvtype_extent),
-                                      total_count - curr_cnt, recvtype,
-                                      src, MPIR_ALLGATHERV_TAG, comm,
-                                      MPI_STATUS_IGNORE);
+            mpi_errno = MPIC_Sendrecv_ft(tmp_buf, send_cnt, recvtype,
+                                         dst, MPIR_ALLGATHERV_TAG,
+                                         ((char *)tmp_buf + curr_cnt*recvtype_extent),
+                                         total_count - curr_cnt, recvtype,
+                                         src, MPIR_ALLGATHERV_TAG, comm,
+                                         MPI_STATUS_IGNORE, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -664,29 +673,32 @@
 		 * consecutive processes contribute 0 bytes each. */
 	    }
 	    else if (!sendnow) { /* If there's no data to send, just do a recv call */
-		mpi_errno = MPIC_Recv(rbuf, recvnow, recvtype, left, MPIR_ALLGATHERV_TAG, comm, &status);
+		mpi_errno = MPIC_Recv_ft(rbuf, recvnow, recvtype, left, MPIR_ALLGATHERV_TAG, comm, &status, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
 		torecv -= recvnow;
 	    }
 	    else if (!recvnow) { /* If there's no data to receive, just do a send call */
-		mpi_errno = MPIC_Send(sbuf, sendnow, recvtype, right, MPIR_ALLGATHERV_TAG, comm);
+		mpi_errno = MPIC_Send_ft(sbuf, sendnow, recvtype, right, MPIR_ALLGATHERV_TAG, comm, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
 		tosend -= sendnow;
 	    }
 	    else { /* There's data to be sent and received */
-		mpi_errno = MPIC_Sendrecv(sbuf, sendnow, recvtype, right, MPIR_ALLGATHERV_TAG, 
-					  rbuf, recvnow, recvtype, left, MPIR_ALLGATHERV_TAG,
-					  comm, &status);
+		mpi_errno = MPIC_Sendrecv_ft(sbuf, sendnow, recvtype, right, MPIR_ALLGATHERV_TAG, 
+                                             rbuf, recvnow, recvtype, left, MPIR_ALLGATHERV_TAG,
+                                             comm, &status, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -713,6 +725,9 @@
     MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -733,7 +748,8 @@
     int *recvcounts, 
     int *displs,   
     MPI_Datatype recvtype, 
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag )
 {
 /* Intercommunicator Allgatherv.
    This is done differently from the intercommunicator allgather
@@ -758,9 +774,10 @@
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
         mpi_errno = MPIR_Gatherv_impl(sendbuf, sendcount, sendtype, recvbuf,
                                       recvcounts, displs, recvtype, root,
-                                      comm_ptr);
+                                      comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -768,9 +785,10 @@
         root = 0;
         mpi_errno = MPIR_Gatherv_impl(sendbuf, sendcount, sendtype, recvbuf,
                                       recvcounts, displs, recvtype, root,
-                                      comm_ptr);
+                                      comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -780,9 +798,10 @@
         root = 0;
         mpi_errno = MPIR_Gatherv_impl(sendbuf, sendcount, sendtype, recvbuf,
                                       recvcounts, displs, recvtype, root,
-                                      comm_ptr);
+                                      comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -790,9 +809,10 @@
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
         mpi_errno = MPIR_Gatherv_impl(sendbuf, sendcount, sendtype, recvbuf,
                                       recvcounts, displs, recvtype, root,
-                                      comm_ptr);
+                                      comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -815,9 +835,10 @@
     mpi_errno = MPIR_Type_commit_impl(&newtype);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
-    mpi_errno = MPIR_Bcast_intra(recvbuf, 1, newtype, 0, newcomm_ptr);
+    mpi_errno = MPIR_Bcast_intra(recvbuf, 1, newtype, 0, newcomm_ptr, errflag);
     if (mpi_errno) {
         /* for communication errors, just record the error but continue */
+        *errflag = TRUE;
         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
     }
@@ -827,6 +848,9 @@
  fn_exit:
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
     return mpi_errno;
  fn_fail:
     /* --BEGIN ERROR HANDLING-- */
@@ -848,7 +872,7 @@
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                     void *recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype,
-                    MPID_Comm *comm_ptr)
+                    MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
         
@@ -856,13 +880,13 @@
         /* intracommunicator */
         mpi_errno = MPIR_Allgatherv_intra(sendbuf, sendcount, sendtype,
                                           recvbuf, recvcounts, displs, recvtype,
-                                          comm_ptr);
+                                          comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         /* intracommunicator */
         mpi_errno = MPIR_Allgatherv_inter(sendbuf, sendcount, sendtype,
                                           recvbuf, recvcounts, displs, recvtype,
-                                          comm_ptr);
+                                          comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -884,17 +908,17 @@
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Allgatherv_impl(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                          void *recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype,
-                         MPID_Comm *comm_ptr)
+                         MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
         
     if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Allgatherv != NULL) {
 	mpi_errno = comm_ptr->coll_fns->Allgatherv(sendbuf, sendcount, sendtype,
-                                                   recvbuf, recvcounts, displs, recvtype, comm_ptr);
+                                                   recvbuf, recvcounts, displs, recvtype, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         mpi_errno = MPIR_Allgatherv(sendbuf, sendcount, sendtype,
-                                    recvbuf, recvcounts, displs, recvtype, comm_ptr);
+                                    recvbuf, recvcounts, displs, recvtype, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -967,6 +991,7 @@
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Comm *comm_ptr = NULL;
+    int errflag = FALSE;
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLGATHERV);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -1046,7 +1071,7 @@
 
     mpi_errno = MPIR_Allgatherv_impl(sendbuf, sendcount, sendtype,
                                      recvbuf, recvcounts, displs, recvtype,
-                                     comm_ptr);
+                                     comm_ptr, &errflag);
     if (mpi_errno) goto fn_fail;
 
     /* ... end of body of routine ... */

Modified: mpich2/trunk/src/mpi/coll/allreduce.c
===================================================================
--- mpich2/trunk/src/mpi/coll/allreduce.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/allreduce.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -93,15 +93,15 @@
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 static inline int allreduce_intra_or_coll_fn(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
-                                             MPID_Comm *comm_ptr)
+                                             MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
 
     if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Allreduce != NULL) {
-	mpi_errno = comm_ptr->coll_fns->Allreduce(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+	mpi_errno = comm_ptr->coll_fns->Allreduce(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
-        mpi_errno = MPIR_Allreduce_intra(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+        mpi_errno = MPIR_Allreduce_intra(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
         
@@ -124,7 +124,8 @@
     int count, 
     MPI_Datatype datatype, 
     MPI_Op op, 
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag )
 {
     int is_homogeneous;
 #ifdef MPID_HAS_HETERO
@@ -174,16 +175,18 @@
                 /* IN_PLACE and not root of reduce. Data supplied to this
                    allreduce is in recvbuf. Pass that as the sendbuf to reduce. */
 			
-                mpi_errno = MPIR_Reduce_impl(recvbuf, NULL, count, datatype, op, 0, comm_ptr->node_comm);
+                mpi_errno = MPIR_Reduce_impl(recvbuf, NULL, count, datatype, op, 0, comm_ptr->node_comm, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
             } else {
-                mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype, op, 0, comm_ptr->node_comm);
+                mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype, op, 0, comm_ptr->node_comm, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -198,9 +201,11 @@
 
         /* now do an IN_PLACE allreduce among the local roots of all nodes */
         if (comm_ptr->node_roots_comm != NULL) {
-            mpi_errno = allreduce_intra_or_coll_fn(MPI_IN_PLACE, recvbuf, count, datatype, op, comm_ptr->node_roots_comm);
+            mpi_errno = allreduce_intra_or_coll_fn(MPI_IN_PLACE, recvbuf, count, datatype, op, comm_ptr->node_roots_comm,
+                                                   errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -208,9 +213,10 @@
 
         /* now broadcast the result among local processes */
         if (comm_ptr->node_comm != NULL) {
-            mpi_errno = MPIR_Bcast_impl(recvbuf, count, datatype, 0, comm_ptr->node_comm);
+            mpi_errno = MPIR_Bcast_impl(recvbuf, count, datatype, 0, comm_ptr->node_comm, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -231,16 +237,18 @@
         /* heterogeneous. To get the same result on all processes, we
            do a reduce to 0 and then broadcast. */
         mpi_errno = MPIR_Reduce_impl ( sendbuf, recvbuf, count, datatype,
-                                       op, 0, comm_ptr );
+                                       op, 0, comm_ptr, errflag );
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
 
-        mpi_errno = MPIR_Bcast_impl( recvbuf, count, datatype, 0, comm_ptr );
+        mpi_errno = MPIR_Bcast_impl( recvbuf, count, datatype, 0, comm_ptr, errflag );
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -314,11 +322,12 @@
         
         if (rank < 2*rem) {
             if (rank % 2 == 0) { /* even */
-                mpi_errno = MPIC_Send(recvbuf, count, 
-                                      datatype, rank+1,
-                                      MPIR_ALLREDUCE_TAG, comm);
+                mpi_errno = MPIC_Send_ft(recvbuf, count, 
+                                         datatype, rank+1,
+                                         MPIR_ALLREDUCE_TAG, comm, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -329,12 +338,13 @@
                 newrank = -1; 
             }
             else { /* odd */
-                mpi_errno = MPIC_Recv(tmp_buf, count, 
-                                      datatype, rank-1,
-                                      MPIR_ALLREDUCE_TAG, comm,
-                                      MPI_STATUS_IGNORE);
+                mpi_errno = MPIC_Recv_ft(tmp_buf, count, 
+                                         datatype, rank-1,
+                                         MPIR_ALLREDUCE_TAG, comm,
+                                         MPI_STATUS_IGNORE, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -381,13 +391,14 @@
 
                     /* Send the most current data, which is in recvbuf. Recv
                        into tmp_buf */ 
-                    mpi_errno = MPIC_Sendrecv(recvbuf, count, datatype, 
-                                              dst, MPIR_ALLREDUCE_TAG, tmp_buf,
-                                              count, datatype, dst,
-                                              MPIR_ALLREDUCE_TAG, comm,
-                                              MPI_STATUS_IGNORE);
+                    mpi_errno = MPIC_Sendrecv_ft(recvbuf, count, datatype, 
+                                                 dst, MPIR_ALLREDUCE_TAG, tmp_buf,
+                                                 count, datatype, dst,
+                                                 MPIR_ALLREDUCE_TAG, comm,
+                                                 MPI_STATUS_IGNORE, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
@@ -477,17 +488,18 @@
                            send_cnt, recv_cnt, last_idx);
                            */
                     /* Send data from recvbuf. Recv into tmp_buf */ 
-                    mpi_errno = MPIC_Sendrecv((char *) recvbuf +
-                                              disps[send_idx]*extent,
-                                              send_cnt, datatype,  
-                                              dst, MPIR_ALLREDUCE_TAG, 
-                                              (char *) tmp_buf +
-                                              disps[recv_idx]*extent,
-                                              recv_cnt, datatype, dst,
-                                              MPIR_ALLREDUCE_TAG, comm,
-                                              MPI_STATUS_IGNORE);
+                    mpi_errno = MPIC_Sendrecv_ft((char *) recvbuf +
+                                                 disps[send_idx]*extent,
+                                                 send_cnt, datatype,  
+                                                 dst, MPIR_ALLREDUCE_TAG, 
+                                                 (char *) tmp_buf +
+                                                 disps[recv_idx]*extent,
+                                                 recv_cnt, datatype, dst,
+                                                 MPIR_ALLREDUCE_TAG, comm,
+                                                 MPI_STATUS_IGNORE, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
@@ -541,17 +553,18 @@
                             recv_cnt += cnts[i];
                     }
 
-                    mpi_errno = MPIC_Sendrecv((char *) recvbuf +
-                                              disps[send_idx]*extent,
-                                              send_cnt, datatype,  
-                                              dst, MPIR_ALLREDUCE_TAG, 
-                                              (char *) recvbuf +
-                                              disps[recv_idx]*extent,
-                                              recv_cnt, datatype, dst,
-                                              MPIR_ALLREDUCE_TAG, comm,
-                                              MPI_STATUS_IGNORE);
+                    mpi_errno = MPIC_Sendrecv_ft((char *) recvbuf +
+                                                 disps[send_idx]*extent,
+                                                 send_cnt, datatype,  
+                                                 dst, MPIR_ALLREDUCE_TAG, 
+                                                 (char *) recvbuf +
+                                                 disps[recv_idx]*extent,
+                                                 recv_cnt, datatype, dst,
+                                                 MPIR_ALLREDUCE_TAG, comm,
+                                                 MPI_STATUS_IGNORE, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
@@ -568,16 +581,17 @@
            (rank-1), the ranks who didn't participate above. */
         if (rank < 2*rem) {
             if (rank % 2)  /* odd */
-                mpi_errno = MPIC_Send(recvbuf, count, 
-                                      datatype, rank-1,
-                                      MPIR_ALLREDUCE_TAG, comm);
+                mpi_errno = MPIC_Send_ft(recvbuf, count, 
+                                         datatype, rank-1,
+                                         MPIR_ALLREDUCE_TAG, comm, errflag);
             else  /* even */
-                mpi_errno = MPIC_Recv(recvbuf, count,
-                                      datatype, rank+1,
-                                      MPIR_ALLREDUCE_TAG, comm,
-                                      MPI_STATUS_IGNORE);
+                mpi_errno = MPIC_Recv_ft(recvbuf, count,
+                                         datatype, rank+1,
+                                         MPIR_ALLREDUCE_TAG, comm,
+                                         MPI_STATUS_IGNORE, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -613,7 +627,8 @@
     int count, 
     MPI_Datatype datatype, 
     MPI_Op op, 
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag )
 {
 /* Intercommunicator Allreduce.
    We first do an intercommunicator reduce to rank 0 on left group,
@@ -635,9 +650,10 @@
         /* reduce from right group to rank 0*/
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
         mpi_errno = MPIR_Reduce_inter(sendbuf, recvbuf, count, datatype, op,
-				      root, comm_ptr);
+				      root, comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -645,9 +661,10 @@
         /* reduce to rank 0 of right group */
         root = 0;
         mpi_errno = MPIR_Reduce_inter(sendbuf, recvbuf, count, datatype, op,
-				      root, comm_ptr);
+				      root, comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -656,9 +673,10 @@
         /* reduce to rank 0 of left group */
         root = 0;
         mpi_errno = MPIR_Reduce_inter(sendbuf, recvbuf, count, datatype, op,
-				      root, comm_ptr);
+				      root, comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -666,9 +684,10 @@
         /* reduce from right group to rank 0 */
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
         mpi_errno = MPIR_Reduce_inter(sendbuf, recvbuf, count, datatype, op,
-				      root, comm_ptr);
+				      root, comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -680,9 +699,10 @@
 
     newcomm_ptr = comm_ptr->local_comm;
 
-    mpi_errno = MPIR_Bcast_impl(recvbuf, count, datatype, 0, newcomm_ptr);
+    mpi_errno = MPIR_Bcast_impl(recvbuf, count, datatype, 0, newcomm_ptr, errflag);
     if (mpi_errno) {
         /* for communication errors, just record the error but continue */
+        *errflag = TRUE;
         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
     }
@@ -690,6 +710,9 @@
   fn_exit:
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
     return mpi_errno;
 
   fn_fail:
@@ -704,18 +727,19 @@
 #define FUNCNAME MPIR_Allreduce
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPIR_Allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr)
+int MPIR_Allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr,
+                   int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
 
     if (comm_ptr->comm_kind == MPID_INTRACOMM) {
         /* intracommunicator */
-        mpi_errno = MPIR_Allreduce_intra(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+        mpi_errno = MPIR_Allreduce_intra(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
     else {
         /* intercommunicator */
-        mpi_errno = MPIR_Allreduce_inter(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+        mpi_errno = MPIR_Allreduce_inter(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -734,25 +758,26 @@
 #define FUNCNAME MPIR_Allreduce_impl
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPIR_Allreduce_impl(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr)
+int MPIR_Allreduce_impl(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr,
+                        int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
 
     if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Allreduce != NULL)
     {
-	mpi_errno = comm_ptr->coll_fns->Allreduce(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+	mpi_errno = comm_ptr->coll_fns->Allreduce(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
     else
     {
         if (comm_ptr->comm_kind == MPID_INTRACOMM) {
             /* intracommunicator */
-            mpi_errno = MPIR_Allreduce_intra(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+            mpi_errno = MPIR_Allreduce_intra(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 	}
         else {
             /* intercommunicator */
-            mpi_errno = MPIR_Allreduce_inter(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+            mpi_errno = MPIR_Allreduce_inter(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         }
     }
@@ -803,6 +828,7 @@
     static const char FCNAME[] = "MPI_Allreduce";
     int mpi_errno = MPI_SUCCESS;
     MPID_Comm *comm_ptr = NULL;
+    int errflag = FALSE;
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLREDUCE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -875,7 +901,7 @@
 
     /* ... body of routine ...  */
 
-    mpi_errno = MPIR_Allreduce_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+    mpi_errno = MPIR_Allreduce_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, &errflag);
     if (mpi_errno) goto fn_fail;
 
     /* ... end of body of routine ... */

Modified: mpich2/trunk/src/mpi/coll/alltoall.c
===================================================================
--- mpich2/trunk/src/mpi/coll/alltoall.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/alltoall.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -82,7 +82,8 @@
     void *recvbuf, 
     int recvcount, 
     MPI_Datatype recvtype, 
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag )
 {
     int          comm_size, i, j, pof2;
     MPI_Aint     sendtype_extent, recvtype_extent;
@@ -134,26 +135,28 @@
             for (j = i; j < comm_size; ++j) {
                 if (rank == i) {
                     /* also covers the (rank == i && rank == j) case */
-                    mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + j*recvcount*recvtype_extent),
-                                                      recvcount, recvtype,
-                                                      j, MPIR_ALLTOALL_TAG,
-                                                      j, MPIR_ALLTOALL_TAG,
-                                                      comm, &status);
+                    mpi_errno = MPIC_Sendrecv_replace_ft(((char *)recvbuf + j*recvcount*recvtype_extent),
+                                                         recvcount, recvtype,
+                                                         j, MPIR_ALLTOALL_TAG,
+                                                         j, MPIR_ALLTOALL_TAG,
+                                                         comm, &status, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
                 }
                 else if (rank == j) {
                     /* same as above with i/j args reversed */
-                    mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + i*recvcount*recvtype_extent),
-                                                      recvcount, recvtype,
-                                                      i, MPIR_ALLTOALL_TAG,
-                                                      i, MPIR_ALLTOALL_TAG,
-                                                      comm, &status);
+                    mpi_errno = MPIC_Sendrecv_replace_ft(((char *)recvbuf + i*recvcount*recvtype_extent),
+                                                         recvcount, recvtype,
+                                                         i, MPIR_ALLTOALL_TAG,
+                                                         i, MPIR_ALLTOALL_TAG,
+                                                         comm, &status, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
@@ -221,12 +224,13 @@
             mpi_errno = MPIR_Pack_impl(recvbuf, 1, newtype, tmp_buf, pack_size, &position);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
-            mpi_errno = MPIC_Sendrecv(tmp_buf, position, MPI_PACKED, dst,
-                                      MPIR_ALLTOALL_TAG, recvbuf, 1, newtype,
-                                      src, MPIR_ALLTOALL_TAG, comm,
-                                      MPI_STATUS_IGNORE);
+            mpi_errno = MPIC_Sendrecv_ft(tmp_buf, position, MPI_PACKED, dst,
+                                         MPIR_ALLTOALL_TAG, recvbuf, 1, newtype,
+                                         src, MPIR_ALLTOALL_TAG, comm,
+                                         MPI_STATUS_IGNORE, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -306,17 +310,18 @@
             my_tree_root <<= i;
             
             if (dst < comm_size) {
-                mpi_errno = MPIC_Sendrecv(((char *)tmp_buf +
-                                           my_tree_root*sendbuf_extent),
-                                          curr_cnt, sendtype,
-                                          dst, MPIR_ALLTOALL_TAG, 
-                                          ((char *)tmp_buf +
-                                           dst_tree_root*sendbuf_extent),
-					  sendbuf_extent*(comm_size-dst_tree_root),
-                                          sendtype, dst, MPIR_ALLTOALL_TAG, 
-                                          comm, &status);
+                mpi_errno = MPIC_Sendrecv_ft(((char *)tmp_buf +
+                                              my_tree_root*sendbuf_extent),
+                                             curr_cnt, sendtype,
+                                             dst, MPIR_ALLTOALL_TAG, 
+                                             ((char *)tmp_buf +
+                                              dst_tree_root*sendbuf_extent),
+                                             sendbuf_extent*(comm_size-dst_tree_root),
+                                             sendtype, dst, MPIR_ALLTOALL_TAG, 
+                                             comm, &status, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     last_recv_cnt = 0;
@@ -363,13 +368,14 @@
                         (rank < tree_root + nprocs_completed)
                         && (dst >= tree_root + nprocs_completed)) {
                         /* send the data received in this step above */
-                        mpi_errno = MPIC_Send(((char *)tmp_buf +
-                                               dst_tree_root*sendbuf_extent),
-                                              last_recv_cnt, sendtype,
-                                              dst, MPIR_ALLTOALL_TAG,
-                                              comm);  
+                        mpi_errno = MPIC_Send_ft(((char *)tmp_buf +
+                                                  dst_tree_root*sendbuf_extent),
+                                                 last_recv_cnt, sendtype,
+                                                 dst, MPIR_ALLTOALL_TAG,
+                                                 comm, errflag);  
                         if (mpi_errno) {
                             /* for communication errors, just record the error but continue */
+                            *errflag = TRUE;
                             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                         }
@@ -379,14 +385,15 @@
                     else if ((dst < rank) && 
                              (dst < tree_root + nprocs_completed) &&
                              (rank >= tree_root + nprocs_completed)) {
-                        mpi_errno = MPIC_Recv(((char *)tmp_buf +
-                                               dst_tree_root*sendbuf_extent),
-					      sendbuf_extent*(comm_size-dst_tree_root),
-                                              sendtype,   
-                                              dst, MPIR_ALLTOALL_TAG,
-                                              comm, &status); 
+                        mpi_errno = MPIC_Recv_ft(((char *)tmp_buf +
+                                                  dst_tree_root*sendbuf_extent),
+                                                 sendbuf_extent*(comm_size-dst_tree_root),
+                                                 sendtype,   
+                                                 dst, MPIR_ALLTOALL_TAG,
+                                                 comm, &status, errflag); 
                         if (mpi_errno) {
                             /* for communication errors, just record the error but continue */
+                            *errflag = TRUE;
                             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                             last_recv_cnt = 0;
@@ -451,26 +458,26 @@
             /* do the communication -- post ss sends and receives: */
             for ( i=0; i<ss; i++ ) { 
                 dst = (rank+i+ii) % comm_size;
-                mpi_errno = MPIC_Irecv((char *)recvbuf +
-                                       dst*recvcount*recvtype_extent, 
-                                       recvcount, recvtype, dst,
-                                       MPIR_ALLTOALL_TAG, comm,
-                                       &reqarray[i]);
+                mpi_errno = MPIC_Irecv_ft((char *)recvbuf +
+                                          dst*recvcount*recvtype_extent, 
+                                          recvcount, recvtype, dst,
+                                          MPIR_ALLTOALL_TAG, comm,
+                                          &reqarray[i]);
                 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
             }
 
             for ( i=0; i<ss; i++ ) { 
                 dst = (rank-i-ii+comm_size) % comm_size;
-                mpi_errno = MPIC_Isend((char *)sendbuf +
-                                       dst*sendcount*sendtype_extent, 
-                                       sendcount, sendtype, dst,
-                                       MPIR_ALLTOALL_TAG, comm,
-                                       &reqarray[i+ss]);
+                mpi_errno = MPIC_Isend_ft((char *)sendbuf +
+                                          dst*sendcount*sendtype_extent, 
+                                          sendcount, sendtype, dst,
+                                          MPIR_ALLTOALL_TAG, comm,
+                                          &reqarray[i+ss], errflag);
                 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
             }
   
             /* ... then wait for them to finish: */
-            mpi_errno = MPIR_Waitall_impl(2*ss,reqarray,starray);
+            mpi_errno = MPIC_Waitall_ft(2*ss,reqarray,starray, errflag);
             if (mpi_errno && mpi_errno != MPI_ERR_IN_STATUS) MPIU_ERR_POP(mpi_errno);
             
             /* --BEGIN ERROR HANDLING-- */
@@ -480,6 +487,7 @@
                         mpi_errno = starray[j].MPI_ERROR;
                         if (mpi_errno) {
                             /* for communication errors, just record the error but continue */
+                            *errflag = TRUE;
                             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                         }
@@ -524,16 +532,17 @@
                 dst = (rank + i) % comm_size;
             }
 
-            mpi_errno = MPIC_Sendrecv(((char *)sendbuf +
-                                       dst*sendcount*sendtype_extent), 
-                                      sendcount, sendtype, dst,
-                                      MPIR_ALLTOALL_TAG, 
-                                      ((char *)recvbuf +
-                                       src*recvcount*recvtype_extent),
-                                      recvcount, recvtype, src,
-                                      MPIR_ALLTOALL_TAG, comm, &status);
+            mpi_errno = MPIC_Sendrecv_ft(((char *)sendbuf +
+                                          dst*sendcount*sendtype_extent), 
+                                         sendcount, sendtype, dst,
+                                         MPIR_ALLTOALL_TAG, 
+                                         ((char *)recvbuf +
+                                          src*recvcount*recvtype_extent),
+                                         recvcount, recvtype, src,
+                                         MPIR_ALLTOALL_TAG, comm, &status, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -546,6 +555,9 @@
     MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
     return mpi_errno;
  fn_fail:
     if (newtype != MPI_DATATYPE_NULL)
@@ -567,7 +579,8 @@
     void *recvbuf, 
     int recvcount, 
     MPI_Datatype recvtype, 
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag )
 {
 /* Intercommunicator alltoall. We use a pairwise exchange algorithm
    similar to the one used in intracommunicator alltoall for long
@@ -623,12 +636,13 @@
             sendaddr = (char *)sendbuf + dst*sendcount*sendtype_extent;
         }
 
-        mpi_errno = MPIC_Sendrecv(sendaddr, sendcount, sendtype, dst, 
-                                  MPIR_ALLTOALL_TAG, recvaddr,
-                                  recvcount, recvtype, src,
-                                  MPIR_ALLTOALL_TAG, comm, &status);
+        mpi_errno = MPIC_Sendrecv_ft(sendaddr, sendcount, sendtype, dst,
+                                     MPIR_ALLTOALL_TAG, recvaddr,
+                                     recvcount, recvtype, src,
+                                     MPIR_ALLTOALL_TAG, comm, &status, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -639,6 +653,9 @@
     MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -651,20 +668,21 @@
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                   void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                  MPID_Comm *comm_ptr)
+                  MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
         
     if (comm_ptr->comm_kind == MPID_INTRACOMM) {
         /* intracommunicator */
         mpi_errno = MPIR_Alltoall_intra(sendbuf, sendcount, sendtype,
-                                        recvbuf, recvcount, recvtype, comm_ptr);
+                                        recvbuf, recvcount, recvtype,
+                                        comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         /* intercommunicator */
         mpi_errno = MPIR_Alltoall_inter(sendbuf, sendcount, sendtype,
                                         recvbuf, recvcount, recvtype,
-                                        comm_ptr);
+                                        comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -680,17 +698,18 @@
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Alltoall_impl(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                        void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                       MPID_Comm *comm_ptr)
+                       MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
     if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Alltoall != NULL) {
 	mpi_errno = comm_ptr->coll_fns->Alltoall(sendbuf, sendcount, sendtype,
                                                  recvbuf, recvcount, recvtype,
-                                                 comm_ptr);
+                                                 comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         mpi_errno = MPIR_Alltoall(sendbuf, sendcount, sendtype,
-                                  recvbuf, recvcount, recvtype, comm_ptr);
+                                  recvbuf, recvcount, recvtype,
+                                  comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
     
@@ -737,6 +756,7 @@
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Comm *comm_ptr = NULL;
+    int errflag = FALSE;
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLTOALL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -798,7 +818,7 @@
 
     /* ... body of routine ...  */
 
-    mpi_errno = MPIR_Alltoall_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm_ptr);
+    mpi_errno = MPIR_Alltoall_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm_ptr, &errflag);
     if (mpi_errno) goto fn_fail;
 
     /* ... end of body of routine ... */

Modified: mpich2/trunk/src/mpi/coll/alltoallv.c
===================================================================
--- mpich2/trunk/src/mpi/coll/alltoallv.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/alltoallv.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -64,11 +64,13 @@
 	int *recvcnts, 
 	int *rdispls, 
 	MPI_Datatype recvtype, 
-	MPID_Comm *comm_ptr )
+	MPID_Comm *comm_ptr,
+        int *errflag)
 {
     int        comm_size, i, j;
     MPI_Aint   send_extent, recv_extent;
     int        mpi_errno = MPI_SUCCESS;
+    int mpi_errno_ret = MPI_SUCCESS;
     MPI_Status *starray;
     MPI_Status status;
     MPI_Request *reqarray;
@@ -104,21 +106,32 @@
             for (j = i; j < comm_size; ++j) {
                 if (rank == i) {
                     /* also covers the (rank == i && rank == j) case */
-                    mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + rdispls[j]*recv_extent),
-                                                      recvcnts[j], recvtype,
-                                                      j, MPIR_ALLTOALLV_TAG,
-                                                      j, MPIR_ALLTOALLV_TAG,
-                                                      comm, &status);
-                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+                    mpi_errno = MPIC_Sendrecv_replace_ft(((char *)recvbuf + rdispls[j]*recv_extent),
+                                                         recvcnts[j], recvtype,
+                                                         j, MPIR_ALLTOALLV_TAG,
+                                                         j, MPIR_ALLTOALLV_TAG,
+                                                         comm, &status, errflag);
+                    if (mpi_errno) {
+                        /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
+                        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+                        MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
+                    }
+
                 }
                 else if (rank == j) {
                     /* same as above with i/j args reversed */
-                    mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + rdispls[i]*recv_extent),
-                                                      recvcnts[i], recvtype,
-                                                      i, MPIR_ALLTOALLV_TAG,
-                                                      i, MPIR_ALLTOALLV_TAG,
-                                                      comm, &status);
-                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+                    mpi_errno = MPIC_Sendrecv_replace_ft(((char *)recvbuf + rdispls[i]*recv_extent),
+                                                         recvcnts[i], recvtype,
+                                                         i, MPIR_ALLTOALLV_TAG,
+                                                         i, MPIR_ALLTOALLV_TAG,
+                                                         comm, &status, errflag);
+                    if (mpi_errno) {
+                        /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
+                        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+                        MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
+                    }
                 }
             }
         }
@@ -145,11 +158,16 @@
                     if (type_size) {
                         MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
                                                          rdispls[dst]*recv_extent);
-                        mpi_errno = MPIC_Irecv((char *)recvbuf+rdispls[dst]*recv_extent,
-                                               recvcnts[dst], recvtype, dst,
-                                               MPIR_ALLTOALLV_TAG, comm,
-                                               &reqarray[req_cnt]);
-                        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+                        mpi_errno = MPIC_Irecv_ft((char *)recvbuf+rdispls[dst]*recv_extent,
+                                                  recvcnts[dst], recvtype, dst,
+                                                  MPIR_ALLTOALLV_TAG, comm,
+                                                  &reqarray[req_cnt]);
+                        if (mpi_errno) {
+                            /* for communication errors, just record the error but continue */
+                            *errflag = TRUE;
+                            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+                            MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
+                        }
                         req_cnt++;
                     }
                 }
@@ -162,17 +180,22 @@
                     if (type_size) {
                         MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
                                                          sdispls[dst]*send_extent);
-                        mpi_errno = MPIC_Isend((char *)sendbuf+sdispls[dst]*send_extent,
-                                               sendcnts[dst], sendtype, dst,
-                                               MPIR_ALLTOALLV_TAG, comm,
-                                               &reqarray[req_cnt]);
-                        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+                        mpi_errno = MPIC_Isend_ft((char *)sendbuf+sdispls[dst]*send_extent,
+                                                  sendcnts[dst], sendtype, dst,
+                                                  MPIR_ALLTOALLV_TAG, comm,
+                                                  &reqarray[req_cnt], errflag);
+                        if (mpi_errno) {
+                            /* for communication errors, just record the error but continue */
+                            *errflag = TRUE;
+                            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+                            MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
+                        }
                         req_cnt++;
                     }
                 }
             }
 
-            mpi_errno = MPIR_Waitall_impl(req_cnt, reqarray, starray);
+            mpi_errno = MPIC_Waitall_ft(req_cnt, reqarray, starray, errflag);
             if (mpi_errno && mpi_errno != MPI_ERR_IN_STATUS) MPIU_ERR_POP(mpi_errno);
 
             /* --BEGIN ERROR HANDLING-- */
@@ -180,7 +203,12 @@
                 for (i=0; i<req_cnt; i++) {
                     if (starray[i].MPI_ERROR != MPI_SUCCESS) {
                         mpi_errno = starray[i].MPI_ERROR;
-                        MPIU_ERR_POP(mpi_errno);
+                        if (mpi_errno) {
+                            /* for communication errors, just record the error but continue */
+                            *errflag = TRUE;
+                            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+                            MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
+                        }
                     }
                 }
             }
@@ -192,8 +220,14 @@
     /* check if multiple threads are calling this collective function */
     MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     MPIU_CHKLMEM_FREEALL();
-    return (mpi_errno);
 
+    if (mpi_errno_ret)
+        mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
+    return mpi_errno;
+
 fn_fail:
     goto fn_exit;
 }
@@ -214,7 +248,8 @@
     int *recvcnts, 
     int *rdispls, 
     MPI_Datatype recvtype, 
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag )
 {
 /* Intercommunicator alltoallv. We use a pairwise exchange algorithm
    similar to the one used in intracommunicator alltoallv. Since the
@@ -231,6 +266,7 @@
     int local_size, remote_size, max_size, i;
     MPI_Aint   send_extent, recv_extent;
     int        mpi_errno = MPI_SUCCESS;
+    int mpi_errno_ret = MPI_SUCCESS;
     MPI_Status status;
     int src, dst, rank, sendcount, recvcount;
     char *sendaddr, *recvaddr;
@@ -276,17 +312,26 @@
             sendcount = sendcnts[dst];
         }
 
-        mpi_errno = MPIC_Sendrecv(sendaddr, sendcount, sendtype, dst, 
-                                  MPIR_ALLTOALLV_TAG, recvaddr, recvcount, 
-                                  recvtype, src, MPIR_ALLTOALLV_TAG,
-                                  comm, &status);
-        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+        mpi_errno = MPIC_Sendrecv_ft(sendaddr, sendcount, sendtype, dst, 
+                                     MPIR_ALLTOALLV_TAG, recvaddr, recvcount, 
+                                     recvtype, src, MPIR_ALLTOALLV_TAG,
+                                     comm, &status, errflag);
+        if (mpi_errno) {
+            /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
+            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+            MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
+        }
     }
 
  fn_exit:
     /* check if multiple threads are calling this collective function */
     MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
-    return (mpi_errno);
+    if (mpi_errno_ret)
+        mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+    return mpi_errno;
  fn_fail:
     goto fn_exit;
 }
@@ -298,7 +343,7 @@
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Alltoallv(void *sendbuf, int *sendcnts, int *sdispls, MPI_Datatype sendtype,
                    void *recvbuf, int *recvcnts, int *rdispls, MPI_Datatype recvtype,
-                   MPID_Comm *comm_ptr)
+                   MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
         
@@ -306,13 +351,13 @@
         /* intracommunicator */
         mpi_errno = MPIR_Alltoallv_intra(sendbuf, sendcnts, sdispls,
                                          sendtype, recvbuf, recvcnts,
-                                         rdispls, recvtype, comm_ptr);
+                                         rdispls, recvtype, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         /* intercommunicator */
         mpi_errno = MPIR_Alltoallv_inter(sendbuf, sendcnts, sdispls,
                                          sendtype, recvbuf, recvcnts,
-                                         rdispls, recvtype, comm_ptr);
+                                         rdispls, recvtype, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -328,19 +373,19 @@
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Alltoallv_impl(void *sendbuf, int *sendcnts, int *sdispls, MPI_Datatype sendtype,
                         void *recvbuf, int *recvcnts, int *rdispls, MPI_Datatype recvtype,
-                        MPID_Comm *comm_ptr)
+                        MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
         
     if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Alltoallv != NULL) {
 	mpi_errno = comm_ptr->coll_fns->Alltoallv(sendbuf, sendcnts, sdispls,
                                                  sendtype, recvbuf, recvcnts,
-                                                 rdispls, recvtype, comm_ptr);
+                                                 rdispls, recvtype, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         mpi_errno = MPIR_Alltoallv(sendbuf, sendcnts, sdispls,
                                    sendtype, recvbuf, recvcnts,
-                                   rdispls, recvtype, comm_ptr);
+                                   rdispls, recvtype, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -400,6 +445,7 @@
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Comm *comm_ptr = NULL;
+    int errflag = FALSE;
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLTOALLV);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -485,7 +531,7 @@
 
     mpi_errno = MPIR_Alltoallv_impl(sendbuf, sendcnts, sdispls,
                                     sendtype, recvbuf, recvcnts,
-                                    rdispls, recvtype, comm_ptr);
+                                    rdispls, recvtype, comm_ptr, &errflag);
     if (mpi_errno) goto fn_fail;
 
     /* ... end of body of routine ... */

Modified: mpich2/trunk/src/mpi/coll/alltoallw.c
===================================================================
--- mpich2/trunk/src/mpi/coll/alltoallw.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/alltoallw.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -59,7 +59,8 @@
 	int *recvcnts, 
 	int *rdispls, 
 	MPI_Datatype *recvtypes, 
-	MPID_Comm *comm_ptr )
+	MPID_Comm *comm_ptr,
+        int *errflag )
 {
     int        comm_size, i, j;
     int mpi_errno = MPI_SUCCESS;
@@ -96,26 +97,28 @@
             for (j = i; j < comm_size; ++j) {
                 if (rank == i) {
                     /* also covers the (rank == i && rank == j) case */
-                    mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + rdispls[j]),
-                                                      recvcnts[j], recvtypes[j],
-                                                      j, MPIR_ALLTOALLW_TAG,
-                                                      j, MPIR_ALLTOALLW_TAG,
-                                                      comm, &status);
+                    mpi_errno = MPIC_Sendrecv_replace_ft(((char *)recvbuf + rdispls[j]),
+                                                         recvcnts[j], recvtypes[j],
+                                                         j, MPIR_ALLTOALLW_TAG,
+                                                         j, MPIR_ALLTOALLW_TAG,
+                                                         comm, &status, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
                 }
                 else if (rank == j) {
                     /* same as above with i/j args reversed */
-                    mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + rdispls[i]),
-                                                      recvcnts[i], recvtypes[i],
-                                                      i, MPIR_ALLTOALLW_TAG,
-                                                      i, MPIR_ALLTOALLW_TAG,
-                                                      comm, &status);
+                    mpi_errno = MPIC_Sendrecv_replace_ft(((char *)recvbuf + rdispls[i]),
+                                                         recvcnts[i], recvtypes[i],
+                                                         i, MPIR_ALLTOALLW_TAG,
+                                                         i, MPIR_ALLTOALLW_TAG,
+                                                         comm, &status, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
@@ -141,10 +144,10 @@
                 if (recvcnts[dst]) {
                     MPID_Datatype_get_size_macro(recvtypes[dst], type_size);
                     if (type_size) {
-                        mpi_errno = MPIC_Irecv((char *)recvbuf+rdispls[dst],
-                                               recvcnts[dst], recvtypes[dst], dst,
-                                               MPIR_ALLTOALLW_TAG, comm,
-                                               &reqarray[outstanding_requests]);
+                        mpi_errno = MPIC_Irecv_ft((char *)recvbuf+rdispls[dst],
+                                                  recvcnts[dst], recvtypes[dst], dst,
+                                                  MPIR_ALLTOALLW_TAG, comm,
+                                                  &reqarray[outstanding_requests]);
                         if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 
                         outstanding_requests++;
@@ -157,10 +160,10 @@
                 if (sendcnts[dst]) {
                     MPID_Datatype_get_size_macro(sendtypes[dst], type_size);
                     if (type_size) {
-                        mpi_errno = MPIC_Isend((char *)sendbuf+sdispls[dst],
-                                               sendcnts[dst], sendtypes[dst], dst,
-                                               MPIR_ALLTOALLW_TAG, comm,
-                                               &reqarray[outstanding_requests]);
+                        mpi_errno = MPIC_Isend_ft((char *)sendbuf+sdispls[dst],
+                                                  sendcnts[dst], sendtypes[dst], dst,
+                                                  MPIR_ALLTOALLW_TAG, comm,
+                                                  &reqarray[outstanding_requests], errflag);
                         if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 
                         outstanding_requests++;
@@ -168,7 +171,7 @@
                 }
             }
 
-            mpi_errno = MPIR_Waitall_impl(outstanding_requests, reqarray, starray);
+            mpi_errno = MPIC_Waitall_ft(outstanding_requests, reqarray, starray, errflag);
             if (mpi_errno && mpi_errno != MPI_ERR_IN_STATUS) MPIU_ERR_POP(mpi_errno);
             
             /* --BEGIN ERROR HANDLING-- */
@@ -178,6 +181,7 @@
                         mpi_errno = starray[i].MPI_ERROR;
                         if (mpi_errno) {
                             /* for communication errors, just record the error but continue */
+                            *errflag = TRUE;
                             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                         }
@@ -200,14 +204,15 @@
         for (i=1; i<comm_size; i++) {
             src = (rank - i + comm_size) % comm_size;
             dst = (rank + i) % comm_size;
-            mpi_errno = MPIC_Sendrecv(((char *)sendbuf+sdispls[dst]), 
-                                      sendcnts[dst], sendtypes[dst], dst,
-                                      MPIR_ALLTOALLW_TAG, 
-                                      ((char *)recvbuf+rdispls[src]), 
-                                      recvcnts[src], recvtypes[dst], src,
-                                      MPIR_ALLTOALLW_TAG, comm, &status);
+            mpi_errno = MPIC_Sendrecv_ft(((char *)sendbuf+sdispls[dst]), 
+                                         sendcnts[dst], sendtypes[dst], dst,
+                                         MPIR_ALLTOALLW_TAG, 
+                                         ((char *)recvbuf+rdispls[src]), 
+                                         recvcnts[src], recvtypes[dst], src,
+                                         MPIR_ALLTOALLW_TAG, comm, &status, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -221,6 +226,8 @@
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
 
   fn_fail:
@@ -242,7 +249,8 @@
 	int *recvcnts, 
 	int *rdispls, 
 	MPI_Datatype *recvtypes, 
-	MPID_Comm *comm_ptr )
+	MPID_Comm *comm_ptr,
+        int *errflag )
 {
 /* Intercommunicator alltoallw. We use a pairwise exchange algorithm
    similar to the one used in intracommunicator alltoallw. Since the
@@ -300,12 +308,13 @@
             sendtype = sendtypes[dst];
         }
 
-        mpi_errno = MPIC_Sendrecv(sendaddr, sendcount, sendtype, 
-                                  dst, MPIR_ALLTOALLW_TAG, recvaddr, 
-                                  recvcount, recvtype, src,
-                                  MPIR_ALLTOALLW_TAG, comm, &status);
+        mpi_errno = MPIC_Sendrecv_ft(sendaddr, sendcount, sendtype,
+                                     dst, MPIR_ALLTOALLW_TAG, recvaddr,
+                                     recvcount, recvtype, src,
+                                     MPIR_ALLTOALLW_TAG, comm, &status, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -316,6 +325,8 @@
     MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -327,7 +338,7 @@
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Alltoallw(void *sendbuf, int *sendcnts, int *sdispls, MPI_Datatype *sendtypes,
                    void *recvbuf, int *recvcnts, int *rdispls, MPI_Datatype *recvtypes,
-                   MPID_Comm *comm_ptr)
+                   MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
         
@@ -335,13 +346,13 @@
         /* intracommunicator */
         mpi_errno = MPIR_Alltoallw_intra(sendbuf, sendcnts, sdispls,
                                          sendtypes, recvbuf, recvcnts,
-                                         rdispls, recvtypes, comm_ptr);
+                                         rdispls, recvtypes, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         /* intercommunicator */
         mpi_errno = MPIR_Alltoallw_inter(sendbuf, sendcnts, sdispls,
                                          sendtypes, recvbuf, recvcnts,
-                                         rdispls, recvtypes, comm_ptr);
+                                         rdispls, recvtypes, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -357,19 +368,19 @@
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Alltoallw_impl(void *sendbuf, int *sendcnts, int *sdispls, MPI_Datatype *sendtypes,
                         void *recvbuf, int *recvcnts, int *rdispls, MPI_Datatype *recvtypes,
-                        MPID_Comm *comm_ptr)
+                        MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
         
     if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Alltoallw != NULL) {
 	mpi_errno = comm_ptr->coll_fns->Alltoallw(sendbuf, sendcnts, sdispls,
                                                   sendtypes, recvbuf, recvcnts,
-                                                  rdispls, recvtypes, comm_ptr);
+                                                  rdispls, recvtypes, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         mpi_errno = MPIR_Alltoallw(sendbuf, sendcnts, sdispls,
                                    sendtypes, recvbuf, recvcnts,
-                                   rdispls, recvtypes, comm_ptr);
+                                   rdispls, recvtypes, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -429,6 +440,7 @@
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Comm *comm_ptr = NULL;
+    int errflag = FALSE;
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLTOALLW);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -522,7 +534,7 @@
 
     mpi_errno = MPIR_Alltoallw_impl(sendbuf, sendcnts, sdispls,
                                     sendtypes, recvbuf, recvcnts,
-                                    rdispls, recvtypes, comm_ptr);
+                                    rdispls, recvtypes, comm_ptr, &errflag);
     if (mpi_errno) goto fn_fail;
 
     /* ... end of body of routine ... */

Modified: mpich2/trunk/src/mpi/coll/barrier.c
===================================================================
--- mpich2/trunk/src/mpi/coll/barrier.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/barrier.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -74,9 +74,9 @@
         dst = (rank + mask) % size;
         src = (rank - mask + size) % size;
         mpi_errno = MPIC_Sendrecv(NULL, 0, MPI_BYTE, dst,
-                                  MPIR_BARRIER_TAG, NULL, 0, MPI_BYTE,
-                                  src, MPIR_BARRIER_TAG, comm,
-                                  MPI_STATUS_IGNORE);
+                                     MPIR_BARRIER_TAG, NULL, 0, MPI_BYTE,
+                                     src, MPIR_BARRIER_TAG, comm,
+                                     MPI_STATUS_IGNORE);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
@@ -137,7 +137,8 @@
     int mpi_errno_ret = MPI_SUCCESS;
     int i = 0;
     MPID_Comm *newcomm_ptr = NULL;
-
+    int errflag = FALSE;
+    
     rank = comm_ptr->rank;
 
     /* Get the local intracommunicator */
@@ -166,38 +167,42 @@
     if (comm_ptr->is_low_group) {
         /* bcast to right*/
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
-        mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr);
+        mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, &errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
+        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
         /* receive bcast from right */
         root = 0;
-        mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr);
+        mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, &errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
+        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     }
     else {
         /* receive bcast from left */
         root = 0;
-        mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr);
+        mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, &errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
+        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
         /* bcast to left */
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
-        mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr);
+        mpi_errno = MPIR_Bcast_inter(&i, 1, MPI_BYTE, root, comm_ptr, &errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
+        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     }
  fn_exit:
     if (mpi_errno_ret)
@@ -207,7 +212,8 @@
     goto fn_exit;
 }
 
-/* MPIR_Barrier performs an barrier using point-to-point messages.
+/* MPIR_Barrier performs a barrier using
+   point-to-point messages.
    This is intended to be used by device-specific implementations of
    barrier.  In all other cases MPIR_Barrier_impl should be used. */
 #undef FUNCNAME
@@ -247,7 +253,7 @@
 {
     int mpi_errno = MPI_SUCCESS;
     int mpi_errno_ret = MPI_SUCCESS;
-    
+    int errflag = FALSE;
     if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Barrier != NULL)
     {
 	mpi_errno = comm_ptr->coll_fns->Barrier(comm_ptr);
@@ -285,12 +291,13 @@
                 if (comm_ptr->node_comm != NULL)
                 {
 		    int i=0;
-                    mpi_errno = MPIR_Bcast_impl(&i, 1, MPI_BYTE, 0, comm_ptr->node_comm);
+                    mpi_errno = MPIR_Bcast_impl(&i, 1, MPI_BYTE, 0, comm_ptr->node_comm, &errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
+                    MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
                 }
             }
             else {

Modified: mpich2/trunk/src/mpi/coll/bcast.c
===================================================================
--- mpich2/trunk/src/mpi/coll/bcast.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/bcast.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -35,7 +35,8 @@
     int count, 
     MPI_Datatype datatype, 
     int root, 
-    MPID_Comm *comm_ptr)
+    MPID_Comm *comm_ptr,
+    int *errflag)
 {
     int        rank, comm_size, src, dst;
     int        relative_rank, mask;
@@ -131,13 +132,14 @@
             src = rank - mask; 
             if (src < 0) src += comm_size;
             if (!is_contig || !is_homogeneous)
-                mpi_errno = MPIC_Recv(tmp_buf,nbytes,MPI_BYTE,src,
-                                      MPIR_BCAST_TAG,comm,MPI_STATUS_IGNORE);
+                mpi_errno = MPIC_Recv_ft(tmp_buf,nbytes,MPI_BYTE,src,
+                                         MPIR_BCAST_TAG,comm,MPI_STATUS_IGNORE, errflag);
             else
-                mpi_errno = MPIC_Recv(buffer,count,datatype,src,
-                                      MPIR_BCAST_TAG,comm,MPI_STATUS_IGNORE);
+                mpi_errno = MPIC_Recv_ft(buffer,count,datatype,src,
+                                         MPIR_BCAST_TAG,comm,MPI_STATUS_IGNORE, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -165,13 +167,14 @@
             dst = rank + mask;
             if (dst >= comm_size) dst -= comm_size;
             if (!is_contig || !is_homogeneous)
-                mpi_errno = MPIC_Send(tmp_buf,nbytes,MPI_BYTE,dst,
-                                      MPIR_BCAST_TAG,comm);
+                mpi_errno = MPIC_Send_ft(tmp_buf,nbytes,MPI_BYTE,dst,
+                                         MPIR_BCAST_TAG,comm, errflag);
             else
-                mpi_errno = MPIC_Send(buffer,count,datatype,dst,
-                                      MPIR_BCAST_TAG,comm); 
+                mpi_errno = MPIC_Send_ft(buffer,count,datatype,dst,
+                                         MPIR_BCAST_TAG,comm, errflag); 
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -195,6 +198,8 @@
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -223,7 +228,8 @@
     int nbytes,
     void *tmp_buf,
     int is_contig,
-    int is_homogeneous)
+    int is_homogeneous,
+    int *errflag)
 {
     MPI_Status status;
     int        rank, comm_size, src, dst;
@@ -272,12 +278,13 @@
             }
             else
             {
-                mpi_errno = MPIC_Recv(((char *)tmp_buf +
-                                       relative_rank*scatter_size),
-                                      recv_size, MPI_BYTE, src,
-                                      MPIR_BCAST_TAG, comm, &status);
+                mpi_errno = MPIC_Recv_ft(((char *)tmp_buf +
+                                          relative_rank*scatter_size),
+                                         recv_size, MPI_BYTE, src,
+                                         MPIR_BCAST_TAG, comm, &status, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     curr_size = 0;
@@ -307,12 +314,13 @@
             {
                 dst = rank + mask;
                 if (dst >= comm_size) dst -= comm_size;
-                mpi_errno = MPIC_Send (((char *)tmp_buf +
-                                        scatter_size*(relative_rank+mask)),
-                                       send_size, MPI_BYTE, dst,
-                                       MPIR_BCAST_TAG, comm);
+                mpi_errno = MPIC_Send_ft(((char *)tmp_buf +
+                                          scatter_size*(relative_rank+mask)),
+                                         send_size, MPI_BYTE, dst,
+                                         MPIR_BCAST_TAG, comm, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -326,6 +334,8 @@
 fn_exit:
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -360,7 +370,8 @@
     int count, 
     MPI_Datatype datatype, 
     int root, 
-    MPID_Comm *comm_ptr)
+    MPID_Comm *comm_ptr,
+    int *errflag)
 {
     MPI_Status status;
     int rank, comm_size, dst;
@@ -439,9 +450,10 @@
 
 
     mpi_errno = scatter_for_bcast(buffer, count, datatype, root, comm_ptr,
-                                  nbytes, tmp_buf, is_contig, is_homogeneous);
+                                  nbytes, tmp_buf, is_contig, is_homogeneous, errflag);
     if (mpi_errno) {
         /* for communication errors, just record the error but continue */
+        *errflag = TRUE;
         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
     }
@@ -473,13 +485,14 @@
 
         if (relative_dst < comm_size)
         {
-            mpi_errno = MPIC_Sendrecv(((char *)tmp_buf + send_offset),
-                                      curr_size, MPI_BYTE, dst, MPIR_BCAST_TAG, 
-                                      ((char *)tmp_buf + recv_offset),
-                                      (nbytes-recv_offset < 0 ? 0 : nbytes-recv_offset), 
-                                      MPI_BYTE, dst, MPIR_BCAST_TAG, comm, &status);
+            mpi_errno = MPIC_Sendrecv_ft(((char *)tmp_buf + send_offset),
+                                         curr_size, MPI_BYTE, dst, MPIR_BCAST_TAG, 
+                                         ((char *)tmp_buf + recv_offset),
+                                         (nbytes-recv_offset < 0 ? 0 : nbytes-recv_offset), 
+                                         MPI_BYTE, dst, MPIR_BCAST_TAG, comm, &status, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 recv_size = 0;
@@ -546,14 +559,15 @@
 
                     /* printf("Rank %d, send to %d, offset %d, size %d\n", rank, dst, offset, recv_size);
                        fflush(stdout); */
-                    mpi_errno = MPIC_Send(((char *)tmp_buf + offset),
-                                          recv_size, MPI_BYTE, dst,
-                                          MPIR_BCAST_TAG, comm); 
+                    mpi_errno = MPIC_Send_ft(((char *)tmp_buf + offset),
+                                             recv_size, MPI_BYTE, dst,
+                                             MPIR_BCAST_TAG, comm, errflag); 
                     /* recv_size was set in the previous
                        receive. that's the amount of data to be
                        sent now. */
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
@@ -566,14 +580,15 @@
                 {
                     /* printf("Rank %d waiting to recv from rank %d\n",
                        relative_rank, dst); */
-                    mpi_errno = MPIC_Recv(((char *)tmp_buf + offset),
-                                          nbytes - offset, 
-                                          MPI_BYTE, dst, MPIR_BCAST_TAG,
-                                          comm, &status); 
+                    mpi_errno = MPIC_Recv_ft(((char *)tmp_buf + offset),
+                                             nbytes - offset, 
+                                             MPI_BYTE, dst, MPIR_BCAST_TAG,
+                                             comm, &status, errflag); 
                     /* nprocs_completed is also equal to the no. of processes
                        whose data we don't have */
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                         recv_size = 0;
@@ -608,6 +623,8 @@
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -638,7 +655,8 @@
     int count, 
     MPI_Datatype datatype, 
     int root, 
-    MPID_Comm *comm_ptr)
+    MPID_Comm *comm_ptr,
+    int *errflag)
 {
     int rank, comm_size;
     int relative_rank;
@@ -712,9 +730,10 @@
     scatter_size = (nbytes + comm_size - 1)/comm_size; /* ceiling division */
 
     mpi_errno = scatter_for_bcast(buffer, count, datatype, root, comm_ptr,
-                                  nbytes, tmp_buf, is_contig, is_homogeneous);
+                                  nbytes, tmp_buf, is_contig, is_homogeneous, errflag);
     if (mpi_errno) {
         /* for communication errors, just record the error but continue */
+        *errflag = TRUE;
         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
     }
@@ -745,17 +764,18 @@
     for (i=1; i<comm_size; i++)
     {
         mpi_errno = 
-            MPIC_Sendrecv((char *)tmp_buf +
-                          displs[(j-root+comm_size)%comm_size],  
-                          recvcnts[(j-root+comm_size)%comm_size],
-                          MPI_BYTE, right, MPIR_BCAST_TAG, 
-                          (char *)tmp_buf +
-                          displs[(jnext-root+comm_size)%comm_size], 
-                          recvcnts[(jnext-root+comm_size)%comm_size],  
-                          MPI_BYTE, left,   
-                          MPIR_BCAST_TAG, comm, MPI_STATUS_IGNORE);
+            MPIC_Sendrecv_ft((char *)tmp_buf +
+                             displs[(j-root+comm_size)%comm_size],  
+                             recvcnts[(j-root+comm_size)%comm_size],
+                             MPI_BYTE, right, MPIR_BCAST_TAG, 
+                             (char *)tmp_buf +
+                             displs[(jnext-root+comm_size)%comm_size], 
+                             recvcnts[(jnext-root+comm_size)%comm_size],  
+                             MPI_BYTE, left,   
+                             MPIR_BCAST_TAG, comm, MPI_STATUS_IGNORE, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -779,6 +799,8 @@
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -789,21 +811,22 @@
    Otherwise it invokes bcast_fn_ with the given args.
    
    NOTE: calls MPIU_ERR_POP on any failure, so a fn_fail label is needed. */
-#define MPIR_Bcast_fn_or_override(bcast_fn_,mpi_errno_,buffer_,count_,datatype_,root_,comm_ptr_) \
+#define MPIR_Bcast_fn_or_override(bcast_fn_,mpi_errno_,buffer_,count_,datatype_,root_,comm_ptr_,errflag_) \
     do {                                                                                         \
         if (comm_ptr_->coll_fns != NULL && comm_ptr_->coll_fns->Bcast != NULL)                   \
         {                                                                                        \
             /* --BEGIN USEREXTENSION-- */                                                        \
             mpi_errno_ = comm_ptr->coll_fns->Bcast(buffer_, count_,                              \
-                                                   datatype_, root_, comm_ptr_);                 \
+                                                   datatype_, root_, comm_ptr_, errflag_);       \
             /* --END USEREXTENSION-- */                                                          \
         }                                                                                        \
         else                                                                                     \
         {                                                                                        \
-            mpi_errno_ = bcast_fn_(buffer_, count_, datatype_, root_, comm_ptr_);                \
+            mpi_errno_ = bcast_fn_(buffer_, count_, datatype_, root_, comm_ptr_, errflag_);      \
         }                                                                                        \
         if (mpi_errno) {                                                                         \
             /* for communication errors, just record the error but continue */                   \
+            *(errflag_) = TRUE;                                                                  \
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");                                    \
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);                                              \
         }                                                                                        \
@@ -820,7 +843,8 @@
         int count, 
         MPI_Datatype datatype, 
         int root, 
-        MPID_Comm *comm_ptr)
+        MPID_Comm *comm_ptr,
+        int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
     int mpi_errno_ret = MPI_SUCCESS;
@@ -859,15 +883,16 @@
             MPIU_Get_intranode_rank(comm_ptr, root) > 0) /* is not the node root (0) */ 
         {                                                /* and is on our node (!-1) */
             if (root == comm_ptr->rank) {
-                mpi_errno = MPIC_Send(buffer,count,datatype,0,
-                                      MPIR_BCAST_TAG,comm_ptr->node_comm->handle); 
+                mpi_errno = MPIC_Send_ft(buffer,count,datatype,0,
+                                         MPIR_BCAST_TAG,comm_ptr->node_comm->handle, errflag);
             }
             else if (0 == comm_ptr->node_comm->rank) {
-                mpi_errno = MPIC_Recv(buffer,count,datatype,MPIU_Get_intranode_rank(comm_ptr, root),
-                                      MPIR_BCAST_TAG,comm_ptr->node_comm->handle,MPI_STATUS_IGNORE);
+                mpi_errno = MPIC_Recv_ft(buffer,count,datatype,MPIU_Get_intranode_rank(comm_ptr, root),
+                                         MPIR_BCAST_TAG,comm_ptr->node_comm->handle,MPI_STATUS_IGNORE, errflag);
             }
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -879,14 +904,14 @@
             MPIR_Bcast_fn_or_override(MPIR_Bcast_binomial, mpi_errno,
                                       buffer, count, datatype,
                                       MPIU_Get_internode_rank(comm_ptr, root),
-                                      comm_ptr->node_roots_comm);
+                                      comm_ptr->node_roots_comm, errflag);
         }
 
         /* perform the intranode broadcast on all except for the root's node */
         if (comm_ptr->node_comm != NULL)
         {
             MPIR_Bcast_fn_or_override(MPIR_Bcast_binomial, mpi_errno,
-                                      buffer, count, datatype, 0, comm_ptr->node_comm);
+                                      buffer, count, datatype, 0, comm_ptr->node_comm, errflag);
         }
     }
     else /* (nbytes > MPIR_PARAM_BCAST_SHORT_MSG_SIZE) && (comm_ptr->size >= MPIR_PARAM_BCAST_MIN_PROCS) */
@@ -908,7 +933,7 @@
                 MPIR_Bcast_fn_or_override(MPIR_Bcast_binomial, mpi_errno,
                                           buffer, count, datatype,
                                           MPIU_Get_intranode_rank(comm_ptr, root),
-                                          comm_ptr->node_comm);
+                                          comm_ptr->node_comm, errflag);
             }
 
             /* perform the internode broadcast */
@@ -919,14 +944,14 @@
                     MPIR_Bcast_fn_or_override(MPIR_Bcast_scatter_doubling_allgather, mpi_errno,
                                               buffer, count, datatype,
                                               MPIU_Get_internode_rank(comm_ptr, root),
-                                              comm_ptr->node_roots_comm);
+                                              comm_ptr->node_roots_comm, errflag);
                 }
                 else
                 {
                     MPIR_Bcast_fn_or_override(MPIR_Bcast_scatter_ring_allgather, mpi_errno,
                                               buffer, count, datatype,
                                               MPIU_Get_internode_rank(comm_ptr, root),
-                                              comm_ptr->node_roots_comm);
+                                              comm_ptr->node_roots_comm, errflag);
                 }
             }
 
@@ -938,7 +963,7 @@
                    bcast.  We need a more comprehensive system for selecting the
                    right algorithms here. */
                 MPIR_Bcast_fn_or_override(MPIR_Bcast_binomial, mpi_errno,
-                                          buffer, count, datatype, 0, comm_ptr->node_comm);
+                                          buffer, count, datatype, 0, comm_ptr->node_comm, errflag);
             }
         }
         else /* large msg or non-pof2 */
@@ -946,9 +971,10 @@
             /* FIXME It would be good to have an SMP-aware version of this
                algorithm that (at least approximately) minimized internode
                communication. */
-            mpi_errno = MPIR_Bcast_scatter_ring_allgather(buffer, count, datatype, root, comm_ptr);
+            mpi_errno = MPIR_Bcast_scatter_ring_allgather(buffer, count, datatype, root, comm_ptr, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -958,6 +984,8 @@
 fn_exit:
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -1016,7 +1044,8 @@
         int count, 
         MPI_Datatype datatype, 
         int root, 
-        MPID_Comm *comm_ptr )
+        MPID_Comm *comm_ptr,
+        int *errflag )
 {
     int mpi_errno = MPI_SUCCESS;
     int mpi_errno_ret = MPI_SUCCESS;
@@ -1035,9 +1064,10 @@
 
 #if defined(USE_SMP_COLLECTIVES)
     if (MPIR_Comm_is_node_aware(comm_ptr)) {
-        mpi_errno = MPIR_SMP_Bcast(buffer, count, datatype, root, comm_ptr);
+        mpi_errno = MPIR_SMP_Bcast(buffer, count, datatype, root, comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -1070,9 +1100,10 @@
 
     if ((nbytes < MPIR_PARAM_BCAST_SHORT_MSG_SIZE) || (comm_size < MPIR_PARAM_BCAST_MIN_PROCS))
     {
-        mpi_errno = MPIR_Bcast_binomial(buffer, count, datatype, root, comm_ptr);
+        mpi_errno = MPIR_Bcast_binomial(buffer, count, datatype, root, comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -1081,9 +1112,10 @@
     {
         if ((nbytes < MPIR_PARAM_BCAST_LONG_MSG_SIZE) && (MPIU_is_pof2(comm_size, NULL)))
         {
-            mpi_errno = MPIR_Bcast_scatter_doubling_allgather(buffer, count, datatype, root, comm_ptr);
+            mpi_errno = MPIR_Bcast_scatter_doubling_allgather(buffer, count, datatype, root, comm_ptr, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -1093,9 +1125,10 @@
             /* We want the ring algorithm whether or not we have a
                topologically aware communicator.  Doing inter/intra-node
                communication phases breaks the pipelining of the algorithm.  */
-            mpi_errno = MPIR_Bcast_scatter_ring_allgather(buffer, count, datatype, root, comm_ptr);
+            mpi_errno = MPIR_Bcast_scatter_ring_allgather(buffer, count, datatype, root, comm_ptr, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -1110,6 +1143,8 @@
 
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -1127,7 +1162,8 @@
     int count, 
     MPI_Datatype datatype, 
     int root, 
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag)
 {
 /*  Intercommunicator broadcast.
     Root sends to rank 0 in remote group. Remote group does local
@@ -1153,10 +1189,11 @@
     {
         /* root sends to rank 0 on remote group and returns */
         MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-        mpi_errno =  MPIC_Send(buffer, count, datatype, 0,
-                               MPIR_BCAST_TAG, comm); 
+        mpi_errno =  MPIC_Send_ft(buffer, count, datatype, 0,
+                                  MPIR_BCAST_TAG, comm, errflag); 
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -1170,10 +1207,11 @@
         
         if (rank == 0)
         {
-            mpi_errno = MPIC_Recv(buffer, count, datatype, root,
-                                  MPIR_BCAST_TAG, comm, &status);
+            mpi_errno = MPIC_Recv_ft(buffer, count, datatype, root,
+                                     MPIR_BCAST_TAG, comm, &status, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -1187,9 +1225,10 @@
 
         /* now do the usual broadcast on this intracommunicator
            with rank 0 as root. */
-        mpi_errno = MPIR_Bcast_intra(buffer, count, datatype, 0, newcomm_ptr);
+        mpi_errno = MPIR_Bcast_intra(buffer, count, datatype, 0, newcomm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -1199,6 +1238,8 @@
     MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_BCAST_INTER);
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
 }
 
@@ -1211,7 +1252,7 @@
 #define FUNCNAME MPIR_Bcast_impl
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPIR_Bcast_impl(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr)
+int MPIR_Bcast_impl(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
 
@@ -1219,7 +1260,7 @@
     {
 	/* --BEGIN USEREXTENSION-- */
 	mpi_errno = comm_ptr->coll_fns->Bcast(buffer, count,
-                                              datatype, root, comm_ptr);
+                                              datatype, root, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 	/* --END USEREXTENSION-- */
     }
@@ -1228,14 +1269,14 @@
         if (comm_ptr->comm_kind == MPID_INTRACOMM)
 	{
             /* intracommunicator */
-            mpi_errno = MPIR_Bcast_intra( buffer, count, datatype, root, comm_ptr );
+            mpi_errno = MPIR_Bcast_intra( buffer, count, datatype, root, comm_ptr, errflag );
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
             
 	}
         else
 	{
             /* intercommunicator */
-            mpi_errno = MPIR_Bcast_inter( buffer, count, datatype, root, comm_ptr );
+            mpi_errno = MPIR_Bcast_inter( buffer, count, datatype, root, comm_ptr, errflag );
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         }
     }
@@ -1254,18 +1295,18 @@
 #define FUNCNAME MPIR_Bcast
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPIR_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr)
+int MPIR_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
 
     if (comm_ptr->comm_kind == MPID_INTRACOMM) {
         /* intracommunicator */
-        mpi_errno = MPIR_Bcast_intra( buffer, count, datatype, root, comm_ptr );
+        mpi_errno = MPIR_Bcast_intra( buffer, count, datatype, root, comm_ptr, errflag );
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         
     } else {
         /* intercommunicator */
-        mpi_errno = MPIR_Bcast_inter( buffer, count, datatype, root, comm_ptr );
+        mpi_errno = MPIR_Bcast_inter( buffer, count, datatype, root, comm_ptr, errflag );
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -1313,6 +1354,7 @@
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Comm *comm_ptr = NULL;
+    int errflag = FALSE;
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_BCAST);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -1370,7 +1412,7 @@
 
     /* ... body of routine ...  */
     
-    mpi_errno = MPIR_Bcast_impl( buffer, count, datatype, root, comm_ptr );
+    mpi_errno = MPIR_Bcast_impl( buffer, count, datatype, root, comm_ptr, &errflag );
     if (mpi_errno) goto fn_fail;
 
     /* ... end of body of routine ... */

Modified: mpich2/trunk/src/mpi/coll/exscan.c
===================================================================
--- mpich2/trunk/src/mpi/coll/exscan.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/exscan.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -102,7 +102,8 @@
     int count, 
     MPI_Datatype datatype, 
     MPI_Op op, 
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag )
 {
     MPI_Status status;
     int        rank, comm_size;
@@ -183,13 +184,14 @@
         dst = rank ^ mask;
         if (dst < comm_size) {
             /* Send partial_scan to dst. Recv into tmp_buf */
-            mpi_errno = MPIC_Sendrecv(partial_scan, count, datatype,
-                                      dst, MPIR_EXSCAN_TAG, tmp_buf,
-                                      count, datatype, dst,
-                                      MPIR_EXSCAN_TAG, comm,
-                                      &status);
+            mpi_errno = MPIC_Sendrecv_ft(partial_scan, count, datatype,
+                                         dst, MPIR_EXSCAN_TAG, tmp_buf,
+                                         count, datatype, dst,
+                                         MPIR_EXSCAN_TAG, comm,
+                                         &status, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -244,6 +246,8 @@
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -258,15 +262,15 @@
 #define FUNCNAME MPIR_Exscan_impl
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPIR_Exscan_impl(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr)
+int MPIR_Exscan_impl(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
 
     if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Exscan != NULL) {
-	mpi_errno = comm_ptr->coll_fns->Exscan(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+	mpi_errno = comm_ptr->coll_fns->Exscan(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
-	mpi_errno = MPIR_Exscan(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+	mpi_errno = MPIR_Exscan(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -323,6 +327,7 @@
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Comm *comm_ptr = NULL;
+    int errflag = FALSE;
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_EXSCAN);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -394,7 +399,7 @@
 
     /* ... body of routine ...  */
 
-    mpi_errno = MPIR_Exscan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+    mpi_errno = MPIR_Exscan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, &errflag);
     if (mpi_errno) goto fn_fail;
 
     /* ... end of body of routine ... */

Modified: mpich2/trunk/src/mpi/coll/gather.c
===================================================================
--- mpich2/trunk/src/mpi/coll/gather.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/gather.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -58,7 +58,8 @@
 	int recvcnt, 
 	MPI_Datatype recvtype, 
 	int root, 
-	MPID_Comm *comm_ptr )
+	MPID_Comm *comm_ptr,
+        int *errflag )
 {
     int        comm_size, rank;
     int mpi_errno = MPI_SUCCESS;
@@ -190,21 +191,27 @@
 			     * receive buffer, place it directly. This
 			     * should cover the case where the root is
 			     * rank 0. */
-			    mpi_errno = MPIC_Recv(((char *)recvbuf +
-						   (((rank + mask) % comm_size)*recvcnt*extent)),
-						  recvblks * recvcnt, recvtype, src,
-						  MPIR_GATHER_TAG, comm,
-						  &status);
+			    mpi_errno = MPIC_Recv_ft(((char *)recvbuf +
+                                                      (((rank + mask) % comm_size)*recvcnt*extent)),
+                                                     recvblks * recvcnt, recvtype, src,
+                                                     MPIR_GATHER_TAG, comm,
+                                                     &status, errflag);
                             if (mpi_errno) {
                                 /* for communication errors, just record the error but continue */
+                                *errflag = TRUE;
                                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                             }
 			}
 			else if (nbytes < MPIR_PARAM_GATHER_VSMALL_MSG_SIZE) {
-			    mpi_errno = MPIC_Recv(tmp_buf, recvblks * nbytes, MPI_BYTE,
-						  src, MPIR_GATHER_TAG, comm, &status);
-                            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+			    mpi_errno = MPIC_Recv_ft(tmp_buf, recvblks * nbytes, MPI_BYTE,
+                                                     src, MPIR_GATHER_TAG, comm, &status, errflag);
+                            if (mpi_errno) {
+                                /* for communication errors, just record the error but continue */
+                                *errflag = TRUE;
+                                MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+                                MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
+                            }
 			    copy_offset = rank + mask;
 			    copy_blks = recvblks;
 			}
@@ -220,10 +227,11 @@
 			    mpi_errno = MPIR_Type_commit_impl(&tmp_type);
                             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 			    
-			    mpi_errno = MPIC_Recv(recvbuf, 1, tmp_type, src,
-						  MPIR_GATHER_TAG, comm, &status);
+			    mpi_errno = MPIC_Recv_ft(recvbuf, 1, tmp_type, src,
+                                                     MPIR_GATHER_TAG, comm, &status, errflag);
                             if (mpi_errno) {
                                 /* for communication errors, just record the error but continue */
+                                *errflag = TRUE;
                                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                             }
@@ -245,12 +253,13 @@
 			    offset = mask * nbytes;
 			else
 			    offset = (mask - 1) * nbytes;
-			mpi_errno = MPIC_Recv(((char *)tmp_buf + offset),
-					      recvblks * nbytes, MPI_BYTE, src,
-					      MPIR_GATHER_TAG, comm,
-					      &status);
+			mpi_errno = MPIC_Recv_ft(((char *)tmp_buf + offset),
+                                                 recvblks * nbytes, MPI_BYTE, src,
+                                                 MPIR_GATHER_TAG, comm,
+                                                 &status, errflag);
                         if (mpi_errno) {
                             /* for communication errors, just record the error but continue */
+                            *errflag = TRUE;
                             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                         }
@@ -266,19 +275,21 @@
 		if (!tmp_buf_size)
 		{
                     /* leaf nodes send directly from sendbuf */
-                    mpi_errno = MPIC_Send(sendbuf, sendcnt, sendtype, dst,
-                                          MPIR_GATHER_TAG, comm);
+                    mpi_errno = MPIC_Send_ft(sendbuf, sendcnt, sendtype, dst,
+                                             MPIR_GATHER_TAG, comm, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
                 }
                 else if (nbytes < MPIR_PARAM_GATHER_VSMALL_MSG_SIZE) {
-		    mpi_errno = MPIC_Send(tmp_buf, curr_cnt, MPI_BYTE, dst,
-					  MPIR_GATHER_TAG, comm);
+		    mpi_errno = MPIC_Send_ft(tmp_buf, curr_cnt, MPI_BYTE, dst,
+                                             MPIR_GATHER_TAG, comm, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
@@ -297,10 +308,11 @@
 		    mpi_errno = MPIR_Type_commit_impl(&tmp_type);
                     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
-		    mpi_errno = MPIC_Send(MPI_BOTTOM, 1, tmp_type, dst,
-					  MPIR_GATHER_TAG, comm);
+		    mpi_errno = MPIC_Send_ft(MPI_BOTTOM, 1, tmp_type, dst,
+                                             MPIR_GATHER_TAG, comm, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
@@ -367,12 +379,13 @@
                 if (src < comm_size)
 		{
                     src = (src + root) % comm_size;
-                    mpi_errno = MPIC_Recv(((char *)tmp_buf + curr_cnt), 
-                                          tmp_buf_size-curr_cnt, MPI_BYTE, src,
-                                          MPIR_GATHER_TAG, comm, 
-                                          &status);
+                    mpi_errno = MPIC_Recv_ft(((char *)tmp_buf + curr_cnt), 
+                                             tmp_buf_size-curr_cnt, MPI_BYTE, src,
+                                             MPIR_GATHER_TAG, comm, 
+                                             &status, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                         recv_size = 0;
@@ -387,10 +400,11 @@
 	    {
                 dst = relative_rank ^ mask;
                 dst = (dst + root) % comm_size;
-                mpi_errno = MPIC_Send(tmp_buf, curr_cnt, MPI_BYTE, dst,
-                                      MPIR_GATHER_TAG, comm);
+                mpi_errno = MPIC_Send_ft(tmp_buf, curr_cnt, MPI_BYTE, dst,
+                                         MPIR_GATHER_TAG, comm, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -434,6 +448,8 @@
     MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -454,7 +470,8 @@
 	int recvcnt, 
 	MPI_Datatype recvtype, 
 	int root, 
-	MPID_Comm *comm_ptr )
+	MPID_Comm *comm_ptr,
+        int *errflag )
 {
 /*  Intercommunicator gather.
     For short messages, remote group does a local intracommunicator
@@ -506,11 +523,12 @@
         if (root == MPI_ROOT)
 	{
             /* root receives data from rank 0 on remote group */
-            mpi_errno = MPIC_Recv(recvbuf, recvcnt*remote_size,
-                                  recvtype, 0, MPIR_GATHER_TAG, comm,
-                                  &status);
+            mpi_errno = MPIC_Recv_ft(recvbuf, recvcnt*remote_size,
+                                     recvtype, 0, MPIR_GATHER_TAG, comm,
+                                     &status, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -546,20 +564,22 @@
             /* now do the a local gather on this intracommunicator */
             mpi_errno = MPIR_Gather_impl(sendbuf, sendcnt, sendtype,
                                          tmp_buf, sendcnt, sendtype, 0,
-                                         newcomm_ptr);
+                                         newcomm_ptr, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
             
             if (rank == 0)
 	    {
-                mpi_errno = MPIC_Send(tmp_buf, sendcnt*local_size,
-                                      sendtype, root,
-                                      MPIR_GATHER_TAG, comm);
+                mpi_errno = MPIC_Send_ft(tmp_buf, sendcnt*local_size,
+                                         sendtype, root,
+                                         MPIR_GATHER_TAG, comm, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -577,11 +597,12 @@
 
             for (i=0; i<remote_size; i++)
 	    {
-                mpi_errno = MPIC_Recv(((char *)recvbuf+recvcnt*i*extent), 
-                                      recvcnt, recvtype, i,
-                                      MPIR_GATHER_TAG, comm, &status);
+                mpi_errno = MPIC_Recv_ft(((char *)recvbuf+recvcnt*i*extent), 
+                                         recvcnt, recvtype, i,
+                                         MPIR_GATHER_TAG, comm, &status, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -589,10 +610,11 @@
         }
         else
 	{
-            mpi_errno = MPIC_Send(sendbuf,sendcnt,sendtype,root,
-                                  MPIR_GATHER_TAG,comm);
+            mpi_errno = MPIC_Send_ft(sendbuf,sendcnt,sendtype,root,
+                                     MPIR_GATHER_TAG,comm, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -604,6 +626,8 @@
     MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -619,7 +643,7 @@
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Gather(void *sendbuf, int sendcnt, MPI_Datatype sendtype,
                 void *recvbuf, int recvcnt, MPI_Datatype recvtype,
-                int root, MPID_Comm *comm_ptr)
+                int root, MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
         
@@ -627,13 +651,13 @@
         /* intracommunicator */
         mpi_errno = MPIR_Gather_intra(sendbuf, sendcnt, sendtype,
                                       recvbuf, recvcnt, recvtype, root,
-                                      comm_ptr);
+                                      comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         /* intercommunicator */
         mpi_errno = MPIR_Gather_inter(sendbuf, sendcnt, sendtype,
                                       recvbuf, recvcnt, recvtype, root,
-                                      comm_ptr);
+                                      comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -653,19 +677,19 @@
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Gather_impl(void *sendbuf, int sendcnt, MPI_Datatype sendtype,
                      void *recvbuf, int recvcnt, MPI_Datatype recvtype,
-                     int root, MPID_Comm *comm_ptr)
+                     int root, MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
         
     if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Gather != NULL) {
 	mpi_errno = comm_ptr->coll_fns->Gather(sendbuf, sendcnt,
                                                sendtype, recvbuf, recvcnt,
-                                               recvtype, root, comm_ptr);
+                                               recvtype, root, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         mpi_errno = MPIR_Gather(sendbuf, sendcnt, sendtype,
                                 recvbuf, recvcnt, recvtype, root,
-                                comm_ptr);
+                                comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -717,6 +741,7 @@
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Comm *comm_ptr = NULL;
+    int errflag = FALSE;
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_GATHER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -820,7 +845,7 @@
 
     /* ... body of routine ...  */
 
-    mpi_errno = MPIR_Gather_impl(sendbuf, sendcnt, sendtype, recvbuf, recvcnt, recvtype, root, comm_ptr);
+    mpi_errno = MPIR_Gather_impl(sendbuf, sendcnt, sendtype, recvbuf, recvcnt, recvtype, root, comm_ptr, &errflag);
     if (mpi_errno) goto fn_fail;
         
     /* ... end of body of routine ... */

Modified: mpich2/trunk/src/mpi/coll/gatherv.c
===================================================================
--- mpich2/trunk/src/mpi/coll/gatherv.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/gatherv.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -57,7 +57,8 @@
 	int *displs, 
 	MPI_Datatype recvtype, 
 	int root, 
-	MPID_Comm *comm_ptr )
+	MPID_Comm *comm_ptr,
+        int *errflag )
 {
     int        comm_size, rank;
     int        mpi_errno = MPI_SUCCESS;
@@ -104,16 +105,16 @@
                     }
                 }
                 else {
-                    mpi_errno = MPIC_Irecv(((char *)recvbuf+displs[i]*extent), 
-                                           recvcnts[i], recvtype, i,
-                                           MPIR_GATHERV_TAG, comm,
-                                           &reqarray[reqs++]);
+                    mpi_errno = MPIC_Irecv_ft(((char *)recvbuf+displs[i]*extent), 
+                                              recvcnts[i], recvtype, i,
+                                              MPIR_GATHERV_TAG, comm,
+                                              &reqarray[reqs++]);
                     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                 }
             }
         }
         /* ... then wait for *all* of them to finish: */
-        mpi_errno = MPIR_Waitall_impl(reqs, reqarray, starray);
+        mpi_errno = MPIC_Waitall_ft(reqs, reqarray, starray, errflag);
         if (mpi_errno&& mpi_errno != MPI_ERR_IN_STATUS) MPIU_ERR_POP(mpi_errno);
         
         /* --BEGIN ERROR HANDLING-- */
@@ -123,6 +124,7 @@
                     mpi_errno = starray[i].MPI_ERROR;
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
@@ -146,19 +148,21 @@
                 MPIR_PARAM_GET_DEFAULT_INT(GATHERV_INTER_SSEND_MIN_PROCS,&min_procs);
 
             if (comm_size >= min_procs) {
-                mpi_errno = MPIC_Ssend(sendbuf, sendcnt, sendtype, root, 
-                                       MPIR_GATHERV_TAG, comm);
+                mpi_errno = MPIC_Ssend_ft(sendbuf, sendcnt, sendtype, root, 
+                                          MPIR_GATHERV_TAG, comm, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
             }
             else {
-                mpi_errno = MPIC_Send(sendbuf, sendcnt, sendtype, root, 
-                                      MPIR_GATHERV_TAG, comm);
+                mpi_errno = MPIC_Send_ft(sendbuf, sendcnt, sendtype, root, 
+                                         MPIR_GATHERV_TAG, comm, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -173,6 +177,8 @@
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -188,19 +194,19 @@
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Gatherv_impl(void *sendbuf, int sendcnt, MPI_Datatype sendtype,
                       void *recvbuf, int *recvcnts, int *displs, MPI_Datatype recvtype,
-                      int root, MPID_Comm *comm_ptr)
+                      int root, MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
         
     if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Gatherv != NULL) {
 	mpi_errno = comm_ptr->coll_fns->Gatherv(sendbuf, sendcnt, sendtype,
                                                 recvbuf, recvcnts, displs, recvtype,
-                                                root, comm_ptr);
+                                                root, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         mpi_errno = MPIR_Gatherv(sendbuf, sendcnt, sendtype,
                                  recvbuf, recvcnts, displs, recvtype,
-                                 root, comm_ptr);
+                                 root, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -256,6 +262,7 @@
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Comm *comm_ptr = NULL;
+    int errflag = FALSE;
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_GATHERV);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -373,7 +380,7 @@
 
     mpi_errno = MPIR_Gatherv_impl(sendbuf, sendcnt, sendtype,
                                   recvbuf, recvcnts, displs, recvtype,
-                                  root, comm_ptr);
+                                  root, comm_ptr, &errflag);
     if (mpi_errno) goto fn_fail;
 
     /* ... end of body of routine ... */

Modified: mpich2/trunk/src/mpi/coll/helper_fns.c
===================================================================
--- mpich2/trunk/src/mpi/coll/helper_fns.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/helper_fns.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -548,3 +548,300 @@
     return mpi_errno;
     /* --END ERROR HANDLING-- */
 }
+
+
+/* Fault-tolerance versions.  When a process fails, collectives will
+   still complete, however the result may be invalid.  Processes
+   directly communicating with the failed process can detect the
+   failure, however another mechanism is needed to communicate the
+   failure to other processes receiving the invalid data.  To do this
+   we introduce the _ft versions of the MPIC_ helper functions.  These
+   functions take a pointer to an error flag.  When this is set to
+   TRUE, the send functions will communicate the failure to the
+   receiver.  If a function detects a failure, either by getting a
+   failure in the communication operation, or by receiving an error
+   indicator from a remote process, it sets the error flag to TRUE.
+
+   In this implementation, we indicate an error to a remote process by
+   sending the message with MPIR_ERROR_TAG instead of the requested
+   tag.  The receiving helpers match on MPI_ANY_TAG; when a process
+   receives a message tagged MPIR_ERROR_TAG, it sets the error flag.
+   NOTE(review): this comment previously described an empty message as
+   the error indicator and said barrier does not communicate failures
+   that way; confirm how barrier behaves with the tag-based scheme. */
+
+#undef FUNCNAME
+#define FUNCNAME MPIC_Send_ft
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+/* Blocking send; when *errflag is set on entry the message goes out with
+   MPIR_ERROR_TAG instead of tag.  *errflag is only read, never modified. */
+int MPIC_Send_ft(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+                 MPI_Comm comm, int *errflag)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int wire_tag;
+    MPIDI_STATE_DECL(MPID_STATE_MPIC_SEND_FT);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SEND_FT);
+    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
+
+    wire_tag = *errflag ? MPIR_ERROR_TAG : tag;
+    mpi_errno = MPIC_Send(buf, count, datatype, dest, wire_tag, comm);
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SEND_FT);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPIC_Recv_ft
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+/* Blocking receive posted with MPI_ANY_TAG so that a message carrying
+   MPIR_ERROR_TAG is matched as well.  Receiving such a message sets
+   *errflag; any other tag is asserted to be equal to `tag`. */
+int MPIC_Recv_ft(void *buf, int count, MPI_Datatype datatype, int source, int tag,
+                 MPI_Comm comm, MPI_Status *status, int *errflag)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPI_Status local_status;
+    MPIDI_STATE_DECL(MPID_STATE_MPIC_RECV_FT);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_RECV_FT);
+    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
+
+    if (status == MPI_STATUS_IGNORE)
+        status = &local_status;
+
+    mpi_errno = MPIC_Recv(buf, count, datatype, source, MPI_ANY_TAG, comm, status);
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+    /* the tag is not examined if the flag is already set or source is PROC_NULL */
+    if (!*errflag && source != MPI_PROC_NULL) {
+        if (status->MPI_TAG != MPIR_ERROR_TAG) {
+            MPIU_Assert(status->MPI_TAG == tag);
+        } else {
+            *errflag = TRUE;
+        }
+    }
+
+ fn_exit:
+    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_RECV_FT);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPIC_Ssend_ft
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+/* MPIC_Ssend-based counterpart of MPIC_Send_ft: the message is sent with
+   MPIR_ERROR_TAG instead of tag when *errflag is set on entry. */
+int MPIC_Ssend_ft(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+                  MPI_Comm comm, int *errflag)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int wire_tag;
+    MPIDI_STATE_DECL(MPID_STATE_MPIC_SSEND_FT);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SSEND_FT);
+    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
+
+    wire_tag = *errflag ? MPIR_ERROR_TAG : tag;
+    mpi_errno = MPIC_Ssend(buf, count, datatype, dest, wire_tag, comm);
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SSEND_FT);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPIC_Sendrecv_ft
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+/* Combined send/receive.  The send uses MPIR_ERROR_TAG when *errflag is set
+   on entry and sendtag otherwise; the receive always uses MPI_ANY_TAG.  If
+   the flag was clear on entry, a received MPIR_ERROR_TAG sets *errflag. */
+int MPIC_Sendrecv_ft(void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                     int dest, int sendtag, void *recvbuf, int recvcount,
+                     MPI_Datatype recvtype, int source, int recvtag,
+                     MPI_Comm comm, MPI_Status *status, int *errflag)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPI_Status local_status;
+    const int failed_on_entry = *errflag;
+    MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV_FT);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SENDRECV_FT);
+    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
+
+    if (status == MPI_STATUS_IGNORE)
+        status = &local_status;
+
+    /* only the send tag depends on the flag; the receive accepts any tag */
+    mpi_errno = MPIC_Sendrecv(sendbuf, sendcount, sendtype, dest,
+                              failed_on_entry ? MPIR_ERROR_TAG : sendtag,
+                              recvbuf, recvcount, recvtype, source, MPI_ANY_TAG,
+                              comm, status);
+    /* already flagged on entry: hand back the result without a tag check */
+    if (failed_on_entry)
+        goto fn_exit;
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+    if (source != MPI_PROC_NULL) {
+        if (status->MPI_TAG != MPIR_ERROR_TAG) {
+            MPIU_Assert(status->MPI_TAG == recvtag);
+        } else {
+            *errflag = TRUE;
+        }
+    }
+
+ fn_exit:
+    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SENDRECV_FT);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPIC_Sendrecv_replace_ft
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+/* Send/receive through a single buffer.  The send uses MPIR_ERROR_TAG when
+   *errflag is set on entry and sendtag otherwise; the receive always uses
+   MPI_ANY_TAG.  If *errflag was not set, a received MPIR_ERROR_TAG sets it,
+   and any other tag is asserted to equal recvtag (source != MPI_PROC_NULL). */
+int MPIC_Sendrecv_replace_ft(void *buf, int count, MPI_Datatype datatype,
+                             int dest, int sendtag,
+                             int source, int recvtag,
+                             MPI_Comm comm, MPI_Status *status, int *errflag)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPI_Status mystatus;
+    MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV_REPLACE_FT);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SENDRECV_REPLACE_FT);
+    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
+
+    if (status == MPI_STATUS_IGNORE)
+        status = &mystatus;
+
+    /* The receive must accept any tag even when *errflag is already set:
+       the peer may be sending with MPIR_ERROR_TAG too, which a receive
+       posted on recvtag would not match (MPIC_Sendrecv_ft does the same). */
+    mpi_errno = MPIC_Sendrecv_replace(buf, count, datatype,
+                                      dest, *errflag ? MPIR_ERROR_TAG : sendtag,
+                                      source, MPI_ANY_TAG,
+                                      comm, status);
+    if (*errflag)
+        goto fn_exit;
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+    if (source != MPI_PROC_NULL) {
+        if (status->MPI_TAG == MPIR_ERROR_TAG)
+            *errflag = TRUE;
+        else {
+            MPIU_Assert(status->MPI_TAG == recvtag);
+        }
+    }
+
+ fn_exit:
+    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SENDRECV_REPLACE_FT);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPIC_Isend_ft
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+/* Starts a send through MPIC_Isend; when *errflag is set on entry the tag
+   is replaced by MPIR_ERROR_TAG.  *errflag is only read, never modified. */
+int MPIC_Isend_ft(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+                  MPI_Comm comm, MPI_Request *request, int *errflag)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int wire_tag;
+    MPIDI_STATE_DECL(MPID_STATE_MPIC_ISEND_FT);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_ISEND_FT);
+    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
+
+    wire_tag = *errflag ? MPIR_ERROR_TAG : tag;
+    mpi_errno = MPIC_Isend(buf, count, datatype, dest, wire_tag, comm, request);
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_ISEND_FT);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPIC_Irecv_ft
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+/* Receive wrapper around MPIC_Irecv: the receive is always posted with
+   MPI_ANY_TAG and the tag argument is not used.  Unlike the other _ft
+   helpers it takes no errflag; it only returns MPIC_Irecv's result. */
+int MPIC_Irecv_ft(void *buf, int count, MPI_Datatype datatype, int source,
+                  int tag, MPI_Comm comm, MPI_Request *request)
+{
+    int mpi_errno;
+    MPIDI_STATE_DECL(MPID_STATE_MPIC_IRECV_FT);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_IRECV_FT);
+
+    mpi_errno = MPIC_Irecv(buf, count, datatype, source, MPI_ANY_TAG, comm, request);
+
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_IRECV_FT);
+    return mpi_errno;
+}
+
+
+#undef FUNCNAME
+#define FUNCNAME MPIC_Waitall_ft
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+/* Completes all requests through MPIR_Waitall_impl and then, unless
+   *errflag was already set, sets *errflag if any returned status has
+   MPI_TAG equal to MPIR_ERROR_TAG.  statuses must not be
+   MPI_STATUSES_IGNORE (asserted), since the tags are read from it. */
+int MPIC_Waitall_ft(int numreq, MPI_Request requests[], MPI_Status statuses[], int *errflag)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int idx;
+    MPIDI_STATE_DECL(MPID_STATE_MPIC_WAITALL_FT);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_WAITALL_FT);
+
+    MPIU_Assert(statuses != MPI_STATUSES_IGNORE);
+    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
+
+    mpi_errno = MPIR_Waitall_impl(numreq, requests, statuses);
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+    /* the scan is skipped when the flag is already set and stops at the
+       first status that carries the error tag */
+    for (idx = 0; !*errflag && idx < numreq; ++idx) {
+        if (statuses[idx].MPI_TAG == MPIR_ERROR_TAG)
+            *errflag = TRUE;
+    }
+
+ fn_exit:
+    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_WAITALL_FT);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}

Modified: mpich2/trunk/src/mpi/coll/red_scat.c
===================================================================
--- mpich2/trunk/src/mpi/coll/red_scat.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/red_scat.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -80,7 +80,8 @@
     int *recvcnts,
     MPI_Datatype datatype,
     MPI_Op op,
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag )
 {
     int mpi_errno = MPI_SUCCESS;
     int mpi_errno_ret = MPI_SUCCESS;
@@ -187,13 +188,14 @@
             send_offset += size;
         }
 
-        mpi_errno = MPIC_Sendrecv(outgoing_data + send_offset*true_extent,
-                                  size, datatype, peer, MPIR_REDUCE_SCATTER_TAG,
-                                  incoming_data + recv_offset*true_extent,
-                                  size, datatype, peer, MPIR_REDUCE_SCATTER_TAG,
-                                  comm, MPI_STATUS_IGNORE);
+        mpi_errno = MPIC_Sendrecv_ft(outgoing_data + send_offset*true_extent,
+                                     size, datatype, peer, MPIR_REDUCE_SCATTER_TAG,
+                                     incoming_data + recv_offset*true_extent,
+                                     size, datatype, peer, MPIR_REDUCE_SCATTER_TAG,
+                                     comm, MPI_STATUS_IGNORE, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -229,6 +231,8 @@
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -293,7 +297,8 @@
     int *recvcnts, 
     MPI_Datatype datatype, 
     MPI_Op op, 
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag )
 {
     int   rank, comm_size, i;
     MPI_Aint extent, true_extent, true_lb; 
@@ -413,11 +418,12 @@
 
         if (rank < 2*rem) {
             if (rank % 2 == 0) { /* even */
-                mpi_errno = MPIC_Send(tmp_results, total_count, 
-                                      datatype, rank+1,
-                                      MPIR_REDUCE_SCATTER_TAG, comm);
+                mpi_errno = MPIC_Send_ft(tmp_results, total_count, 
+                                         datatype, rank+1,
+                                         MPIR_REDUCE_SCATTER_TAG, comm, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -428,12 +434,13 @@
                 newrank = -1; 
             }
             else { /* odd */
-                mpi_errno = MPIC_Recv(tmp_recvbuf, total_count, 
-                                      datatype, rank-1,
-                                      MPIR_REDUCE_SCATTER_TAG, comm,
-                                      MPI_STATUS_IGNORE);
+                mpi_errno = MPIC_Recv_ft(tmp_recvbuf, total_count, 
+                                         datatype, rank-1,
+                                         MPIR_REDUCE_SCATTER_TAG, comm,
+                                         MPI_STATUS_IGNORE, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -513,30 +520,31 @@
 */
                 /* Send data from tmp_results. Recv into tmp_recvbuf */ 
                 if ((send_cnt != 0) && (recv_cnt != 0)) 
-                    mpi_errno = MPIC_Sendrecv((char *) tmp_results +
-                                          newdisps[send_idx]*extent,
-                                          send_cnt, datatype,  
-                                          dst, MPIR_REDUCE_SCATTER_TAG, 
-                                          (char *) tmp_recvbuf +
-                                          newdisps[recv_idx]*extent,
-                                          recv_cnt, datatype, dst,
-                                          MPIR_REDUCE_SCATTER_TAG, comm,
-                                          MPI_STATUS_IGNORE); 
+                    mpi_errno = MPIC_Sendrecv_ft((char *) tmp_results +
+                                                 newdisps[send_idx]*extent,
+                                                 send_cnt, datatype,
+                                                 dst, MPIR_REDUCE_SCATTER_TAG,
+                                                 (char *) tmp_recvbuf +
+                                                 newdisps[recv_idx]*extent,
+                                                 recv_cnt, datatype, dst,
+                                                 MPIR_REDUCE_SCATTER_TAG, comm,
+                                                 MPI_STATUS_IGNORE, errflag);
                 else if ((send_cnt == 0) && (recv_cnt != 0))
-                    mpi_errno = MPIC_Recv((char *) tmp_recvbuf +
-                                          newdisps[recv_idx]*extent,
-                                          recv_cnt, datatype, dst,
-                                          MPIR_REDUCE_SCATTER_TAG, comm,
-                                          MPI_STATUS_IGNORE);
+                    mpi_errno = MPIC_Recv_ft((char *) tmp_recvbuf +
+                                             newdisps[recv_idx]*extent,
+                                             recv_cnt, datatype, dst,
+                                             MPIR_REDUCE_SCATTER_TAG, comm,
+                                             MPI_STATUS_IGNORE, errflag);
                 else if ((recv_cnt == 0) && (send_cnt != 0))
-                    mpi_errno = MPIC_Send((char *) tmp_results +
-                                          newdisps[send_idx]*extent,
-                                          send_cnt, datatype,  
-                                          dst, MPIR_REDUCE_SCATTER_TAG,
-                                          comm);  
+                    mpi_errno = MPIC_Send_ft((char *) tmp_results +
+                                             newdisps[send_idx]*extent,
+                                             send_cnt, datatype,
+                                             dst, MPIR_REDUCE_SCATTER_TAG,
+                                             comm, errflag);
 
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -583,12 +591,13 @@
         if (rank < 2*rem) {
             if (rank % 2) { /* odd */
                 if (recvcnts[rank-1]) {
-                    mpi_errno = MPIC_Send((char *) tmp_results +
-                                      disps[rank-1]*extent, recvcnts[rank-1],
-                                      datatype, rank-1,
-                                      MPIR_REDUCE_SCATTER_TAG, comm);
+                    mpi_errno = MPIC_Send_ft((char *) tmp_results +
+                                             disps[rank-1]*extent, recvcnts[rank-1],
+                                             datatype, rank-1,
+                                             MPIR_REDUCE_SCATTER_TAG, comm, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
@@ -596,12 +605,13 @@
             }
             else  {   /* even */
                 if (recvcnts[rank]) {
-                    mpi_errno = MPIC_Recv(recvbuf, recvcnts[rank],
-                                      datatype, rank+1,
-                                      MPIR_REDUCE_SCATTER_TAG, comm,
-                                      MPI_STATUS_IGNORE);
+                    mpi_errno = MPIC_Recv_ft(recvbuf, recvcnts[rank],
+                                             datatype, rank+1,
+                                             MPIR_REDUCE_SCATTER_TAG, comm,
+                                             MPI_STATUS_IGNORE, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
@@ -635,22 +645,23 @@
             /* send the data that dst needs. recv data that this process
                needs from src into tmp_recvbuf */
             if (sendbuf != MPI_IN_PLACE) 
-                mpi_errno = MPIC_Sendrecv(((char *)sendbuf+disps[dst]*extent), 
-                                          recvcnts[dst], datatype, dst,
-                                          MPIR_REDUCE_SCATTER_TAG, tmp_recvbuf,
-                                          recvcnts[rank], datatype, src,
-                                          MPIR_REDUCE_SCATTER_TAG, comm,
-                                          MPI_STATUS_IGNORE);
+                mpi_errno = MPIC_Sendrecv_ft(((char *)sendbuf+disps[dst]*extent), 
+                                             recvcnts[dst], datatype, dst,
+                                             MPIR_REDUCE_SCATTER_TAG, tmp_recvbuf,
+                                             recvcnts[rank], datatype, src,
+                                             MPIR_REDUCE_SCATTER_TAG, comm,
+                                             MPI_STATUS_IGNORE, errflag);
             else
-                mpi_errno = MPIC_Sendrecv(((char *)recvbuf+disps[dst]*extent), 
-                                          recvcnts[dst], datatype, dst,
-                                          MPIR_REDUCE_SCATTER_TAG, tmp_recvbuf,
-                                          recvcnts[rank], datatype, src,
-                                          MPIR_REDUCE_SCATTER_TAG, comm,
-                                          MPI_STATUS_IGNORE);
+                mpi_errno = MPIC_Sendrecv_ft(((char *)recvbuf+disps[dst]*extent), 
+                                             recvcnts[dst], datatype, dst,
+                                             MPIR_REDUCE_SCATTER_TAG, tmp_recvbuf,
+                                             recvcnts[rank], datatype, src,
+                                             MPIR_REDUCE_SCATTER_TAG, comm,
+                                             MPI_STATUS_IGNORE, errflag);
             
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -755,8 +766,13 @@
 
         if (pof2 == comm_size && is_block_regular) {
             /* noncommutative, pof2 size, and block regular */
-            mpi_errno = MPIR_Reduce_scatter_noncomm(sendbuf, recvbuf, recvcnts, datatype, op, comm_ptr);
-            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            mpi_errno = MPIR_Reduce_scatter_noncomm(sendbuf, recvbuf, recvcnts, datatype, op, comm_ptr, errflag);
+            if (mpi_errno) {
+                /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
+                MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+                MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
+            }
         }
         else {
             /* noncommutative and (non-pof2 or block irregular), use recursive doubling. */
@@ -844,14 +860,15 @@
                        received in tmp_recvbuf and then accumulated into
                        tmp_results. accumulation is done later below.   */ 
 
-                    mpi_errno = MPIC_Sendrecv(tmp_results, 1, sendtype, dst,
-                                              MPIR_REDUCE_SCATTER_TAG, 
-                                              tmp_recvbuf, 1, recvtype, dst,
-                                              MPIR_REDUCE_SCATTER_TAG, comm,
-                                              MPI_STATUS_IGNORE); 
+                    mpi_errno = MPIC_Sendrecv_ft(tmp_results, 1, sendtype, dst,
+                                                 MPIR_REDUCE_SCATTER_TAG, 
+                                                 tmp_recvbuf, 1, recvtype, dst,
+                                                 MPIR_REDUCE_SCATTER_TAG, comm,
+                                                 MPI_STATUS_IGNORE, errflag);
                     received = 1;
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
@@ -893,22 +910,28 @@
                             (rank < tree_root + nprocs_completed)
                             && (dst >= tree_root + nprocs_completed)) {
                             /* send the current result */
-                            mpi_errno = MPIC_Send(tmp_recvbuf, 1, recvtype,
-                                                  dst, MPIR_REDUCE_SCATTER_TAG,
-                                                  comm);  
-                            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+                            mpi_errno = MPIC_Send_ft(tmp_recvbuf, 1, recvtype,
+                                                     dst, MPIR_REDUCE_SCATTER_TAG,
+                                                     comm, errflag);
+                            if (mpi_errno) {
+                                /* for communication errors, just record the error but continue */
+                                *errflag = TRUE;
+                                MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+                                MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
+                            }
                         }
                         /* recv only if this proc. doesn't have data and sender
                            has data */
                         else if ((dst < rank) && 
                                  (dst < tree_root + nprocs_completed) &&
                                  (rank >= tree_root + nprocs_completed)) {
-                            mpi_errno = MPIC_Recv(tmp_recvbuf, 1, recvtype, dst,
-                                                  MPIR_REDUCE_SCATTER_TAG,
-                                                  comm, MPI_STATUS_IGNORE); 
+                            mpi_errno = MPIC_Recv_ft(tmp_recvbuf, 1, recvtype, dst,
+                                                     MPIR_REDUCE_SCATTER_TAG,
+                                                     comm, MPI_STATUS_IGNORE, errflag); 
                             received = 1;
                             if (mpi_errno) {
                                 /* for communication errors, just record the error but continue */
+                                *errflag = TRUE;
                                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                             }
@@ -919,7 +942,7 @@
                 }
 
                 /* The following reduction is done here instead of after 
-                   the MPIC_Sendrecv or MPIC_Recv above. This is
+                   the MPIC_Sendrecv_ft or MPIC_Recv_ft above. This is
                    because to do it above, in the noncommutative 
                    case, we would need an extra temp buffer so as not to
                    overwrite temp_recvbuf, because temp_recvbuf may have
@@ -1001,6 +1024,8 @@
 
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -1019,7 +1044,8 @@
     int *recvcnts, 
     MPI_Datatype datatype, 
     MPI_Op op, 
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag )
 {
 /* Intercommunicator Reduce_scatter.
    We first do an intercommunicator reduce to rank 0 on left group,
@@ -1068,9 +1094,10 @@
         /* reduce from right group to rank 0*/
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
         mpi_errno = MPIR_Reduce_inter(sendbuf, tmp_buf, total_count, datatype, op,
-                                root, comm_ptr);
+                                root, comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -1078,9 +1105,10 @@
         /* reduce to rank 0 of right group */
         root = 0;
         mpi_errno = MPIR_Reduce_inter(sendbuf, tmp_buf, total_count, datatype, op,
-                                root, comm_ptr);
+                                root, comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -1089,9 +1117,10 @@
         /* reduce to rank 0 of left group */
         root = 0;
         mpi_errno = MPIR_Reduce_inter(sendbuf, tmp_buf, total_count, datatype, op,
-                                root, comm_ptr);
+                                root, comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -1099,9 +1128,10 @@
         /* reduce from right group to rank 0 */
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
         mpi_errno = MPIR_Reduce_inter(sendbuf, tmp_buf, total_count, datatype, op,
-                                root, comm_ptr);
+                                root, comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -1116,9 +1146,10 @@
     newcomm_ptr = comm_ptr->local_comm;
 
     mpi_errno = MPIR_Scatterv(tmp_buf, recvcnts, disps, datatype, recvbuf,
-                              recvcnts[rank], datatype, 0, newcomm_ptr);
+                              recvcnts[rank], datatype, 0, newcomm_ptr, errflag);
     if (mpi_errno) {
         /* for communication errors, just record the error but continue */
+        *errflag = TRUE;
         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
     }
@@ -1127,6 +1158,8 @@
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -1142,19 +1175,19 @@
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Reduce_scatter(void *sendbuf, void *recvbuf, int *recvcnts,
-                        MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr)
+                        MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
         
     if (comm_ptr->comm_kind == MPID_INTRACOMM) {
         /* intracommunicator */
         mpi_errno = MPIR_Reduce_scatter_intra(sendbuf, recvbuf, recvcnts,
-                                              datatype, op, comm_ptr);
+                                              datatype, op, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         /* intercommunicator */
         mpi_errno = MPIR_Reduce_scatter_inter(sendbuf, recvbuf, recvcnts,
-                                              datatype, op, comm_ptr);
+                                              datatype, op, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -1173,17 +1206,17 @@
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Reduce_scatter_impl(void *sendbuf, void *recvbuf, int *recvcnts,
-                             MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr)
+                             MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
         
     if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Reduce_scatter != NULL) {
 	mpi_errno = comm_ptr->coll_fns->Reduce_scatter(sendbuf, recvbuf, recvcnts,
-                                                       datatype, op, comm_ptr);
+                                                       datatype, op, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         mpi_errno = MPIR_Reduce_scatter(sendbuf, recvbuf, recvcnts,
-                                        datatype, op, comm_ptr);
+                                        datatype, op, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
     
@@ -1235,6 +1268,7 @@
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Comm *comm_ptr = NULL;
+    int errflag = FALSE;
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_REDUCE_SCATTER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -1312,7 +1346,7 @@
     /* ... body of routine ...  */
 
     mpi_errno = MPIR_Reduce_scatter_impl(sendbuf, recvbuf, recvcnts,
-                                         datatype, op, comm_ptr);
+                                         datatype, op, comm_ptr, &errflag);
     if (mpi_errno) goto fn_fail;
 
     /* ... end of body of routine ... */

Modified: mpich2/trunk/src/mpi/coll/red_scat_block.c
===================================================================
--- mpich2/trunk/src/mpi/coll/red_scat_block.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/red_scat_block.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -86,7 +86,8 @@
     int recvcount,
     MPI_Datatype datatype,
     MPI_Op op,
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag )
 {
     int mpi_errno = MPI_SUCCESS;
     int mpi_errno_ret = MPI_SUCCESS;
@@ -189,13 +190,14 @@
             send_offset += size;
         }
 
-        mpi_errno = MPIC_Sendrecv(outgoing_data + send_offset*true_extent,
-                                  size, datatype, peer, MPIR_REDUCE_SCATTER_BLOCK_TAG,
-                                  incoming_data + recv_offset*true_extent,
-                                  size, datatype, peer, MPIR_REDUCE_SCATTER_BLOCK_TAG,
-                                  comm, MPI_STATUS_IGNORE);
+        mpi_errno = MPIC_Sendrecv_ft(outgoing_data + send_offset*true_extent,
+                                     size, datatype, peer, MPIR_REDUCE_SCATTER_BLOCK_TAG,
+                                     incoming_data + recv_offset*true_extent,
+                                     size, datatype, peer, MPIR_REDUCE_SCATTER_BLOCK_TAG,
+                                     comm, MPI_STATUS_IGNORE, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -233,6 +235,8 @@
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -297,7 +301,8 @@
     int recvcount, 
     MPI_Datatype datatype, 
     MPI_Op op, 
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag )
 {
     int   rank, comm_size, i;
     MPI_Aint extent, true_extent, true_lb; 
@@ -416,11 +421,12 @@
 
         if (rank < 2*rem) {
             if (rank % 2 == 0) { /* even */
-                mpi_errno = MPIC_Send(tmp_results, total_count, 
-                                      datatype, rank+1,
-                                      MPIR_REDUCE_SCATTER_BLOCK_TAG, comm);
+                mpi_errno = MPIC_Send_ft(tmp_results, total_count, 
+                                         datatype, rank+1,
+                                         MPIR_REDUCE_SCATTER_BLOCK_TAG, comm, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -431,12 +437,13 @@
                 newrank = -1; 
             }
             else { /* odd */
-                mpi_errno = MPIC_Recv(tmp_recvbuf, total_count, 
-                                      datatype, rank-1,
-                                      MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
-                                      MPI_STATUS_IGNORE);
+                mpi_errno = MPIC_Recv_ft(tmp_recvbuf, total_count, 
+                                         datatype, rank-1,
+                                         MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
+                                         MPI_STATUS_IGNORE, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -516,30 +523,31 @@
 */
                 /* Send data from tmp_results. Recv into tmp_recvbuf */ 
                 if ((send_cnt != 0) && (recv_cnt != 0)) 
-                    mpi_errno = MPIC_Sendrecv((char *) tmp_results +
-                                          newdisps[send_idx]*extent,
-                                          send_cnt, datatype,  
-                                          dst, MPIR_REDUCE_SCATTER_BLOCK_TAG, 
-                                          (char *) tmp_recvbuf +
-                                          newdisps[recv_idx]*extent,
-                                          recv_cnt, datatype, dst,
-                                          MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
-                                          MPI_STATUS_IGNORE); 
+                    mpi_errno = MPIC_Sendrecv_ft((char *) tmp_results +
+                                                 newdisps[send_idx]*extent,
+                                                 send_cnt, datatype,
+                                                 dst, MPIR_REDUCE_SCATTER_BLOCK_TAG,
+                                                 (char *) tmp_recvbuf +
+                                                 newdisps[recv_idx]*extent,
+                                                 recv_cnt, datatype, dst,
+                                                 MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
+                                                 MPI_STATUS_IGNORE, errflag);
                 else if ((send_cnt == 0) && (recv_cnt != 0))
-                    mpi_errno = MPIC_Recv((char *) tmp_recvbuf +
-                                          newdisps[recv_idx]*extent,
-                                          recv_cnt, datatype, dst,
-                                          MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
-                                          MPI_STATUS_IGNORE);
+                    mpi_errno = MPIC_Recv_ft((char *) tmp_recvbuf +
+                                             newdisps[recv_idx]*extent,
+                                             recv_cnt, datatype, dst,
+                                             MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
+                                             MPI_STATUS_IGNORE, errflag);
                 else if ((recv_cnt == 0) && (send_cnt != 0))
-                    mpi_errno = MPIC_Send((char *) tmp_results +
-                                          newdisps[send_idx]*extent,
-                                          send_cnt, datatype,  
-                                          dst, MPIR_REDUCE_SCATTER_BLOCK_TAG,
-                                          comm);  
+                    mpi_errno = MPIC_Send_ft((char *) tmp_results +
+                                             newdisps[send_idx]*extent,
+                                             send_cnt, datatype,
+                                             dst, MPIR_REDUCE_SCATTER_BLOCK_TAG,
+                                             comm, errflag);
 
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -582,19 +590,20 @@
            calculated for that process */
         if (rank < 2*rem) {
             if (rank % 2) { /* odd */
-                mpi_errno = MPIC_Send((char *) tmp_results +
-                                      disps[rank-1]*extent, recvcount,
-                                      datatype, rank-1,
-                                      MPIR_REDUCE_SCATTER_BLOCK_TAG, comm);
+                mpi_errno = MPIC_Send_ft((char *) tmp_results +
+                                         disps[rank-1]*extent, recvcount,
+                                         datatype, rank-1,
+                                         MPIR_REDUCE_SCATTER_BLOCK_TAG, comm, errflag);
             }
             else  {   /* even */
-                mpi_errno = MPIC_Recv(recvbuf, recvcount,
-                                      datatype, rank+1,
-                                      MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
-                                      MPI_STATUS_IGNORE); 
+                mpi_errno = MPIC_Recv_ft(recvbuf, recvcount,
+                                         datatype, rank+1,
+                                         MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
+                                         MPI_STATUS_IGNORE, errflag);
             }
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -626,22 +635,23 @@
             /* send the data that dst needs. recv data that this process
                needs from src into tmp_recvbuf */
             if (sendbuf != MPI_IN_PLACE) 
-                mpi_errno = MPIC_Sendrecv(((char *)sendbuf+disps[dst]*extent), 
-                                          recvcount, datatype, dst,
-                                          MPIR_REDUCE_SCATTER_BLOCK_TAG, tmp_recvbuf,
-                                          recvcount, datatype, src,
-                                          MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
-                                          MPI_STATUS_IGNORE);
+                mpi_errno = MPIC_Sendrecv_ft(((char *)sendbuf+disps[dst]*extent), 
+                                             recvcount, datatype, dst,
+                                             MPIR_REDUCE_SCATTER_BLOCK_TAG, tmp_recvbuf,
+                                             recvcount, datatype, src,
+                                             MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
+                                             MPI_STATUS_IGNORE, errflag);
             else
-                mpi_errno = MPIC_Sendrecv(((char *)recvbuf+disps[dst]*extent), 
-                                          recvcount, datatype, dst,
-                                          MPIR_REDUCE_SCATTER_BLOCK_TAG, tmp_recvbuf,
-                                          recvcount, datatype, src,
-                                          MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
-                                          MPI_STATUS_IGNORE);
+                mpi_errno = MPIC_Sendrecv_ft(((char *)recvbuf+disps[dst]*extent), 
+                                             recvcount, datatype, dst,
+                                             MPIR_REDUCE_SCATTER_BLOCK_TAG, tmp_recvbuf,
+                                             recvcount, datatype, src,
+                                             MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
+                                             MPI_STATUS_IGNORE, errflag);
             
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -736,8 +746,13 @@
         /* power of two check */
         if (!(comm_size & (comm_size - 1))) {
             /* noncommutative, pof2 size */
-            mpi_errno = MPIR_Reduce_scatter_block_noncomm(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr);
-            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            mpi_errno = MPIR_Reduce_scatter_block_noncomm(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, errflag);
+            if (mpi_errno) {
+                /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
+                MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+                MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
+            }
         }
         else {
             /* noncommutative and non-pof2, use recursive doubling. */
@@ -825,14 +840,15 @@
                        received in tmp_recvbuf and then accumulated into
                        tmp_results. accumulation is done later below.   */ 
 
-                    mpi_errno = MPIC_Sendrecv(tmp_results, 1, sendtype, dst,
-                                              MPIR_REDUCE_SCATTER_BLOCK_TAG, 
-                                              tmp_recvbuf, 1, recvtype, dst,
-                                              MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
-                                              MPI_STATUS_IGNORE); 
+                    mpi_errno = MPIC_Sendrecv_ft(tmp_results, 1, sendtype, dst,
+                                                 MPIR_REDUCE_SCATTER_BLOCK_TAG, 
+                                                 tmp_recvbuf, 1, recvtype, dst,
+                                                 MPIR_REDUCE_SCATTER_BLOCK_TAG, comm,
+                                                 MPI_STATUS_IGNORE, errflag);
                     received = 1;
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
@@ -874,11 +890,12 @@
                             (rank < tree_root + nprocs_completed)
                             && (dst >= tree_root + nprocs_completed)) {
                             /* send the current result */
-                            mpi_errno = MPIC_Send(tmp_recvbuf, 1, recvtype,
-                                                  dst, MPIR_REDUCE_SCATTER_BLOCK_TAG,
-                                                  comm);  
+                            mpi_errno = MPIC_Send_ft(tmp_recvbuf, 1, recvtype,
+                                                     dst, MPIR_REDUCE_SCATTER_BLOCK_TAG,
+                                                     comm, errflag);
                             if (mpi_errno) {
                                 /* for communication errors, just record the error but continue */
+                                *errflag = TRUE;
                                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                             }
@@ -888,12 +905,13 @@
                         else if ((dst < rank) && 
                                  (dst < tree_root + nprocs_completed) &&
                                  (rank >= tree_root + nprocs_completed)) {
-                            mpi_errno = MPIC_Recv(tmp_recvbuf, 1, recvtype, dst,
-                                                  MPIR_REDUCE_SCATTER_BLOCK_TAG,
-                                                  comm, MPI_STATUS_IGNORE); 
+                            mpi_errno = MPIC_Recv_ft(tmp_recvbuf, 1, recvtype, dst,
+                                                     MPIR_REDUCE_SCATTER_BLOCK_TAG,
+                                                     comm, MPI_STATUS_IGNORE, errflag);
                             received = 1;
                             if (mpi_errno) {
                                 /* for communication errors, just record the error but continue */
+                                *errflag = TRUE;
                                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                             }
@@ -904,7 +922,7 @@
                 }
 
                 /* The following reduction is done here instead of after 
-                   the MPIC_Sendrecv or MPIC_Recv above. This is
+                   the MPIC_Sendrecv_ft or MPIC_Recv_ft above. This is
                    because to do it above, in the noncommutative 
                    case, we would need an extra temp buffer so as not to
                    overwrite temp_recvbuf, because temp_recvbuf may have
@@ -986,6 +1004,8 @@
 
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -1004,7 +1024,7 @@
     int recvcount, 
     MPI_Datatype datatype, 
     MPI_Op op, 
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr, int *errflag )
 {
 /* Intercommunicator Reduce_scatter_block.
    We first do an intercommunicator reduce to rank 0 on left group,
@@ -1043,9 +1063,10 @@
         /* reduce from right group to rank 0*/
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
         mpi_errno = MPIR_Reduce_inter(sendbuf, tmp_buf, total_count, datatype, op,
-                                root, comm_ptr);
+                                root, comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -1053,9 +1074,10 @@
         /* reduce to rank 0 of right group */
         root = 0;
         mpi_errno = MPIR_Reduce_inter(sendbuf, tmp_buf, total_count, datatype, op,
-                                root, comm_ptr);
+                                root, comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -1064,9 +1086,10 @@
         /* reduce to rank 0 of left group */
         root = 0;
         mpi_errno = MPIR_Reduce_inter(sendbuf, tmp_buf, total_count, datatype, op,
-                                root, comm_ptr);
+                                root, comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -1074,9 +1097,10 @@
         /* reduce from right group to rank 0 */
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
         mpi_errno = MPIR_Reduce_inter(sendbuf, tmp_buf, total_count, datatype, op,
-                                root, comm_ptr);
+                                root, comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -1089,9 +1113,10 @@
     newcomm_ptr = comm_ptr->local_comm;
 
     mpi_errno = MPIR_Scatter_impl(tmp_buf, recvcount, datatype, recvbuf,
-                                  recvcount, datatype, 0, newcomm_ptr);
+                                  recvcount, datatype, 0, newcomm_ptr, errflag);
     if (mpi_errno) {
         /* for communication errors, just record the error but continue */
+        *errflag = TRUE;
         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
     }
@@ -1100,6 +1125,8 @@
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -1115,17 +1142,17 @@
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Reduce_scatter_block(void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype,
-                              MPI_Op op, MPID_Comm *comm_ptr)
+                              MPI_Op op, MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
         
     if (comm_ptr->comm_kind == MPID_INTRACOMM) {
         /* intracommunicator */
-        mpi_errno = MPIR_Reduce_scatter_block_intra(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr);
+        mpi_errno = MPIR_Reduce_scatter_block_intra(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         /* intercommunicator */
-        mpi_errno = MPIR_Reduce_scatter_block_inter(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr);
+        mpi_errno = MPIR_Reduce_scatter_block_inter(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -1145,21 +1172,21 @@
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Reduce_scatter_block_impl(void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype,
-                                   MPI_Op op, MPID_Comm *comm_ptr)
+                                   MPI_Op op, MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
         
     if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Reduce_scatter_block != NULL) {
-	mpi_errno = comm_ptr->coll_fns->Reduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr);
+	mpi_errno = comm_ptr->coll_fns->Reduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         if (comm_ptr->comm_kind == MPID_INTRACOMM) {
             /* intracommunicator */
-            mpi_errno = MPIR_Reduce_scatter_block_intra(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr);
+            mpi_errno = MPIR_Reduce_scatter_block_intra(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, errflag);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         } else {
             /* intercommunicator */
-            mpi_errno = MPIR_Reduce_scatter_block_inter(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr);
+            mpi_errno = MPIR_Reduce_scatter_block_inter(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, errflag);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         }
     }
@@ -1211,6 +1238,7 @@
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Comm *comm_ptr = NULL;
+    int errflag = FALSE;
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_REDUCE_SCATTER_BLOCK);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -1279,7 +1307,7 @@
 
     /* ... body of routine ...  */
 
-    mpi_errno = MPIR_Reduce_scatter_block_impl(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr);
+    mpi_errno = MPIR_Reduce_scatter_block_impl(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, &errflag);
     if (mpi_errno) goto fn_fail;
 
     /* ... end of body of routine ... */

Modified: mpich2/trunk/src/mpi/coll/reduce.c
===================================================================
--- mpich2/trunk/src/mpi/coll/reduce.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/reduce.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -38,7 +38,8 @@
     MPI_Datatype datatype, 
     MPI_Op op, 
     int root, 
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag )
 {
     int mpi_errno = MPI_SUCCESS;
     int mpi_errno_ret = MPI_SUCCESS;
@@ -167,10 +168,11 @@
             source = (relrank | mask);
             if (source < comm_size) {
                 source = (source + lroot) % comm_size;
-                mpi_errno = MPIC_Recv (tmp_buf, count, datatype, source, 
-                                       MPIR_REDUCE_TAG, comm, &status);
+                mpi_errno = MPIC_Recv_ft(tmp_buf, count, datatype, source, 
+                                         MPIR_REDUCE_TAG, comm, &status, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -206,10 +208,11 @@
             /* I've received all that I'm going to.  Send my result to 
                my parent */
             source = ((relrank & (~ mask)) + lroot) % comm_size;
-            mpi_errno  = MPIC_Send( recvbuf, count, datatype, 
-                                    source, MPIR_REDUCE_TAG, comm );
+            mpi_errno  = MPIC_Send_ft(recvbuf, count, datatype,
+                                      source, MPIR_REDUCE_TAG, comm, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -222,16 +225,17 @@
     {
         if (rank == 0)
         {
-            mpi_errno  = MPIC_Send( recvbuf, count, datatype, root, 
-                                    MPIR_REDUCE_TAG, comm );
+            mpi_errno  = MPIC_Send_ft(recvbuf, count, datatype, root,
+                                      MPIR_REDUCE_TAG, comm, errflag);
         }
         else if (rank == root)
         {
-            mpi_errno = MPIC_Recv ( recvbuf, count, datatype, 0, 
-                                    MPIR_REDUCE_TAG, comm, &status);
+            mpi_errno = MPIC_Recv_ft(recvbuf, count, datatype, 0,
+                                    MPIR_REDUCE_TAG, comm, &status, errflag);
         }
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -250,6 +254,8 @@
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -293,7 +299,8 @@
     MPI_Datatype datatype, 
     MPI_Op op, 
     int root, 
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag )
 {
     int mpi_errno = MPI_SUCCESS;
     int mpi_errno_ret = MPI_SUCCESS;
@@ -402,11 +409,12 @@
     
     if (rank < 2*rem) {
         if (rank % 2 != 0) { /* odd */
-            mpi_errno = MPIC_Send(recvbuf, count, 
-                                  datatype, rank-1,
-                                  MPIR_REDUCE_TAG, comm);
+            mpi_errno = MPIC_Send_ft(recvbuf, count,
+                                     datatype, rank-1,
+                                     MPIR_REDUCE_TAG, comm, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -417,12 +425,13 @@
             newrank = -1; 
         }
         else { /* even */
-            mpi_errno = MPIC_Recv(tmp_buf, count, 
-                                  datatype, rank+1,
-                                  MPIR_REDUCE_TAG, comm,
-                                  MPI_STATUS_IGNORE);
+            mpi_errno = MPIC_Recv_ft(tmp_buf, count,
+                                     datatype, rank+1,
+                                     MPIR_REDUCE_TAG, comm,
+                                     MPI_STATUS_IGNORE, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -495,17 +504,18 @@
                   send_cnt, recv_cnt, last_idx);
 */
             /* Send data from recvbuf. Recv into tmp_buf */ 
-            mpi_errno = MPIC_Sendrecv((char *) recvbuf +
-                                      disps[send_idx]*extent,
-                                      send_cnt, datatype,  
-                                      dst, MPIR_REDUCE_TAG, 
-                                      (char *) tmp_buf +
-                                      disps[recv_idx]*extent,
-                                      recv_cnt, datatype, dst,
-                                      MPIR_REDUCE_TAG, comm,
-                                      MPI_STATUS_IGNORE);
+            mpi_errno = MPIC_Sendrecv_ft((char *) recvbuf +
+                                         disps[send_idx]*extent,
+                                         send_cnt, datatype,
+                                         dst, MPIR_REDUCE_TAG,
+                                         (char *) tmp_buf +
+                                         disps[recv_idx]*extent,
+                                         recv_cnt, datatype, dst,
+                                         MPIR_REDUCE_TAG, comm,
+                                         MPI_STATUS_IGNORE, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -559,11 +569,12 @@
                 for (i=1; i<pof2; i++)
                     disps[i] = disps[i-1] + cnts[i-1];
                 
-                mpi_errno = MPIC_Recv(recvbuf, cnts[0], datatype,  
-                                      0, MPIR_REDUCE_TAG, comm,
-                                      MPI_STATUS_IGNORE);
+                mpi_errno = MPIC_Recv_ft(recvbuf, cnts[0], datatype,
+                                         0, MPIR_REDUCE_TAG, comm,
+                                         MPI_STATUS_IGNORE, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -572,10 +583,11 @@
                 last_idx = 2;
             }
             else if (newrank == 0) {  /* send */
-                mpi_errno = MPIC_Send(recvbuf, cnts[0], datatype,  
-                                      root, MPIR_REDUCE_TAG, comm);
+                mpi_errno = MPIC_Send_ft(recvbuf, cnts[0], datatype,
+                                         root, MPIR_REDUCE_TAG, comm, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -642,13 +654,14 @@
                 /* printf("Rank %d, send_idx %d, send_cnt %d, last_idx %d\n", newrank, send_idx, send_cnt, last_idx);
                    fflush(stdout); */
                 /* Send data from recvbuf. Recv into tmp_buf */ 
-                mpi_errno = MPIC_Send((char *) recvbuf +
-                                      disps[send_idx]*extent,
-                                      send_cnt, datatype,  
-                                      dst, MPIR_REDUCE_TAG, 
-                                      comm);
+                mpi_errno = MPIC_Send_ft((char *) recvbuf +
+                                         disps[send_idx]*extent,
+                                         send_cnt, datatype,
+                                         dst, MPIR_REDUCE_TAG,
+                                         comm, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -658,13 +671,14 @@
                 /* recv and continue */
                 /* printf("Rank %d, recv_idx %d, recv_cnt %d, last_idx %d\n", newrank, recv_idx, recv_cnt, last_idx);
                    fflush(stdout); */
-                mpi_errno = MPIC_Recv((char *) recvbuf +
-                                      disps[recv_idx]*extent,
-                                      recv_cnt, datatype, dst,
-                                      MPIR_REDUCE_TAG, comm,
-                                      MPI_STATUS_IGNORE);
+                mpi_errno = MPIC_Recv_ft((char *) recvbuf +
+                                         disps[recv_idx]*extent,
+                                         recv_cnt, datatype, dst,
+                                         MPIR_REDUCE_TAG, comm,
+                                         MPI_STATUS_IGNORE, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -690,6 +704,8 @@
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -763,7 +779,8 @@
     MPI_Datatype datatype, 
     MPI_Op op, 
     int root, 
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag )
 {
     int mpi_errno = MPI_SUCCESS;
     int mpi_errno_ret = MPI_SUCCESS;
@@ -808,9 +825,10 @@
         if (comm_ptr->node_comm != NULL &&
             MPIU_Get_intranode_rank(comm_ptr, root) == -1) {
             mpi_errno = MPIR_Reduce_impl(sendbuf, tmp_buf, count, datatype,
-                                         op, 0, comm_ptr->node_comm);
+                                         op, 0, comm_ptr->node_comm, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -824,9 +842,10 @@
                 void *buf = (comm_ptr->node_comm == NULL ? sendbuf : tmp_buf);
                 mpi_errno = MPIR_Reduce_impl(buf, NULL, count, datatype,
                                              op, MPIU_Get_internode_rank(comm_ptr, root),
-                                             comm_ptr->node_roots_comm);
+                                             comm_ptr->node_roots_comm, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -838,9 +857,10 @@
 
                     mpi_errno = MPIR_Reduce_impl(sendbuf, tmp_buf, count, datatype,
                                                  op, MPIU_Get_internode_rank(comm_ptr, root),
-                                                 comm_ptr->node_roots_comm);
+                                                 comm_ptr->node_roots_comm, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
@@ -853,9 +873,10 @@
 
                     mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype,
                                                  op, MPIU_Get_internode_rank(comm_ptr, root),
-                                                 comm_ptr->node_roots_comm);
+                                                 comm_ptr->node_roots_comm, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
@@ -872,9 +893,10 @@
             MPIU_Get_intranode_rank(comm_ptr, root) != -1) { 
             mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype,
                                          op, MPIU_Get_intranode_rank(comm_ptr, root),
-                                         comm_ptr->node_comm);
+                                         comm_ptr->node_comm, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -908,18 +930,20 @@
     if ((count*type_size > MPIR_PARAM_REDUCE_SHORT_MSG_SIZE) &&
         (HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN) && (count >= pof2)) {
         /* do a reduce-scatter followed by gather to root. */
-        mpi_errno = MPIR_Reduce_redscat_gather(sendbuf, recvbuf, count, datatype, op, root, comm_ptr);
+        mpi_errno = MPIR_Reduce_redscat_gather(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
     }
     else {
         /* use a binomial tree algorithm */ 
-        mpi_errno = MPIR_Reduce_binomial(sendbuf, recvbuf, count, datatype, op, root, comm_ptr);
+        mpi_errno = MPIR_Reduce_binomial(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -934,6 +958,8 @@
 #endif
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -953,7 +979,8 @@
     MPI_Datatype datatype, 
     MPI_Op op, 
     int root, 
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag )
 {
 /*  Intercommunicator reduce.
     Remote group does a local intracommunicator
@@ -982,10 +1009,11 @@
 
     if (root == MPI_ROOT) {
         /* root receives data from rank 0 on remote group */
-        mpi_errno = MPIC_Recv(recvbuf, count, datatype, 0,
-                              MPIR_REDUCE_TAG, comm, &status);
+        mpi_errno = MPIC_Recv_ft(recvbuf, count, datatype, 0,
+                                 MPIR_REDUCE_TAG, comm, &status, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -1020,19 +1048,21 @@
         
         /* now do a local reduce on this intracommunicator */
         mpi_errno = MPIR_Reduce_intra(sendbuf, tmp_buf, count, datatype,
-                                      op, 0, newcomm_ptr);
+                                      op, 0, newcomm_ptr, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
 
         if (rank == 0)
 	{
-            mpi_errno = MPIC_Send(tmp_buf, count, datatype, root,
-                                  MPIR_REDUCE_TAG, comm); 
+            mpi_errno = MPIC_Send_ft(tmp_buf, count, datatype, root,
+                                     MPIR_REDUCE_TAG, comm, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -1044,6 +1074,8 @@
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
 
   fn_fail:
@@ -1060,19 +1092,19 @@
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
-                MPI_Op op, int root, MPID_Comm *comm_ptr)
+                MPI_Op op, int root, MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
         
     if (comm_ptr->comm_kind == MPID_INTRACOMM) {
         /* intracommunicator */
         mpi_errno = MPIR_Reduce_intra(sendbuf, recvbuf, count, datatype,
-                                      op, root, comm_ptr);
+                                      op, root, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         /* intercommunicator */
         mpi_errno = MPIR_Reduce_inter(sendbuf, recvbuf, count, datatype,
-                                      op, root, comm_ptr);
+                                      op, root, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -1091,24 +1123,24 @@
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Reduce_impl(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
-                     MPI_Op op, int root, MPID_Comm *comm_ptr)
+                     MPI_Op op, int root, MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
         
     if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Reduce != NULL) {
 	mpi_errno = comm_ptr->coll_fns->Reduce(sendbuf, recvbuf, count,
-                                               datatype, op, root, comm_ptr);
+                                               datatype, op, root, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         if (comm_ptr->comm_kind == MPID_INTRACOMM) {
             /* intracommunicator */
             mpi_errno = MPIR_Reduce_intra(sendbuf, recvbuf, count, datatype,
-                                          op, root, comm_ptr);
+                                          op, root, comm_ptr, errflag);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 	} else {
             /* intercommunicator */
             mpi_errno = MPIR_Reduce_inter(sendbuf, recvbuf, count, datatype,
-                                          op, root, comm_ptr);
+                                          op, root, comm_ptr, errflag);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         }
     }
@@ -1164,6 +1196,7 @@
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Comm *comm_ptr = NULL;
+    int errflag = FALSE;
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_REDUCE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -1271,7 +1304,7 @@
 
     /* ... body of routine ...  */
 
-    mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype, op, root, comm_ptr);
+    mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, &errflag);
     if (mpi_errno) goto fn_fail;
     
     /* ... end of body of routine ... */

Modified: mpich2/trunk/src/mpi/coll/scan.c
===================================================================
--- mpich2/trunk/src/mpi/coll/scan.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/scan.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -71,7 +71,8 @@
     int count, 
     MPI_Datatype datatype, 
     MPI_Op op, 
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag )
 {
     MPI_Status status;
     int        rank, comm_size;
@@ -167,13 +168,14 @@
         dst = rank ^ mask;
         if (dst < comm_size) {
             /* Send partial_scan to dst. Recv into tmp_buf */
-            mpi_errno = MPIC_Sendrecv(partial_scan, count, datatype,
-                                      dst, MPIR_SCAN_TAG, tmp_buf,
-                                      count, datatype, dst,
-                                      MPIR_SCAN_TAG, comm,
-                                      &status);
+            mpi_errno = MPIC_Sendrecv_ft(partial_scan, count, datatype,
+                                         dst, MPIR_SCAN_TAG, tmp_buf,
+                                         count, datatype, dst,
+                                         MPIR_SCAN_TAG, comm,
+                                         &status, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -235,6 +237,8 @@
     
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -256,7 +260,8 @@
     int count,
     MPI_Datatype datatype,
     MPI_Op op,
-    MPID_Comm *comm_ptr )
+    MPID_Comm *comm_ptr,
+    int *errflag )
 {
     int mpi_errno = MPI_SUCCESS;
     int mpi_errno_ret = MPI_SUCCESS;
@@ -278,7 +283,7 @@
 
     if (!MPIR_Comm_is_node_consecutive(comm_ptr)) {
         /* We can't use the SMP-aware algorithm, use the generic one */
-        return MPIR_Scan_generic(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+        return MPIR_Scan_generic(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
     }
     
     MPIU_THREADPRIV_GET;
@@ -310,9 +315,10 @@
     if (comm_ptr->node_comm != NULL)
     {
         mpi_errno = MPIR_Scan_impl(sendbuf, recvbuf, count, datatype, 
-                                   op, comm_ptr->node_comm);
+                                   op, comm_ptr->node_comm, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -330,11 +336,12 @@
        reduced data of rank 1,2,3. */
     if (comm_ptr->node_roots_comm != NULL && comm_ptr->node_comm != NULL)
     {
-        mpi_errno = MPIC_Recv(localfulldata, count, datatype, 
-                              comm_ptr->node_comm->local_size - 1, MPIR_SCAN_TAG, 
-                              comm_ptr->node_comm->handle, &status);
+        mpi_errno = MPIC_Recv_ft(localfulldata, count, datatype, 
+                                 comm_ptr->node_comm->local_size - 1, MPIR_SCAN_TAG, 
+                                 comm_ptr->node_comm->handle, &status, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -343,10 +350,11 @@
              comm_ptr->node_comm != NULL && 
              MPIU_Get_intranode_rank(comm_ptr, rank) == comm_ptr->node_comm->local_size - 1)
     {
-        mpi_errno = MPIC_Send(recvbuf, count, datatype,
-                              0, MPIR_SCAN_TAG, comm_ptr->node_comm->handle);
+        mpi_errno = MPIC_Send_ft(recvbuf, count, datatype,
+                                 0, MPIR_SCAN_TAG, comm_ptr->node_comm->handle, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -363,9 +371,10 @@
     if (comm_ptr->node_roots_comm != NULL)
     {
         mpi_errno = MPIR_Scan_impl(localfulldata, prefulldata, count, datatype,
-                                   op, comm_ptr->node_roots_comm);
+                                   op, comm_ptr->node_roots_comm, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -373,24 +382,26 @@
         if (MPIU_Get_internode_rank(comm_ptr, rank) != 
             comm_ptr->node_roots_comm->local_size-1)
         {
-            mpi_errno = MPIC_Send(prefulldata, count, datatype,
-                                  MPIU_Get_internode_rank(comm_ptr, rank) + 1,
-                                  MPIR_SCAN_TAG, comm_ptr->node_roots_comm->handle);
+            mpi_errno = MPIC_Send_ft(prefulldata, count, datatype,
+                                     MPIU_Get_internode_rank(comm_ptr, rank) + 1,
+                                     MPIR_SCAN_TAG, comm_ptr->node_roots_comm->handle, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
         }
         if (MPIU_Get_internode_rank(comm_ptr, rank) != 0)
         {
-            mpi_errno = MPIC_Recv(tempbuf, count, datatype,
-                                  MPIU_Get_internode_rank(comm_ptr, rank) - 1, 
-                                  MPIR_SCAN_TAG, comm_ptr->node_roots_comm->handle, 
-                                  &status);
+            mpi_errno = MPIC_Recv_ft(tempbuf, count, datatype,
+                                     MPIU_Get_internode_rank(comm_ptr, rank) - 1, 
+                                     MPIR_SCAN_TAG, comm_ptr->node_roots_comm->handle, 
+                                     &status, errflag);
             noneed = 0;
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -404,9 +415,10 @@
        reduce it with recvbuf to get final result if necessary. */
 
     if (comm_ptr->node_comm != NULL) {
-        mpi_errno = MPIR_Bcast_impl(&noneed, 1, MPI_INT, 0, comm_ptr->node_comm);
+        mpi_errno = MPIR_Bcast_impl(&noneed, 1, MPI_INT, 0, comm_ptr->node_comm, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
+            *errflag = TRUE;
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
             MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
@@ -417,9 +429,10 @@
         int is_cxx_uop = 0;
 #endif
         if (comm_ptr->node_comm != NULL) {
-            mpi_errno = MPIR_Bcast_impl(tempbuf, count, datatype, 0, comm_ptr->node_comm);
+            mpi_errno = MPIR_Bcast_impl(tempbuf, count, datatype, 0, comm_ptr->node_comm, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -462,6 +475,8 @@
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
 
   fn_fail:
@@ -477,17 +492,17 @@
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Scan_impl(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
-                   MPI_Op op, MPID_Comm *comm_ptr)
+                   MPI_Op op, MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
 
     if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Scan != NULL) {
 	mpi_errno = comm_ptr->coll_fns->Scan(sendbuf, recvbuf, count,
-                                             datatype, op, comm_ptr);
+                                             datatype, op, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         mpi_errno = MPIR_Scan(sendbuf, recvbuf, count, datatype,
-                              op, comm_ptr);
+                              op, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
         
@@ -538,6 +553,7 @@
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Comm *comm_ptr = NULL;
+    int errflag = FALSE;
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_SCAN);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -605,7 +621,7 @@
 
     /* ... body of routine ...  */
 
-    mpi_errno = MPIR_Scan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr);
+    mpi_errno = MPIR_Scan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, &errflag);
     if (mpi_errno) goto fn_fail;
 
     /* ... end of body of routine ... */

Modified: mpich2/trunk/src/mpi/coll/scatter.c
===================================================================
--- mpich2/trunk/src/mpi/coll/scatter.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/scatter.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -58,7 +58,8 @@
 	int recvcnt, 
 	MPI_Datatype recvtype, 
 	int root, 
-	MPID_Comm *comm_ptr )
+	MPID_Comm *comm_ptr,
+        int *errflag )
 {
     MPI_Status status;
     MPI_Aint   extent=0;
@@ -169,20 +170,22 @@
                    they don't have to forward data to anyone. Others
                    receive data into a temporary buffer. */
                 if (relative_rank % 2) {
-                    mpi_errno = MPIC_Recv(recvbuf, recvcnt, recvtype,
-                                          src, MPIR_SCATTER_TAG, comm, 
-                                          &status);
+                    mpi_errno = MPIC_Recv_ft(recvbuf, recvcnt, recvtype,
+                                             src, MPIR_SCATTER_TAG, comm, 
+                                             &status, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
                 }
                 else {
-                    mpi_errno = MPIC_Recv(tmp_buf, tmp_buf_size, MPI_BYTE, src,
-                                          MPIR_SCATTER_TAG, comm, &status);
+                    mpi_errno = MPIC_Recv_ft(tmp_buf, tmp_buf_size, MPI_BYTE, src,
+                                             MPIR_SCATTER_TAG, comm, &status, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                         curr_cnt = 0;
@@ -211,24 +214,25 @@
 		{
                     send_subtree_cnt = curr_cnt - sendcnt * mask; 
                     /* mask is also the size of this process's subtree */
-                    mpi_errno = MPIC_Send (((char *)sendbuf + 
-                                            extent * sendcnt * mask),
-                                           send_subtree_cnt,
-                                           sendtype, dst, 
-                                           MPIR_SCATTER_TAG, comm);
+                    mpi_errno = MPIC_Send_ft(((char *)sendbuf + 
+                                              extent * sendcnt * mask),
+                                             send_subtree_cnt,
+                                             sendtype, dst,
+                                             MPIR_SCATTER_TAG, comm, errflag);
                 }
                 else
 		{
                     /* non-zero root and others */
                     send_subtree_cnt = curr_cnt - nbytes*mask; 
                     /* mask is also the size of this process's subtree */
-                    mpi_errno = MPIC_Send (((char *)tmp_buf + nbytes*mask),
-                                           send_subtree_cnt,
-                                           MPI_BYTE, dst,
-                                           MPIR_SCATTER_TAG, comm);
+                    mpi_errno = MPIC_Send_ft(((char *)tmp_buf + nbytes*mask),
+                                             send_subtree_cnt,
+                                             MPI_BYTE, dst,
+                                             MPIR_SCATTER_TAG, comm, errflag);
                 }
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -330,10 +334,11 @@
                 src = rank - mask; 
                 if (src < 0) src += comm_size;
                 
-                mpi_errno = MPIC_Recv(tmp_buf, tmp_buf_size, MPI_BYTE, src,
-                                     MPIR_SCATTER_TAG, comm, &status);
+                mpi_errno = MPIC_Recv_ft(tmp_buf, tmp_buf_size, MPI_BYTE, src,
+                                         MPIR_SCATTER_TAG, comm, &status, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     curr_cnt = 0;
@@ -359,11 +364,12 @@
                 
                 send_subtree_cnt = curr_cnt - nbytes * mask; 
                 /* mask is also the size of this process's subtree */
-                mpi_errno = MPIC_Send (((char *)tmp_buf + nbytes*mask),
-                                      send_subtree_cnt, MPI_BYTE, dst,
-                                      MPIR_SCATTER_TAG, comm);
+                mpi_errno = MPIC_Send_ft(((char *)tmp_buf + nbytes*mask),
+                                         send_subtree_cnt, MPI_BYTE, dst,
+                                         MPIR_SCATTER_TAG, comm, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -388,6 +394,8 @@
     MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -408,7 +416,8 @@
 	int recvcnt, 
 	MPI_Datatype recvtype, 
 	int root, 
-	MPID_Comm *comm_ptr )
+	MPID_Comm *comm_ptr,
+        int *errflag )
 {
 /*  Intercommunicator scatter.
     For short messages, root sends to rank 0 in remote group. rank 0
@@ -452,10 +461,11 @@
     if (nbytes < MPIR_PARAM_SCATTER_INTER_SHORT_MSG_SIZE) {
         if (root == MPI_ROOT) {
             /* root sends all data to rank 0 on remote group and returns */
-            mpi_errno = MPIC_Send(sendbuf, sendcnt*remote_size,
-                                  sendtype, 0, MPIR_SCATTER_TAG, comm);
+            mpi_errno = MPIC_Send_ft(sendbuf, sendcnt*remote_size,
+                                     sendtype, 0, MPIR_SCATTER_TAG, comm, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -480,11 +490,12 @@
                 /* adjust for potential negative lower bound in datatype */
                 tmp_buf = (void *)((char*)tmp_buf - true_lb);
 
-                mpi_errno = MPIC_Recv(tmp_buf, recvcnt*local_size,
-                                      recvtype, root,
-                                      MPIR_SCATTER_TAG, comm, &status);
+                mpi_errno = MPIC_Recv_ft(tmp_buf, recvcnt*local_size,
+                                         recvtype, root,
+                                         MPIR_SCATTER_TAG, comm, &status, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
@@ -499,9 +510,10 @@
             /* now do the usual scatter on this intracommunicator */
             mpi_errno = MPIR_Scatter_impl(tmp_buf, recvcnt, recvtype,
                                           recvbuf, recvcnt, recvtype, 0,
-                                          newcomm_ptr);
+                                          newcomm_ptr, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -512,21 +524,23 @@
         if (root == MPI_ROOT) {
             MPID_Datatype_get_extent_macro(sendtype, extent);
             for (i=0; i<remote_size; i++) {
-                mpi_errno = MPIC_Send(((char *)sendbuf+sendcnt*i*extent), 
-                                      sendcnt, sendtype, i,
-                                      MPIR_SCATTER_TAG, comm);
+                mpi_errno = MPIC_Send_ft(((char *)sendbuf+sendcnt*i*extent), 
+                                         sendcnt, sendtype, i,
+                                         MPIR_SCATTER_TAG, comm, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
+                    *errflag = TRUE;
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                 }
             }
         }
         else {
-            mpi_errno = MPIC_Recv(recvbuf,recvcnt,recvtype,root,
-                                  MPIR_SCATTER_TAG,comm,&status);
+            mpi_errno = MPIC_Recv_ft(recvbuf,recvcnt,recvtype,root,
+                                     MPIR_SCATTER_TAG,comm,&status, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -539,6 +553,8 @@
     MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -554,7 +570,7 @@
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Scatter(void *sendbuf, int sendcnt, MPI_Datatype sendtype,
                  void *recvbuf, int recvcnt, MPI_Datatype recvtype,
-                 int root, MPID_Comm *comm_ptr)
+                 int root, MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
         
@@ -562,13 +578,13 @@
         /* intracommunicator */
         mpi_errno = MPIR_Scatter_intra(sendbuf, sendcnt, sendtype,
                                        recvbuf, recvcnt, recvtype, root,
-                                       comm_ptr);
+                                       comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         /* intercommunicator */ 
         mpi_errno = MPIR_Scatter_inter(sendbuf, sendcnt, sendtype,
                                        recvbuf, recvcnt, recvtype, root,
-                                       comm_ptr);
+                                       comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
   
@@ -589,17 +605,17 @@
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Scatter_impl(void *sendbuf, int sendcnt, MPI_Datatype sendtype,
                       void *recvbuf, int recvcnt, MPI_Datatype recvtype,
-                      int root, MPID_Comm *comm_ptr)
+                      int root, MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
 
     if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Scatter != NULL) {
 	mpi_errno = comm_ptr->coll_fns->Scatter(sendbuf, sendcnt, sendtype,
-                                                recvbuf, recvcnt, recvtype, root, comm_ptr);
+                                                recvbuf, recvcnt, recvtype, root, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         mpi_errno = MPIR_Scatter(sendbuf, sendcnt, sendtype,
-                                 recvbuf, recvcnt, recvtype, root, comm_ptr);
+                                 recvbuf, recvcnt, recvtype, root, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
     
@@ -652,6 +668,7 @@
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Comm *comm_ptr = NULL;
+    int errflag = FALSE;
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_SCATTER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -756,7 +773,7 @@
 
     mpi_errno = MPIR_Scatter_impl(sendbuf, sendcnt, sendtype,
                                   recvbuf, recvcnt, recvtype, root,
-                                  comm_ptr);
+                                  comm_ptr, &errflag);
     if (mpi_errno) goto fn_fail;
 
     /* ... end of body of routine ... */

Modified: mpich2/trunk/src/mpi/coll/scatterv.c
===================================================================
--- mpich2/trunk/src/mpi/coll/scatterv.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/coll/scatterv.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -57,7 +57,8 @@
 	int recvcnt,  
 	MPI_Datatype recvtype, 
 	int root, 
-	MPID_Comm *comm_ptr )
+	MPID_Comm *comm_ptr,
+        int *errflag )
 {
     int rank, comm_size, mpi_errno = MPI_SUCCESS;
     int mpi_errno_ret = MPI_SUCCESS;
@@ -106,15 +107,15 @@
                     }
                 }
                 else {
-                    mpi_errno = MPIC_Isend(((char *)sendbuf+displs[i]*extent), 
-                                           sendcnts[i], sendtype, i,
-                                           MPIR_SCATTERV_TAG, comm, &reqarray[reqs++]);
+                    mpi_errno = MPIC_Isend_ft(((char *)sendbuf+displs[i]*extent), 
+                                              sendcnts[i], sendtype, i,
+                                              MPIR_SCATTERV_TAG, comm, &reqarray[reqs++], errflag);
                     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                 }
             }
         }
         /* ... then wait for *all* of them to finish: */
-        mpi_errno = MPIR_Waitall_impl(reqs, reqarray, starray);
+        mpi_errno = MPIC_Waitall_ft(reqs, reqarray, starray, errflag);
         if (mpi_errno && mpi_errno != MPI_ERR_IN_STATUS) MPIU_ERR_POP(mpi_errno);
         /* --BEGIN ERROR HANDLING-- */
         if (mpi_errno == MPI_ERR_IN_STATUS) {
@@ -123,6 +124,7 @@
                     mpi_errno = starray[i].MPI_ERROR;
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
+                        *errflag = TRUE;
                         MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                         MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                     }
@@ -134,10 +136,11 @@
 
     else if (root != MPI_PROC_NULL) { /* non-root nodes, and in the intercomm. case, non-root nodes on remote side */
         if (recvcnt) {
-            mpi_errno = MPIC_Recv(recvbuf,recvcnt,recvtype,root,
-                                  MPIR_SCATTERV_TAG,comm,MPI_STATUS_IGNORE);
+            mpi_errno = MPIC_Recv_ft(recvbuf,recvcnt,recvtype,root,
+                                     MPIR_SCATTERV_TAG,comm,MPI_STATUS_IGNORE, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
+                *errflag = TRUE;
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
@@ -151,6 +154,8 @@
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
+    else if (*errflag)
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -166,19 +171,19 @@
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPIR_Scatterv_impl(void *sendbuf, int *sendcnts, int *displs, MPI_Datatype sendtype,
                        void *recvbuf, int recvcnt, MPI_Datatype recvtype,
-                       int root, MPID_Comm *comm_ptr)
+                       int root, MPID_Comm *comm_ptr, int *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
         
     if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Scatter != NULL) {
 	mpi_errno = comm_ptr->coll_fns->Scatterv(sendbuf, sendcnts, displs,
                                                  sendtype, recvbuf, recvcnt,
-                                                 recvtype, root, comm_ptr);
+                                                 recvtype, root, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     } else {
         mpi_errno = MPIR_Scatterv(sendbuf, sendcnts, displs, sendtype,
                                   recvbuf, recvcnt, recvtype,
-                                  root, comm_ptr);
+                                  root, comm_ptr, errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -232,6 +237,7 @@
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Comm *comm_ptr = NULL;
+    int errflag = FALSE;
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_SCATTERV);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
@@ -352,7 +358,7 @@
 
     mpi_errno = MPIR_Scatterv_impl(sendbuf, sendcnts, displs, sendtype,
                                    recvbuf, recvcnt, recvtype,
-                                   root, comm_ptr);
+                                   root, comm_ptr, &errflag);
     if (mpi_errno) goto fn_fail;
 
     /* ... end of body of routine ... */

Modified: mpich2/trunk/src/mpi/comm/comm_create.c
===================================================================
--- mpich2/trunk/src/mpi/comm/comm_create.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/comm/comm_create.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -334,7 +334,7 @@
     int rinfo[2];
     MPID_VCR *mapping_vcr = NULL;
     MPID_VCR *remote_mapping_vcr = NULL;
-
+    int errflag = FALSE;
     MPIU_CHKLMEM_DECL(1);
     MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE_INTER);
 
@@ -418,19 +418,20 @@
 
         /* Broadcast to the other members of the local group */
         mpi_errno = MPIR_Bcast_impl( rinfo, 2, MPI_INT, 0,
-                                     comm_ptr->local_comm);
+                                     comm_ptr->local_comm, &errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         mpi_errno = MPIR_Bcast_impl( remote_mapping, remote_size, MPI_INT, 0,
-                                     comm_ptr->local_comm);
+                                     comm_ptr->local_comm, &errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-        
+        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     }
     else {
         /* The other processes */
         /* Broadcast to the other members of the local group */
         mpi_errno = MPIR_Bcast_impl( rinfo, 2, MPI_INT, 0,
-                                     comm_ptr->local_comm);
+                                     comm_ptr->local_comm, &errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
         if (newcomm_ptr != NULL) {
             newcomm_ptr->context_id = rinfo[0];
         }
@@ -439,8 +440,9 @@
                             remote_size*sizeof(int),
                             mpi_errno,"remote_mapping");
         mpi_errno = MPIR_Bcast_impl( remote_mapping, remote_size, MPI_INT, 0,
-                                     comm_ptr->local_comm);
+                                     comm_ptr->local_comm, &errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     }
 
     if (group_ptr->rank != MPI_UNDEFINED) {

Modified: mpich2/trunk/src/mpi/comm/comm_split.c
===================================================================
--- mpich2/trunk/src/mpi/comm/comm_split.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/comm/comm_split.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -101,6 +101,7 @@
 	first_entry = 0, first_remote_entry = 0, *last_ptr;
     int in_newcomm; /* TRUE iff *newcomm should be populated */
     MPIR_Context_id_t   new_context_id, remote_context_id;
+    int errflag = FALSE;
     MPIU_CHKLMEM_DECL(4);
 
     rank        = comm_ptr->rank;
@@ -125,8 +126,9 @@
 	local_comm_ptr = comm_ptr;
     }
     /* Gather information on the local group of processes */
-    mpi_errno = MPIR_Allgather_impl( MPI_IN_PLACE, 2, MPI_INT, table, 2, MPI_INT, local_comm_ptr );
+    mpi_errno = MPIR_Allgather_impl( MPI_IN_PLACE, 2, MPI_INT, table, 2, MPI_INT, local_comm_ptr, &errflag );
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+    MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 
     /* Step 2: How many processes have our same color? */
     new_size = 0;
@@ -172,8 +174,9 @@
 	mypair.color = color;
 	mypair.key   = key;
 	mpi_errno = MPIR_Allgather_impl( &mypair, 2, MPI_INT, remotetable, 2, MPI_INT,
-                                         comm_ptr );
+                                         comm_ptr, &errflag );
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
         
 	/* Each process can now match its color with the entries in the table */
 	new_remote_size = 0;
@@ -219,13 +222,15 @@
 				       &remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 
 				       0, 0, comm_ptr->handle, MPI_STATUS_IGNORE );
 	    if (mpi_errno) { MPIU_ERR_POP( mpi_errno ); }
-	    mpi_errno = MPIR_Bcast_impl( &remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, local_comm_ptr );
+	    mpi_errno = MPIR_Bcast_impl( &remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, local_comm_ptr, &errflag );
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 	}
 	else {
 	    /* Broadcast to the other members of the local group */
-	    mpi_errno = MPIR_Bcast_impl( &remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, local_comm_ptr );
+	    mpi_errno = MPIR_Bcast_impl( &remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, local_comm_ptr, &errflag );
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 	}
     }
 

Modified: mpich2/trunk/src/mpi/comm/commutil.c
===================================================================
--- mpich2/trunk/src/mpi/comm/commutil.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/comm/commutil.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -764,7 +764,7 @@
     int          own_mask = 0;
     int          testCount = 10; /* if you change this value, you need to also change 
 				    it below where it is reinitialized */
-
+    int errflag = FALSE;
     MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GET_CONTEXTID);
 
     MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GET_CONTEXTID);
@@ -829,9 +829,9 @@
 	   other processes to enter the global or brief global critical section.
 	 */ 
 	mpi_errno = MPIR_Allreduce_impl( MPI_IN_PLACE, local_mask, MPIR_MAX_CONTEXT_MASK,
-                                         MPI_INT, MPI_BAND, comm_ptr );
+                                         MPI_INT, MPI_BAND, comm_ptr, &errflag );
 	if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-
+        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
         /* MT FIXME 2/3 cases don't seem to need the CONTEXTID CS, check and
          * narrow this region */
         MPIU_THREAD_CS_ENTER(CONTEXTID,);
@@ -889,8 +889,9 @@
             /* we _must_ release the lock above in order to avoid deadlocking on
              * this blocking allreduce operation */
 	    mpi_errno = MPIR_Allreduce_impl( &hasNoId, &totalHasNoId, 1, MPI_INT,
-                                             MPI_MAX, comm_ptr );
+                                             MPI_MAX, comm_ptr, &errflag );
 	    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 	    if (totalHasNoId == 1) {
 		/* Release the mask for use by other threads */
 		if (own_mask) {
@@ -956,6 +957,7 @@
 		        context instead?.  Or can we use the tag 
 		        provided in the intercomm routine? (not on a dup, 
 			but in that case it can use the collective context) */
+    int errflag = FALSE;
     MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID);
 
     MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID);
@@ -983,9 +985,9 @@
     /* Make sure that all of the local processes now have this
        id */
     mpi_errno = MPIR_Bcast_impl( &remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 
-                                 0, comm_ptr->local_comm );
+                                 0, comm_ptr->local_comm, &errflag );
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-
+    MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     /* The recvcontext_id must be the one that was allocated out of the local
      * group, not the remote group.  Otherwise we could end up posting two
      * MPI_ANY_SOURCE,MPI_ANY_TAG recvs on the same context IDs even though we

Modified: mpich2/trunk/src/mpi/comm/intercomm_create.c
===================================================================
--- mpich2/trunk/src/mpi/comm/intercomm_create.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/comm/intercomm_create.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -238,6 +238,7 @@
     int is_low_group = 0;
     int i;
     MPID_Comm *newcomm_ptr;
+    int errflag = FALSE;
     MPIU_CHKLMEM_DECL(4);
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_INTERCOMM_CREATE);
 
@@ -457,11 +458,13 @@
 	comm_info[1] = final_context_id;
 	comm_info[2] = is_low_group;
 	MPIU_DBG_MSG(COMM,VERBOSE,"About to bcast on local_comm");
-	mpi_errno = MPIR_Bcast_impl( comm_info, 3, MPI_INT, local_leader, comm_ptr );
+	mpi_errno = MPIR_Bcast_impl( comm_info, 3, MPI_INT, local_leader, comm_ptr, &errflag );
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 	mpi_errno = MPIR_Bcast_impl( remote_gpids, 2*remote_size, MPI_INT, local_leader,
-                                     comm_ptr );
+                                     comm_ptr, &errflag );
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 	MPIU_DBG_MSG_D(COMM,VERBOSE,"end of bcast on local_comm of size %d",
 		       comm_ptr->local_size );
     }
@@ -469,16 +472,18 @@
     {
 	/* we're the other processes */
 	MPIU_DBG_MSG(COMM,VERBOSE,"About to receive bcast on local_comm");
-	mpi_errno = MPIR_Bcast_impl( comm_info, 3, MPI_INT, local_leader, comm_ptr );
+	mpi_errno = MPIR_Bcast_impl( comm_info, 3, MPI_INT, local_leader, comm_ptr, &errflag );
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 	remote_size = comm_info[0];
 	MPIU_CHKLMEM_MALLOC(remote_gpids,int*,2*remote_size*sizeof(int),
 			    mpi_errno,"remote_gpids");
 	MPIU_CHKLMEM_MALLOC(remote_lpids,int*,remote_size*sizeof(int),
 			    mpi_errno,"remote_lpids");
 	mpi_errno = MPIR_Bcast_impl( remote_gpids, 2*remote_size, MPI_INT, local_leader, 
-                                    comm_ptr );
+                                     comm_ptr, &errflag );
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 
 	/* Extract the context and group sign informatin */
 	final_context_id = comm_info[1];

Modified: mpich2/trunk/src/mpi/comm/intercomm_merge.c
===================================================================
--- mpich2/trunk/src/mpi/comm/intercomm_merge.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/comm/intercomm_merge.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -75,6 +75,7 @@
     MPID_Comm *newcomm_ptr;
     int  local_high, remote_high, i, j, new_size;
     MPIR_Context_id_t new_context_id;
+    int errflag = FALSE;
     MPIU_THREADPRIV_DECL;
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_INTERCOMM_MERGE);
 
@@ -137,8 +138,9 @@
 	     error to make */
 	    acthigh = high ? 1 : 0;   /* Clamp high into 1 or 0 */
 	    mpi_errno = MPIR_Allreduce_impl( MPI_IN_PLACE, &acthigh, 1, MPI_INT,
-                                             MPI_SUM, comm_ptr->local_comm );
-	    if (mpi_errno) goto fn_fail;
+                                             MPI_SUM, comm_ptr->local_comm, &errflag );
+	    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 	    /* acthigh must either == 0 or the size of the local comm */
 	    if (acthigh != 0 && acthigh != comm_ptr->local_size) {
 		mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, 
@@ -196,9 +198,10 @@
        value of local_high, which may have changed if both groups
        of processes had the same value for high
     */
-    mpi_errno = MPIR_Bcast_impl( &local_high, 1, MPI_INT, 0, 
-                                 comm_ptr->local_comm );
-    if (mpi_errno) goto fn_fail;
+    mpi_errno = MPIR_Bcast_impl( &local_high, 1, MPI_INT, 0,
+                                 comm_ptr->local_comm, &errflag );
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+    MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 
     mpi_errno = MPIR_Comm_create( &newcomm_ptr );
     if (mpi_errno) goto fn_fail;

Modified: mpich2/trunk/src/mpi/errhan/errnames.txt
===================================================================
--- mpich2/trunk/src/mpi/errhan/errnames.txt	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/errhan/errnames.txt	2011-01-21 18:32:12 UTC (rev 7803)
@@ -876,6 +876,8 @@
 **tcp_cleanup_fail:Error while cleaning up failed connection
 **tmpvc_connect_fail:Failure during connection protocol
 
+**coll_fail:Failure during collective
+
 **blcr_mod:BLCR kernel module not present
 
 **envvarparse:Unable to parse environment variable

Modified: mpich2/trunk/src/mpi/topo/dist_gr_create.c
===================================================================
--- mpich2/trunk/src/mpi/topo/dist_gr_create.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpi/topo/dist_gr_create.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -81,6 +81,7 @@
     int *rout_idx;
     int *rs;
     int in_out_peers[2] = {-1, -1};
+    int errflag = FALSE;
     MPIU_CHKLMEM_DECL(9);
     MPIU_CHKPMEM_DECL(1);
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_DIST_GRAPH_CREATE);
@@ -252,8 +253,9 @@
     }
 
     /* compute the number of peers I will recv from */
-    mpi_errno = MPIR_Reduce_scatter_block_impl(rs, in_out_peers, 2, MPI_INT, MPI_SUM, comm_ptr);
+    mpi_errno = MPIR_Reduce_scatter_block_impl(rs, in_out_peers, 2, MPI_INT, MPI_SUM, comm_ptr, &errflag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+    MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 
     MPIU_Assert(in_out_peers[0] <= comm_size && in_out_peers[0] >= 0);
     MPIU_Assert(in_out_peers[1] <= comm_size && in_out_peers[1] >= 0);


Property changes on: mpich2/trunk/src/mpid
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/error-return/src/mpid:7405-7603,7662-7670
   + /mpich2/branches/dev/coll-err-ret/src/mpid:7771-7802
/mpich2/branches/dev/error-return/src/mpid:7405-7603,7662-7670


Property changes on: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5050
/mpich2/branches/dev/ckpt2/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5057-6537
/mpich2/branches/dev/error-return/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:7405-7603,7662-7670
/mpich2/branches/dev/ftb/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5661-5730
/mpich2/branches/dev/lapi/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5817
/mpich2/branches/dev/win_rrvm/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:6416,6428
/mpich2/branches/dev/wintcp_async_progress/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5406
   + /mpich2/branches/dev/ckpt/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5050
/mpich2/branches/dev/ckpt2/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5057-6537
/mpich2/branches/dev/coll-err-ret/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:7771-7802
/mpich2/branches/dev/error-return/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:7405-7603,7662-7670
/mpich2/branches/dev/ftb/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5661-5730
/mpich2/branches/dev/lapi/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5817
/mpich2/branches/dev/win_rrvm/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:6416,6428
/mpich2/branches/dev/wintcp_async_progress/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5406

Modified: mpich2/trunk/src/mpid/ch3/src/ch3u_comm_spawn_multiple.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/src/ch3u_comm_spawn_multiple.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpid/ch3/src/ch3u_comm_spawn_multiple.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -256,14 +256,17 @@
     }
 
     if (errcodes != MPI_ERRCODES_IGNORE) {
-        mpi_errno = MPIR_Bcast_impl(&should_accept, 1, MPI_INT, root, comm_ptr);
+        int errflag = FALSE;
+        mpi_errno = MPIR_Bcast_impl(&should_accept, 1, MPI_INT, root, comm_ptr, &errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
-        mpi_errno = MPIR_Bcast_impl(&total_num_processes, 1, MPI_INT, root, comm_ptr);
+        mpi_errno = MPIR_Bcast_impl(&total_num_processes, 1, MPI_INT, root, comm_ptr, &errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         
-        mpi_errno = MPIR_Bcast_impl(errcodes, total_num_processes, MPI_INT, root, comm_ptr);
+        mpi_errno = MPIR_Bcast_impl(errcodes, total_num_processes, MPI_INT, root, comm_ptr, &errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     }
 
     if (should_accept) {

Modified: mpich2/trunk/src/mpid/ch3/src/ch3u_port.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/src/ch3u_port.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpid/ch3/src/ch3u_port.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -343,6 +343,7 @@
     pg_node *pg_list = NULL;
     MPIDI_PG_t **remote_pg = NULL;
     MPIR_Context_id_t recvcontext_id = MPIR_INVALID_CONTEXT_ID;
+    int errflag = FALSE;
     MPIU_CHKLMEM_DECL(3);
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_COMM_CONNECT);
 
@@ -403,10 +404,9 @@
 
     /* broadcast the received info to local processes */
     MPIU_DBG_MSG(CH3_CONNECT,VERBOSE,"broadcasting the received 3 ints");
-    mpi_errno = MPIR_Bcast_intra(recv_ints, 3, MPI_INT, root, comm_ptr);
-    if (mpi_errno) {
-	MPIU_ERR_POP(mpi_errno);
-    }
+    mpi_errno = MPIR_Bcast_intra(recv_ints, 3, MPI_INT, root, comm_ptr, &errflag);
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+    MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 
     /* check if root was unable to connect to the port */
     MPIU_ERR_CHKANDJUMP1(recv_ints[0] == -1, mpi_errno, MPI_ERR_PORT, "**portexist", "**portexist %s", port_name);
@@ -461,10 +461,10 @@
     /* Broadcast out the remote rank translation array */
     MPIU_DBG_MSG(CH3_CONNECT,VERBOSE,"Broadcasting remote translation");
     mpi_errno = MPIR_Bcast_intra(remote_translation, remote_comm_size * 2, MPI_INT,
-			   root, comm_ptr);
-    if (mpi_errno) {
-	MPIU_ERR_POP(mpi_errno);
-    }
+                                 root, comm_ptr, &errflag);
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+    MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
 #ifdef MPICH_DBG_OUTPUT
     MPIU_DBG_PRINTF(("[%d]connect:Received remote_translation after broadcast:\n", rank));
     for (i=0; i<remote_comm_size; i++)
@@ -547,8 +547,12 @@
 
         /* notify other processes to return an error */
         MPIU_DBG_MSG(CH3_CONNECT,VERBOSE,"broadcasting 3 ints: error case");
-        mpi_errno2 = MPIR_Bcast_intra(recv_ints, 3, MPI_INT, root, comm_ptr);
+        mpi_errno2 = MPIR_Bcast_intra(recv_ints, 3, MPI_INT, root, comm_ptr, &errflag);
         if (mpi_errno2) MPIU_ERR_ADD(mpi_errno, mpi_errno2);
+        if (errflag) {
+            MPIU_ERR_SET(mpi_errno2, MPI_ERR_OTHER, "**coll_fail");
+            MPIU_ERR_ADD(mpi_errno, mpi_errno2);
+        }
         goto fn_fail;
     }
 }
@@ -685,6 +689,7 @@
     int  rank = comm_ptr->rank;
     int  mpi_errno = 0;
     int  recvtag = *recvtag_p;
+    int errflag = FALSE;
     MPIDI_STATE_DECL(MPID_STATE_RECEIVEPGANDDISTRIBUTE);
 
     MPIDI_FUNC_ENTER(MPID_STATE_RECEIVEPGANDDISTRIBUTE);
@@ -713,10 +718,9 @@
 
 	/* Broadcast the size and data to the local communicator */
 	/*printf("accept:broadcasting 1 int\n");fflush(stdout);*/
-	mpi_errno = MPIR_Bcast_intra(&j, 1, MPI_INT, root, comm_ptr);
-	if (mpi_errno != MPI_SUCCESS) {
-	    MPIU_ERR_POP(mpi_errno);
-	}
+	mpi_errno = MPIR_Bcast_intra(&j, 1, MPI_INT, root, comm_ptr, &errflag);
+	if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 
 	if (rank != root) {
 	    /* The root has already allocated this string */
@@ -726,10 +730,9 @@
 	    }
 	}
 	/*printf("accept:broadcasting string of length %d\n", j);fflush(stdout);*/
-	mpi_errno = MPIR_Bcast_intra(pg_str, j, MPI_CHAR, root, comm_ptr);
-	if (mpi_errno != MPI_SUCCESS) {
-	    MPIU_ERR_POP(mpi_errno);
-	}
+	mpi_errno = MPIR_Bcast_intra(pg_str, j, MPI_CHAR, root, comm_ptr, &errflag);
+	if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 	/* Then reconstruct the received process group.  This step
 	   also initializes the created process group */
 
@@ -762,6 +765,7 @@
     pg_translation *local_translation = 0;
     pg_node *pg_list, *pg_next, *pg_head = 0;
     int rank, i, peer_comm_size;
+    int errflag = FALSE;
     MPIU_CHKLMEM_DECL(1);
 
     peer_comm_size = comm_p->local_size;
@@ -778,9 +782,9 @@
     }
 
     /* Now, broadcast the number of local pgs */
-    mpi_errno = MPIR_Bcast_impl( &n_local_pgs, 1, MPI_INT, root, comm_p);
+    mpi_errno = MPIR_Bcast_impl( &n_local_pgs, 1, MPI_INT, root, comm_p, &errflag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-    
+    MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 
     pg_list = pg_head;
     for (i=0; i<n_local_pgs; i++) {
@@ -799,8 +803,9 @@
 	    len     = pg_list->lenStr;
 	    pg_list = pg_list->next;
 	}
-	mpi_errno = MPIR_Bcast_impl( &len, 1, MPI_INT, root, comm_p);
+	mpi_errno = MPIR_Bcast_impl( &len, 1, MPI_INT, root, comm_p, &errflag);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 	if (rank != root) {
 	    pg_str = (char *)MPIU_Malloc(len);
             if (!pg_str) {
@@ -808,12 +813,14 @@
                 goto fn_exit;
             }
 	}
-	mpi_errno = MPIR_Bcast_impl( pg_str, len, MPI_CHAR, root, comm_p);
+	mpi_errno = MPIR_Bcast_impl( pg_str, len, MPI_CHAR, root, comm_p, &errflag);
         if (mpi_errno) {
             if (rank != root)
                 MPIU_Free( pg_str );
             MPIU_ERR_POP(mpi_errno);
         }
+        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
 	if (rank != root) {
 	    /* flag is true if the pg was created, false if it
 	       already existed. This step
@@ -930,6 +937,7 @@
     pg_translation *local_translation = NULL, *remote_translation = NULL;
     pg_node *pg_list = NULL;
     MPIDI_PG_t **remote_pg = NULL;
+    int errflag = FALSE;
     MPIU_CHKLMEM_DECL(3);
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_COMM_ACCEPT);
 
@@ -991,11 +999,11 @@
 
     /* broadcast the received info to local processes */
     /*printf("accept:broadcasting 2 ints - %d and %d\n", recv_ints[0], recv_ints[1]);fflush(stdout);*/
-    mpi_errno = MPIR_Bcast_intra(recv_ints, 3, MPI_INT, root, comm_ptr);
-    if (mpi_errno) {
-	MPIU_ERR_POP(mpi_errno);
-    }
+    mpi_errno = MPIR_Bcast_intra(recv_ints, 3, MPI_INT, root, comm_ptr, &errflag);
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+    MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 
+
     n_remote_pgs     = recv_ints[0];
     remote_comm_size = recv_ints[1];
     context_id       = recv_ints[2];
@@ -1042,7 +1050,9 @@
     /* Broadcast out the remote rank translation array */
     MPIU_DBG_MSG(CH3_CONNECT,VERBOSE,"Broadcast remote_translation");
     mpi_errno = MPIR_Bcast_intra(remote_translation, remote_comm_size * 2, MPI_INT, 
-			   root, comm_ptr);
+                                 root, comm_ptr, &errflag);
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+    MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 #ifdef MPICH_DBG_OUTPUT
     MPIU_DBG_PRINTF(("[%d]accept:Received remote_translation after broadcast:\n", rank));
     for (i=0; i<remote_comm_size; i++)

Modified: mpich2/trunk/src/mpid/ch3/src/ch3u_rma_ops.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/src/ch3u_rma_ops.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpid/ch3/src/ch3u_rma_ops.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -33,6 +33,7 @@
     int mpi_errno=MPI_SUCCESS, i, k, comm_size, rank;
     MPI_Aint *tmp_buf;
     MPID_Comm *win_comm_ptr;
+    int errflag = FALSE;
     MPIU_CHKPMEM_DECL(4);
     MPIU_CHKLMEM_DECL(1);
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_CREATE);
@@ -123,10 +124,12 @@
     
     mpi_errno = MPIR_Allgather_impl(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                                     tmp_buf, 3 * sizeof(MPI_Aint), MPI_BYTE,
-                                    comm_ptr);
+                                    comm_ptr, &errflag);
     MPIU_INSTR_DURATION_END(wincreate_allgather);
     if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+    MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 
+
     k = 0;
     for (i=0; i<comm_size; i++)
     {
@@ -158,7 +161,7 @@
     int mpi_errno=MPI_SUCCESS, total_pt_rma_puts_accs;
     int in_use;
     MPID_Comm *comm_ptr;
-    
+    int errflag = FALSE;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_FREE);
         
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_WIN_FREE);
@@ -167,8 +170,9 @@
     MPIU_INSTR_DURATION_START(winfree_rs);
     mpi_errno = MPIR_Reduce_scatter_block_impl((*win_ptr)->pt_rma_puts_accs, 
                                                &total_pt_rma_puts_accs, 1, 
-                                               MPI_INT, MPI_SUM, comm_ptr);
+                                               MPI_INT, MPI_SUM, comm_ptr, &errflag);
     if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+    MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     MPIU_INSTR_DURATION_END(winfree_rs);
 
     if (total_pt_rma_puts_accs != (*win_ptr)->my_pt_rma_puts_accs)

Modified: mpich2/trunk/src/mpid/ch3/src/ch3u_rma_sync.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/src/ch3u_rma_sync.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpid/ch3/src/ch3u_rma_sync.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -116,6 +116,7 @@
     MPID_Comm *comm_ptr;
     MPI_Win source_win_handle, target_win_handle;
     MPID_Progress_state progress_state;
+    int errflag = FALSE;
     MPIU_CHKLMEM_DECL(3);
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_FENCE);
 
@@ -213,10 +214,11 @@
 	win_ptr->my_counter = comm_size;
             
 	mpi_errno = MPIR_Reduce_scatter_block_impl(MPI_IN_PLACE, rma_target_proc, 1,
-                                                   MPI_INT, MPI_SUM, comm_ptr);
+                                                   MPI_INT, MPI_SUM, comm_ptr, &errflag);
 	MPIU_INSTR_DURATION_END(winfence_rs);
 	/* result is stored in rma_target_proc[0] */
 	if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 
 	/* Set the completion counter */
 	/* FIXME: MT: this needs to be done atomically because other

Modified: mpich2/trunk/src/mpid/ch3/src/mpid_vc.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/src/mpid_vc.c	2011-01-21 04:18:03 UTC (rev 7802)
+++ mpich2/trunk/src/mpid/ch3/src/mpid_vc.c	2011-01-21 18:32:12 UTC (rev 7803)
@@ -573,7 +573,8 @@
     int i, allfound = 1, pgid, pgidWorld;
     MPIDI_PG_t *pg = 0;
     MPIDI_PG_iterator iter;
-
+    int errflag = FALSE;
+    
     /* Get the pgid for CommWorld (always attached to the first process 
        group) */
     MPIDI_PG_Get_iterator(&iter);
@@ -600,9 +601,10 @@
     }
 
     /* See if everyone is happy */
-    mpi_errno = MPIR_Allreduce_impl( MPI_IN_PLACE, &allfound, 1, MPI_INT, MPI_LAND, comm_ptr );
+    mpi_errno = MPIR_Allreduce_impl( MPI_IN_PLACE, &allfound, 1, MPI_INT, MPI_LAND, comm_ptr, &errflag );
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-
+    MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+    
     if (allfound) return MPI_SUCCESS;
 
     /* FIXME: We need a cleaner way to handle this case than using an ifdef.


Property changes on: mpich2/trunk/src/mpl/src/mplstr.c
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt2/src/mpl/src/string/mplstr.c:5182,5196,5198
/mpich2/branches/dev/error-return/src/mpl/src/mplstr.c:7662-7670
/mpich2/branches/dev/ftb/src/mpl/src/mplstr.c:5661-5730
/mpich2/branches/dev/lapi/src/mpl/src/mplstr.c:5817
/mpich2/branches/release/mpich2-1.1.1/src/mpl/src/string/mplstr.c:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/mpl/src/string/mplstr.c:5406
   + /mpich2/branches/dev/ckpt2/src/mpl/src/string/mplstr.c:5182,5196,5198
/mpich2/branches/dev/coll-err-ret/src/mpl/src/mplstr.c:7771-7802
/mpich2/branches/dev/error-return/src/mpl/src/mplstr.c:7662-7670
/mpich2/branches/dev/ftb/src/mpl/src/mplstr.c:5661-5730
/mpich2/branches/dev/lapi/src/mpl/src/mplstr.c:5817
/mpich2/branches/release/mpich2-1.1.1/src/mpl/src/string/mplstr.c:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/mpl/src/string/mplstr.c:5406


Property changes on: mpich2/trunk/src/pm/hydra
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt/src/pm/hydra:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra:7662-7670*
/mpich2/branches/dev/ftb/src/pm/hydra:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra:5406
   + /mpich2/branches/dev/ckpt/src/pm/hydra:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra:7771-7802*
/mpich2/branches/dev/error-return/src/pm/hydra:7662-7670*
/mpich2/branches/dev/ftb/src/pm/hydra:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra:5406


Property changes on: mpich2/trunk/src/pm/hydra/Makefile.am
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt/src/pm/hydra/Makefile.am:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/Makefile.am:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/Makefile.am:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/Makefile.am:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/Makefile.am:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/Makefile.am:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/Makefile.am:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/Makefile.am:5406
   + /mpich2/branches/dev/ckpt/src/pm/hydra/Makefile.am:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/Makefile.am:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/Makefile.am:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/Makefile.am:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/Makefile.am:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/Makefile.am:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/Makefile.am:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/Makefile.am:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/Makefile.am:5406


Property changes on: mpich2/trunk/src/pm/hydra/README
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt/src/pm/hydra/README:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/README:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/README:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/README:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/README:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/README:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/README:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/README:5406
   + /mpich2/branches/dev/ckpt/src/pm/hydra/README:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/README:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/README:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/README:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/README:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/README:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/README:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/README:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/README:5406


Property changes on: mpich2/trunk/src/pm/hydra/autogen.sh
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt/src/pm/hydra/autogen.sh:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/autogen.sh:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/autogen.sh:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/autogen.sh:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/autogen.sh:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/autogen.sh:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/autogen.sh:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/autogen.sh:5406
   + /mpich2/branches/dev/ckpt/src/pm/hydra/autogen.sh:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/autogen.sh:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/autogen.sh:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/autogen.sh:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/autogen.sh:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/autogen.sh:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/autogen.sh:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/autogen.sh:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/autogen.sh:5406


Property changes on: mpich2/trunk/src/pm/hydra/configure.in
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt/src/pm/hydra/configure.in:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/configure.in:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/configure.in:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/configure.in:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/configure.in:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/configure.in:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/configure.in:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/configure.in:5406
   + /mpich2/branches/dev/ckpt/src/pm/hydra/configure.in:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/configure.in:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/configure.in:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/configure.in:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/configure.in:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/configure.in:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/configure.in:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/configure.in:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/configure.in:5406


Property changes on: mpich2/trunk/src/pm/hydra/examples
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt/src/pm/hydra/examples:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/examples:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/examples:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/examples:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/examples:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/examples:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/examples:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/examples:5406
   + /mpich2/branches/dev/ckpt/src/pm/hydra/examples:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/examples:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/examples:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/examples:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/examples:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/examples:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/examples:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/examples:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/examples:5406


Property changes on: mpich2/trunk/src/pm/hydra/hydra-doxygen.cfg.in
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt/src/pm/hydra/hydra-doxygen.cfg.in:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/hydra-doxygen.cfg.in:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/hydra-doxygen.cfg.in:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/hydra-doxygen.cfg.in:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/hydra-doxygen.cfg.in:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/hydra-doxygen.cfg.in:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/hydra-doxygen.cfg.in:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/hydra-doxygen.cfg.in:5406
   + /mpich2/branches/dev/ckpt/src/pm/hydra/hydra-doxygen.cfg.in:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/hydra-doxygen.cfg.in:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/hydra-doxygen.cfg.in:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/hydra-doxygen.cfg.in:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/hydra-doxygen.cfg.in:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/hydra-doxygen.cfg.in:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/hydra-doxygen.cfg.in:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/hydra-doxygen.cfg.in:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/hydra-doxygen.cfg.in:5406


Property changes on: mpich2/trunk/src/pm/hydra/include
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt/src/pm/hydra/include:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/include:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/include:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/include:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/include:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/include:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/include:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/include:5406
   + /mpich2/branches/dev/ckpt/src/pm/hydra/include:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/include:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/include:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/include:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/include:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/include:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/include:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/include:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/include:5406


Property changes on: mpich2/trunk/src/pm/hydra/mpich2prereq
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt/src/pm/hydra/mpich2prereq:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/mpich2prereq:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/mpich2prereq:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/mpich2prereq:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/mpich2prereq:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/mpich2prereq:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/mpich2prereq:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/mpich2prereq:5406
   + /mpich2/branches/dev/ckpt/src/pm/hydra/mpich2prereq:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/mpich2prereq:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/mpich2prereq:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/mpich2prereq:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/mpich2prereq:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/mpich2prereq:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/mpich2prereq:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/mpich2prereq:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/mpich2prereq:5406


Property changes on: mpich2/trunk/src/pm/hydra/pm
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt/src/pm/hydra/pm:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/pm:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/pm:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/pm:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/pm:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/pm:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/pm:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/pm:5406
   + /mpich2/branches/dev/ckpt/src/pm/hydra/pm:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/pm:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/pm:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/pm:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/pm:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/pm:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/pm:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/pm:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/pm:5406


Property changes on: mpich2/trunk/src/pm/hydra/tools
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt/src/pm/hydra/tools:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/tools:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/tools:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/tools:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/tools:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/tools:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/tools:5406
   + /mpich2/branches/dev/ckpt/src/pm/hydra/tools:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/tools:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/tools:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/tools:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/tools:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/tools:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/tools:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/tools:5406


Property changes on: mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm_query_proxy_id.c
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/error-return/src/pm/hydra/tools/bootstrap/external/slurm_query_proxy_id.c:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/tools/bootstrap/slurm/slurm_query_proxy_id.c:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools/bootstrap/slurm/slurm_query_proxy_id.c:5817
   + /mpich2/branches/dev/coll-err-ret/src/pm/hydra/tools/bootstrap/external/slurm_query_proxy_id.c:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/tools/bootstrap/external/slurm_query_proxy_id.c:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/tools/bootstrap/slurm/slurm_query_proxy_id.c:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools/bootstrap/slurm/slurm_query_proxy_id.c:5817


Property changes on: mpich2/trunk/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/error-return/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c:5817
   + /mpich2/branches/dev/coll-err-ret/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c:5817


Property changes on: mpich2/trunk/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/error-return/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c:5817
   + /mpich2/branches/dev/coll-err-ret/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c:5817


Property changes on: mpich2/trunk/src/pm/hydra/ui
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt/src/pm/hydra/ui:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/ui:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/ui:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/ui:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/ui:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/ui:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/ui:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/ui:5406
   + /mpich2/branches/dev/ckpt/src/pm/hydra/ui:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/ui:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/ui:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/ui:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/ui:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/ui:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/ui:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/ui:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/ui:5406


Property changes on: mpich2/trunk/src/pm/hydra/utils
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt/src/pm/hydra/utils:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/utils:5057-6537
/mpich2/branches/dev/error-return/src/pm/hydra/utils:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/utils:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/utils:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/utils:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/utils:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/utils:5406
   + /mpich2/branches/dev/ckpt/src/pm/hydra/utils:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/utils:5057-6537
/mpich2/branches/dev/coll-err-ret/src/pm/hydra/utils:7771-7802
/mpich2/branches/dev/error-return/src/pm/hydra/utils:7662-7670
/mpich2/branches/dev/ftb/src/pm/hydra/utils:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/utils:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/utils:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/utils:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/utils:5406


Property changes on: mpich2/trunk/winconfigure.wsf
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt/winconfigure.wsf:5050
/mpich2/branches/dev/ckpt2/winconfigure.wsf:5057-6537
/mpich2/branches/dev/error-return/winconfigure.wsf:7662-7670
/mpich2/branches/dev/ftb/winconfigure.wsf:5661-5730
/mpich2/branches/dev/lapi/winconfigure.wsf:5817
/mpich2/branches/dev/win_rrvm/winconfigure.wsf:6404,6407-6408,6420,6422-6423
/mpich2/branches/dev/wintcp_async_progress/winconfigure.wsf:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/winconfigure.wsf:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/winconfigure.wsf:5406
   + /mpich2/branches/dev/ckpt/winconfigure.wsf:5050
/mpich2/branches/dev/ckpt2/winconfigure.wsf:5057-6537
/mpich2/branches/dev/coll-err-ret/winconfigure.wsf:7771-7802
/mpich2/branches/dev/error-return/winconfigure.wsf:7662-7670
/mpich2/branches/dev/ftb/winconfigure.wsf:5661-5730
/mpich2/branches/dev/lapi/winconfigure.wsf:5817
/mpich2/branches/dev/win_rrvm/winconfigure.wsf:6404,6407-6408,6420,6422-6423
/mpich2/branches/dev/wintcp_async_progress/winconfigure.wsf:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/winconfigure.wsf:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/winconfigure.wsf:5406



More information about the mpich2-commits mailing list