[mpich2-commits] r7819 - in mpich2/trunk/src: mpi/coll util/param
buntinas at mcs.anl.gov
buntinas at mcs.anl.gov
Mon Jan 24 17:15:45 CST 2011
Author: buntinas
Date: 2011-01-24 17:15:45 -0600 (Mon, 24 Jan 2011)
New Revision: 7819
Modified:
mpich2/trunk/src/mpi/coll/helper_fns.c
mpich2/trunk/src/util/param/params.yml
Log:
Added a parameter to enable collective error returns; it is disabled by default.
Modified: mpich2/trunk/src/mpi/coll/helper_fns.c
===================================================================
--- mpich2/trunk/src/mpi/coll/helper_fns.c 2011-01-24 20:59:23 UTC (rev 7818)
+++ mpich2/trunk/src/mpi/coll/helper_fns.c 2011-01-24 23:15:45 UTC (rev 7819)
@@ -584,7 +584,7 @@
MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
- if (*errflag)
+ if (*errflag && MPIR_PARAM_ENABLE_COLL_FT_RET)
mpi_errno = MPIC_Send(buf, count, datatype, dest, MPIR_ERROR_TAG, comm);
else
mpi_errno = MPIC_Send(buf, count, datatype, dest, tag, comm);
@@ -611,6 +611,11 @@
MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
+ if (!MPIR_PARAM_ENABLE_COLL_FT_RET) {
+ mpi_errno = MPIC_Recv(buf, count, datatype, source, tag, comm, status);
+ goto fn_exit;
+ }
+
if (status == MPI_STATUS_IGNORE)
status = &mystatus;
@@ -650,7 +655,7 @@
MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
- if (*errflag)
+ if (*errflag && MPIR_PARAM_ENABLE_COLL_FT_RET)
mpi_errno = MPIC_Ssend(buf, count, datatype, dest, MPIR_ERROR_TAG, comm);
else
mpi_errno = MPIC_Ssend(buf, count, datatype, dest, tag, comm);
@@ -679,6 +684,13 @@
MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
+ if (!MPIR_PARAM_ENABLE_COLL_FT_RET) {
+ mpi_errno = MPIC_Sendrecv(sendbuf, sendcount, sendtype, dest, sendtag,
+ recvbuf, recvcount, recvtype, source, recvtag,
+ comm, status);
+ goto fn_exit;
+ }
+
if (status == MPI_STATUS_IGNORE)
status = &mystatus;
@@ -728,13 +740,21 @@
MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
+ if (!MPIR_PARAM_ENABLE_COLL_FT_RET) {
+ mpi_errno = MPIC_Sendrecv_replace(buf, count, datatype,
+ dest, sendtag,
+ source, recvtag,
+ comm, status);
+ goto fn_exit;
+ }
+
if (status == MPI_STATUS_IGNORE)
status = &mystatus;
if (*errflag) {
mpi_errno = MPIC_Sendrecv_replace(buf, count, datatype,
dest, MPIR_ERROR_TAG,
- source, recvtag,
+ source, MPI_ANY_TAG,
comm, status);
goto fn_exit;
}
@@ -775,7 +795,7 @@
MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
- if (*errflag)
+ if (*errflag && MPIR_PARAM_ENABLE_COLL_FT_RET)
mpi_errno = MPIC_Isend(buf, count, datatype, dest, MPIR_ERROR_TAG, comm, request);
else
mpi_errno = MPIC_Isend(buf, count, datatype, dest, tag, comm, request);
@@ -799,7 +819,10 @@
MPIDI_FUNC_ENTER(MPID_STATE_MPIC_IRECV_FT);
- mpi_errno = MPIC_Irecv(buf, count, datatype, source, MPI_ANY_TAG, comm, request);
+ if (MPIR_PARAM_ENABLE_COLL_FT_RET)
+ mpi_errno = MPIC_Irecv(buf, count, datatype, source, MPI_ANY_TAG, comm, request);
+ else
+ mpi_errno = MPIC_Irecv(buf, count, datatype, source, tag, comm, request);
fn_exit:
MPIDI_FUNC_EXIT(MPID_STATE_MPIC_IRECV_FT);
@@ -828,7 +851,7 @@
mpi_errno = MPIR_Waitall_impl(numreq, requests, statuses);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
- if (*errflag)
+ if (*errflag || !MPIR_PARAM_ENABLE_COLL_FT_RET)
goto fn_exit;
for (i = 0; i < numreq; ++i) {
Modified: mpich2/trunk/src/util/param/params.yml
===================================================================
--- mpich2/trunk/src/util/param/params.yml 2011-01-24 20:59:23 UTC (rev 7818)
+++ mpich2/trunk/src/util/param/params.yml 2011-01-24 23:15:45 UTC (rev 7819)
@@ -32,6 +32,8 @@
description : parameters relevant to the "MPIR" debugger interface
- name : checkpointing
description : parameters relevant to checkpointing
+ - name : fault_tolerance
+ description : parameters that control fault tolerance behavior
- name : threads
description : multi-threading parameters
- name : nemesis
@@ -276,6 +278,19 @@
checkpointing library cannot be initialized.
##############################################################
+ # fault-tolerance parameters
+ - category : fault_tolerance
+ name : ENABLE_COLL_FT_RET
+ type : boolean
+ default : false
+ description : >-
+ Collectives called on a communicator with a failed process
+ should not hang, however the result of the operation may be
+ invalid even though the function returns MPI_SUCCESS. This
+ option enables an experimental feature that will return an
+ error if the result of the collective is invalid.
+
+ ##############################################################
# memory parameters
- category : memory
name : ABORT_ON_LEAKED_HANDLES
More information about the mpich2-commits mailing list