[mpich2-commits] r6698 - in mpich2/trunk/src/mpi: errhan init
goodell at mcs.anl.gov
goodell at mcs.anl.gov
Thu May 20 22:34:43 CDT 2010
Author: goodell
Date: 2010-05-20 22:34:43 -0500 (Thu, 20 May 2010)
New Revision: 6698
Modified:
mpich2/trunk/src/mpi/errhan/errutil.c
mpich2/trunk/src/mpi/init/initthread.c
Log:
fix error handling in MPIR_Init_thread and errutil.c
While attempting to clean up after a failure, we can hit an assertion
failure if the original failure occurred too early in initialization.
No reviewer.
Modified: mpich2/trunk/src/mpi/errhan/errutil.c
===================================================================
--- mpich2/trunk/src/mpi/errhan/errutil.c 2010-05-21 03:34:41 UTC (rev 6697)
+++ mpich2/trunk/src/mpi/errhan/errutil.c 2010-05-21 03:34:43 UTC (rev 6698)
@@ -78,6 +78,8 @@
* their sizes, and masks and shifts that may be used to extract them.
*/
+static int did_err_init = FALSE; /* helps us solve a bootstrapping problem */
+
/* A few prototypes. These routines are called from the MPIR_Err_return
routines. checkValidErrcode depends on the MPICH_ERROR_MSG_LEVEL */
@@ -244,10 +246,18 @@
int rc;
MPIU_THREADPRIV_DECL;
- MPIU_THREADPRIV_GET;
+ rc = checkValidErrcode( error_class, fcname, &errcode );
- rc = checkValidErrcode( error_class, fcname, &errcode );
-
+ if (MPIR_Process.initialized == MPICH_PRE_INIT ||
+ MPIR_Process.initialized == MPICH_POST_FINALIZED)
+ {
+ /* for whatever reason, we aren't initialized (perhaps error during MPI_Init) */
+ handleFatalError(MPIR_Process.comm_world, fcname, errcode);
+ return MPI_ERR_INTERN;
+ }
+
+ MPIU_THREADPRIV_GET; /* must come after sanity check */
+
/* First, check the nesting level */
if (MPIR_Nest_value()) return errcode;
@@ -407,6 +417,7 @@
# if MPICH_ERROR_MSG_LEVEL >= MPICH_ERROR_MSG_ALL
MPIR_Err_stack_init();
# endif
+ did_err_init = TRUE;
}
@@ -1308,17 +1319,21 @@
static MPID_Thread_mutex_t error_ring_mutex;
#define error_ring_mutex_create(_mpi_errno_p) MPID_Thread_mutex_create(&error_ring_mutex,_mpi_errno_p)
#define error_ring_mutex_destroy(_mpi_errno_p) MPID_Thread_mutex_destroy(&error_ring_mutex,_mpi_errno_p)
-#define error_ring_mutex_lock() \
- do { \
- MPIU_THREAD_CHECK_BEGIN \
- MPID_Thread_mutex_lock(&error_ring_mutex); \
- MPIU_THREAD_CHECK_END \
+#define error_ring_mutex_lock() \
+ do { \
+ if (did_err_init) { \
+ MPIU_THREAD_CHECK_BEGIN \
+ MPID_Thread_mutex_lock(&error_ring_mutex); \
+ MPIU_THREAD_CHECK_END \
+ } \
} while (0)
-#define error_ring_mutex_unlock() \
- do { \
- MPIU_THREAD_CHECK_BEGIN \
- MPID_Thread_mutex_unlock(&error_ring_mutex); \
- MPIU_THREAD_CHECK_END \
+#define error_ring_mutex_unlock() \
+ do { \
+ if (did_err_init) { \
+ MPIU_THREAD_CHECK_BEGIN \
+ MPID_Thread_mutex_unlock(&error_ring_mutex); \
+ MPIU_THREAD_CHECK_END \
+ } \
} while (0)
#else
#define error_ring_mutex_create(_a)
Modified: mpich2/trunk/src/mpi/init/initthread.c
===================================================================
--- mpich2/trunk/src/mpi/init/initthread.c 2010-05-21 03:34:41 UTC (rev 6697)
+++ mpich2/trunk/src/mpi/init/initthread.c 2010-05-21 03:34:43 UTC (rev 6698)
@@ -220,6 +220,7 @@
int has_args;
int has_env;
int thread_provided;
+ int exit_init_cs_on_failure = 0;
MPIU_THREADPRIV_DECL;
/* For any code in the device that wants to check for runtime
@@ -259,9 +260,6 @@
}
# endif
- mpi_errno = MPIR_Param_init_params();
- if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-
#if 0
/* This should never happen */
if (MPIR_Version_device == 0) {
@@ -374,14 +372,15 @@
MPIR_COMML_REMEMBER( MPIR_Process.comm_self );
/* Call any and all MPID_Init type functions */
- /* FIXME: The call to err init should be within an ifdef
- HAVE_ ERROR_CHECKING block (as must all uses of Err_create_code) */
MPIR_Err_init();
MPIR_Datatype_init();
MPIR_Nest_init();
/* MPIU_Timer_pre_init(); */
+ mpi_errno = MPIR_Param_init_params();
+ if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
/* define MPI as initialized so that we can use MPI functions within
MPID_Init if necessary */
MPIR_Process.initialized = MPICH_WITHIN_MPI;
@@ -389,6 +388,7 @@
/* We can't acquire any critical sections until this point. Any
* earlier the basic data structures haven't been initialized */
MPIU_THREAD_CS_ENTER(INIT,required);
+ exit_init_cs_on_failure = 1;
mpi_errno = MPID_Init(argc, argv, required, &thread_provided,
&has_args, &has_env);
@@ -432,10 +432,6 @@
mpirinitf_();
#endif
- /* --BEGIN ERROR HANDLING-- */
- if (mpi_errno != MPI_SUCCESS)
- MPIR_Process.initialized = MPICH_PRE_INIT;
- /* --END ERROR HANDLING-- */
/* FIXME: Does this need to come before the call to MPID_InitComplete?
For some debugger support, MPIR_WaitForDebugger may want to use
MPI communication routines to collect information for the debugger */
@@ -452,7 +448,12 @@
return mpi_errno;
fn_fail:
- MPIU_THREAD_CS_EXIT(INIT,required);
+ /* signal to error handling routines that core services are unavailable */
+ MPIR_Process.initialized = MPICH_PRE_INIT;
+
+ if (exit_init_cs_on_failure) {
+ MPIU_THREAD_CS_EXIT(INIT,required);
+ }
MPIU_THREAD_CS_FINALIZE;
return mpi_errno;
}
More information about the mpich2-commits
mailing list