[mpich2-commits] r6698 - in mpich2/trunk/src/mpi: errhan init

goodell at mcs.anl.gov goodell at mcs.anl.gov
Thu May 20 22:34:43 CDT 2010


Author: goodell
Date: 2010-05-20 22:34:43 -0500 (Thu, 20 May 2010)
New Revision: 6698

Modified:
   mpich2/trunk/src/mpi/errhan/errutil.c
   mpich2/trunk/src/mpi/init/initthread.c
Log:
fix error handling in MPIR_Init_thread and errutil.c

In our attempt to clean up, we can get an assertion failure if we
failed too early.

No reviewer.

Modified: mpich2/trunk/src/mpi/errhan/errutil.c
===================================================================
--- mpich2/trunk/src/mpi/errhan/errutil.c	2010-05-21 03:34:41 UTC (rev 6697)
+++ mpich2/trunk/src/mpi/errhan/errutil.c	2010-05-21 03:34:43 UTC (rev 6698)
@@ -78,6 +78,8 @@
  * their sizes, and masks and shifts that may be used to extract them.
  */
 
+static int did_err_init = FALSE; /* helps us solve a bootstrapping problem */
+
 /* A few prototypes.  These routines are called from the MPIR_Err_return 
    routines.  checkValidErrcode depends on the MPICH_ERROR_MSG_LEVEL */
 
@@ -244,10 +246,18 @@
     int rc;
     MPIU_THREADPRIV_DECL;
 
-    MPIU_THREADPRIV_GET; 
+    rc = checkValidErrcode( error_class, fcname, &errcode );
 
-    rc = checkValidErrcode( error_class, fcname, &errcode );
-    
+    if (MPIR_Process.initialized == MPICH_PRE_INIT ||
+        MPIR_Process.initialized == MPICH_POST_FINALIZED)
+    {
+        /* for whatever reason, we aren't initialized (perhaps error during MPI_Init) */
+        handleFatalError(MPIR_Process.comm_world, fcname, errcode);
+        return MPI_ERR_INTERN;
+    }
+
+    MPIU_THREADPRIV_GET; /* must come after sanity check */
+
     /* First, check the nesting level */
     if (MPIR_Nest_value()) return errcode;
 
@@ -407,6 +417,7 @@
 #   if MPICH_ERROR_MSG_LEVEL >= MPICH_ERROR_MSG_ALL
     MPIR_Err_stack_init();
 #   endif
+    did_err_init = TRUE;
 }
 
 
@@ -1308,17 +1319,21 @@
 static MPID_Thread_mutex_t error_ring_mutex;
 #define error_ring_mutex_create(_mpi_errno_p) MPID_Thread_mutex_create(&error_ring_mutex,_mpi_errno_p)
 #define error_ring_mutex_destroy(_mpi_errno_p) MPID_Thread_mutex_destroy(&error_ring_mutex,_mpi_errno_p)
-#define error_ring_mutex_lock()                    \
-    do {                                           \
-        MPIU_THREAD_CHECK_BEGIN                    \
-        MPID_Thread_mutex_lock(&error_ring_mutex); \
-        MPIU_THREAD_CHECK_END                      \
+#define error_ring_mutex_lock()                          \
+    do {                                                 \
+        if (did_err_init) {                              \
+            MPIU_THREAD_CHECK_BEGIN                      \
+            MPID_Thread_mutex_lock(&error_ring_mutex);   \
+            MPIU_THREAD_CHECK_END                        \
+        }                                                \
     } while (0)
-#define error_ring_mutex_unlock()                    \
-    do {                                             \
-        MPIU_THREAD_CHECK_BEGIN                      \
-        MPID_Thread_mutex_unlock(&error_ring_mutex); \
-        MPIU_THREAD_CHECK_END                        \
+#define error_ring_mutex_unlock()                        \
+    do {                                                 \
+        if (did_err_init) {                              \
+            MPIU_THREAD_CHECK_BEGIN                      \
+            MPID_Thread_mutex_unlock(&error_ring_mutex); \
+            MPIU_THREAD_CHECK_END                        \
+        }                                                \
     } while (0)
 #else
 #define error_ring_mutex_create(_a)

Modified: mpich2/trunk/src/mpi/init/initthread.c
===================================================================
--- mpich2/trunk/src/mpi/init/initthread.c	2010-05-21 03:34:41 UTC (rev 6697)
+++ mpich2/trunk/src/mpi/init/initthread.c	2010-05-21 03:34:43 UTC (rev 6698)
@@ -220,6 +220,7 @@
     int has_args;
     int has_env;
     int thread_provided;
+    int exit_init_cs_on_failure = 0;
     MPIU_THREADPRIV_DECL;
 
     /* For any code in the device that wants to check for runtime 
@@ -259,9 +260,6 @@
     }
 #   endif
 
-    mpi_errno = MPIR_Param_init_params();
-    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-
 #if 0
     /* This should never happen */
     if (MPIR_Version_device == 0) {
@@ -374,14 +372,15 @@
     MPIR_COMML_REMEMBER( MPIR_Process.comm_self );
 
     /* Call any and all MPID_Init type functions */
-    /* FIXME: The call to err init should be within an ifdef
-       HAVE_ ERROR_CHECKING block (as must all uses of Err_create_code) */
     MPIR_Err_init();
     MPIR_Datatype_init();
 
     MPIR_Nest_init();
     /* MPIU_Timer_pre_init(); */
 
+    mpi_errno = MPIR_Param_init_params();
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
     /* define MPI as initialized so that we can use MPI functions within 
        MPID_Init if necessary */
     MPIR_Process.initialized = MPICH_WITHIN_MPI;
@@ -389,6 +388,7 @@
     /* We can't acquire any critical sections until this point.  Any
      * earlier the basic data structures haven't been initialized */
     MPIU_THREAD_CS_ENTER(INIT,required);
+    exit_init_cs_on_failure = 1;
 
     mpi_errno = MPID_Init(argc, argv, required, &thread_provided, 
 			  &has_args, &has_env);
@@ -432,10 +432,6 @@
     mpirinitf_();
 #endif
 
-    /* --BEGIN ERROR HANDLING-- */
-    if (mpi_errno != MPI_SUCCESS)
-        MPIR_Process.initialized = MPICH_PRE_INIT;
-    /* --END ERROR HANDLING-- */
     /* FIXME: Does this need to come before the call to MPID_InitComplete?
        For some debugger support, MPIR_WaitForDebugger may want to use
        MPI communication routines to collect information for the debugger */
@@ -452,7 +448,12 @@
     return mpi_errno;
 
 fn_fail:
-    MPIU_THREAD_CS_EXIT(INIT,required);
+    /* signal to error handling routines that core services are unavailable */
+    MPIR_Process.initialized = MPICH_PRE_INIT;
+
+    if (exit_init_cs_on_failure) {
+        MPIU_THREAD_CS_EXIT(INIT,required);
+    }
     MPIU_THREAD_CS_FINALIZE;
     return mpi_errno;
 }



More information about the mpich2-commits mailing list