[mpich2-commits] r7437 - in mpich2/trunk/src: mpid/ch3/channels/nemesis/nemesis/src mpid/ch3/channels/nemesis/src pm/hydra/ui/mpich util/param

buntinas at mcs.anl.gov buntinas at mcs.anl.gov
Thu Nov 11 10:32:35 CST 2010


Author: buntinas
Date: 2010-11-11 10:32:35 -0600 (Thu, 11 Nov 2010)
New Revision: 7437

Modified:
   mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/src/mpid_nem_ckpt.c
   mpich2/trunk/src/mpid/ch3/channels/nemesis/src/ch3_progress.c
   mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.c
   mpich2/trunk/src/util/param/params.yml
Log:
Add a feature: even when checkpointing is enabled at configure time, the library will not initialize the checkpointing library unless the user requests it in mpiexec (i.e., includes the -ckpoint-prefix= option).  This allows an app to run on machines that don't have the checkpointing kernel module loaded.

Modified: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/src/mpid_nem_ckpt.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/src/mpid_nem_ckpt.c	2010-11-11 07:55:02 UTC (rev 7436)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/src/mpid_nem_ckpt.c	2010-11-11 16:32:35 UTC (rev 7437)
@@ -131,6 +131,9 @@
 
     MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_NEM_CKPT_INIT);
 
+    if (!MPIR_PARAM_ENABLE_CKPOINT)
+        goto fn_exit;
+    
     client_id = cr_init();
     MPIU_ERR_CHKANDJUMP(client_id < 0 && errno == ENOSYS, mpi_errno, MPI_ERR_OTHER, "**blcr_mod");
 

Modified: mpich2/trunk/src/mpid/ch3/channels/nemesis/src/ch3_progress.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/src/ch3_progress.c	2010-11-11 07:55:02 UTC (rev 7436)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/src/ch3_progress.c	2010-11-11 16:32:35 UTC (rev 7437)
@@ -242,16 +242,18 @@
     }
 
 #ifdef ENABLE_CHECKPOINTING
-    if (MPIDI_nem_ckpt_start_checkpoint) {
-        MPIDI_nem_ckpt_start_checkpoint = FALSE;
-        mpi_errno = MPIDI_nem_ckpt_start();
-        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+    if (MPIR_PARAM_ENABLE_CKPOINT) {
+        if (MPIDI_nem_ckpt_start_checkpoint) {
+            MPIDI_nem_ckpt_start_checkpoint = FALSE;
+            mpi_errno = MPIDI_nem_ckpt_start();
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+        }
+        if (MPIDI_nem_ckpt_finish_checkpoint) {
+            MPIDI_nem_ckpt_finish_checkpoint = FALSE;
+            mpi_errno = MPIDI_nem_ckpt_finish();
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+        }
     }
-    if (MPIDI_nem_ckpt_finish_checkpoint) {
-        MPIDI_nem_ckpt_finish_checkpoint = FALSE;
-        mpi_errno = MPIDI_nem_ckpt_finish();
-        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-    }
 #endif
 
     do

Modified: mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.c
===================================================================
--- mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.c	2010-11-11 07:55:02 UTC (rev 7436)
+++ mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.c	2010-11-11 16:32:35 UTC (rev 7437)
@@ -171,6 +171,10 @@
         goto fn_fail;
     }
 
+    /* if the user set the checkpoint prefix, set env var to enable checkpointing on the processes  */
+    if (HYD_handle.user_global.ckpoint_prefix)
+        HYDU_append_env_to_list("MPICH_ENABLE_CKPOINT", "1", &HYD_handle.user_global.global_env.user);
+
     status = HYDU_set_common_signals(signal_cb);
     HYDU_ERR_POP(status, "unable to set signal\n");
 

Modified: mpich2/trunk/src/util/param/params.yml
===================================================================
--- mpich2/trunk/src/util/param/params.yml	2010-11-11 07:55:02 UTC (rev 7436)
+++ mpich2/trunk/src/util/param/params.yml	2010-11-11 16:32:35 UTC (rev 7437)
@@ -26,6 +26,8 @@
       description : parameters that control error handling behavior (stack traces, aborts, etc)
     - name        : debugger
       description : parameters relevant to the "MPIR" debugger interface
+    - name        : checkpointing
+      description : parameters relevant to checkpointing
     - name        : threads
       description : multi-threading parameters
     - name        : nemesis
@@ -244,10 +246,25 @@
       type        : boolean
       default     : false
       description : >-
-        If true causes processes to wait in MPI_Init and
+        If true, causes processes to wait in MPI_Init and
         MPI_Initthread for a debugger to be attached.  Once the
         debugger has attached, the variable 'hold' should be set to 0
         in order to allow the process to continue (e.g., in gdb, "set
         hold=0").
 
+  ##############################################################
+    # checkpointing parameters
+    - category    : checkpointing
+      name        : ENABLE_CKPOINT
+      type        : boolean
+      default     : false
+      description : >-
+        If true, enables checkpointing support and returns an error if
+        checkpointing library cannot be initialized.
+
 ...
+
+# Local Variables:
+# mode: conf-colon
+# indent-tabs-mode: nil
+# End:



More information about the mpich2-commits mailing list