[mpich2-commits] r7437 - in mpich2/trunk/src: mpid/ch3/channels/nemesis/nemesis/src mpid/ch3/channels/nemesis/src pm/hydra/ui/mpich util/param
buntinas at mcs.anl.gov
buntinas at mcs.anl.gov
Thu Nov 11 10:32:35 CST 2010
Author: buntinas
Date: 2010-11-11 10:32:35 -0600 (Thu, 11 Nov 2010)
New Revision: 7437
Modified:
mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/src/mpid_nem_ckpt.c
mpich2/trunk/src/mpid/ch3/channels/nemesis/src/ch3_progress.c
mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.c
mpich2/trunk/src/util/param/params.yml
Log:
Add a feature: when checkpointing is enabled at configure time, the library no longer initializes the checkpointing library unless the user requests it in mpiexec (i.e., passes the -ckpoint-prefix= option). This allows an application to run on machines that do not have the checkpointing kernel module loaded.
Modified: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/src/mpid_nem_ckpt.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/src/mpid_nem_ckpt.c 2010-11-11 07:55:02 UTC (rev 7436)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/src/mpid_nem_ckpt.c 2010-11-11 16:32:35 UTC (rev 7437)
@@ -131,6 +131,9 @@
MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_NEM_CKPT_INIT);
+ if (!MPIR_PARAM_ENABLE_CKPOINT)
+ goto fn_exit;
+
client_id = cr_init();
MPIU_ERR_CHKANDJUMP(client_id < 0 && errno == ENOSYS, mpi_errno, MPI_ERR_OTHER, "**blcr_mod");
Modified: mpich2/trunk/src/mpid/ch3/channels/nemesis/src/ch3_progress.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/src/ch3_progress.c 2010-11-11 07:55:02 UTC (rev 7436)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/src/ch3_progress.c 2010-11-11 16:32:35 UTC (rev 7437)
@@ -242,16 +242,18 @@
}
#ifdef ENABLE_CHECKPOINTING
- if (MPIDI_nem_ckpt_start_checkpoint) {
- MPIDI_nem_ckpt_start_checkpoint = FALSE;
- mpi_errno = MPIDI_nem_ckpt_start();
- if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ if (MPIR_PARAM_ENABLE_CKPOINT) {
+ if (MPIDI_nem_ckpt_start_checkpoint) {
+ MPIDI_nem_ckpt_start_checkpoint = FALSE;
+ mpi_errno = MPIDI_nem_ckpt_start();
+ if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ }
+ if (MPIDI_nem_ckpt_finish_checkpoint) {
+ MPIDI_nem_ckpt_finish_checkpoint = FALSE;
+ mpi_errno = MPIDI_nem_ckpt_finish();
+ if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ }
}
- if (MPIDI_nem_ckpt_finish_checkpoint) {
- MPIDI_nem_ckpt_finish_checkpoint = FALSE;
- mpi_errno = MPIDI_nem_ckpt_finish();
- if (mpi_errno) MPIU_ERR_POP(mpi_errno);
- }
#endif
do
Modified: mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.c
===================================================================
--- mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.c 2010-11-11 07:55:02 UTC (rev 7436)
+++ mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.c 2010-11-11 16:32:35 UTC (rev 7437)
@@ -171,6 +171,10 @@
goto fn_fail;
}
+ /* if the user set the checkpoint prefix, set env var to enable checkpointing on the processes */
+ if (HYD_handle.user_global.ckpoint_prefix)
+ HYDU_append_env_to_list("MPICH_ENABLE_CKPOINT", "1", &HYD_handle.user_global.global_env.user);
+
status = HYDU_set_common_signals(signal_cb);
HYDU_ERR_POP(status, "unable to set signal\n");
Modified: mpich2/trunk/src/util/param/params.yml
===================================================================
--- mpich2/trunk/src/util/param/params.yml 2010-11-11 07:55:02 UTC (rev 7436)
+++ mpich2/trunk/src/util/param/params.yml 2010-11-11 16:32:35 UTC (rev 7437)
@@ -26,6 +26,8 @@
description : parameters that control error handling behavior (stack traces, aborts, etc)
- name : debugger
description : parameters relevant to the "MPIR" debugger interface
+ - name : checkpointing
+ description : parameters relevant to checkpointing
- name : threads
description : multi-threading parameters
- name : nemesis
@@ -244,10 +246,25 @@
type : boolean
default : false
description : >-
- If true causes processes to wait in MPI_Init and
+ If true, causes processes to wait in MPI_Init and
MPI_Initthread for a debugger to be attached. Once the
debugger has attached, the variable 'hold' should be set to 0
in order to allow the process to continue (e.g., in gdb, "set
hold=0").
+ ##############################################################
+ # checkpointing parameters
+ - category : checkpointing
+ name : ENABLE_CKPOINT
+ type : boolean
+ default : false
+ description : >-
+ If true, enables checkpointing support and returns an error if
+ checkpointing library cannot be initialized.
+
...
+
+# Local Variables:
+# mode: conf-colon
+# indent-tabs-mode: nil
+# End:
More information about the mpich2-commits
mailing list