[mpich2-commits] r7887 - in mpich2/trunk/src/pm/hydra: . tools/bootstrap tools/bootstrap/include

balaji at mcs.anl.gov balaji at mcs.anl.gov
Tue Feb 1 18:38:01 CST 2011


Author: balaji
Date: 2011-02-01 18:38:01 -0600 (Tue, 01 Feb 2011)
New Revision: 7887

Added:
   mpich2/trunk/src/pm/hydra/tools/bootstrap/include/bsci.h
Removed:
   mpich2/trunk/src/pm/hydra/tools/bootstrap/include/bsci.h.in
Modified:
   mpich2/trunk/src/pm/hydra/configure.in
   mpich2/trunk/src/pm/hydra/tools/bootstrap/Makefile.mk
Log:
Fixes for the case where only a subset of bootstrap servers are
picked. Fixes ticket #1427.

Modified: mpich2/trunk/src/pm/hydra/configure.in
===================================================================
--- mpich2/trunk/src/pm/hydra/configure.in	2011-02-02 00:37:57 UTC (rev 7886)
+++ mpich2/trunk/src/pm/hydra/configure.in	2011-02-02 00:38:01 UTC (rev 7887)
@@ -145,9 +145,9 @@
 #########################################################################
 AC_ARG_WITH(hydra-bss,
 	[AC_HELP_STRING([--with-hydra-bss=name],
-		[Bootstrap Server (ssh,none,rsh,fork,slurm,ll,lsf,sge,persist,pbs)])],
+		[Bootstrap Server (ssh,rsh,fork,slurm,ll,lsf,sge,pbs,persist)])],
 	[ hydra_bss=$withval ],
-	[ hydra_bss="ssh,none,rsh,fork,slurm,ll,lsf,sge,persist,pbs" ])
+	[ hydra_bss="ssh,rsh,fork,slurm,ll,lsf,sge,pbs,persist" ])
 AC_MSG_CHECKING(bootstrap server)
 AC_MSG_RESULT($hydra_bss)
 hydra_bss_names="`echo $hydra_bss | sed -e 's/:/ /g' -e 's/,/ /g'`"
@@ -156,73 +156,74 @@
 hydra_bss_persist=false
 hydra_bss_pbs=false
 hydra_bss_none=false
+
+## The "none" bootstrap server is always enabled
+hydra_bss_none=true
+
+## Enable the "none" RMK at the start, so it is the default RMK if
+## nothing else is found
+available_rmks=`echo $available_rmks none`
+
 for hydra_bss_name in ${hydra_bss_names}; do
     case "$hydra_bss_name" in
     	ssh)
 		hydra_bss_external=true
-		available_bss=`echo $available_bss ssh`
 		available_launchers=`echo $available_launchers ssh`
 		;;
     	rsh)
 		hydra_bss_external=true
-		available_bss=`echo $available_bss rsh`
 		available_launchers=`echo $available_launchers rsh`
 		;;
     	fork)
 		hydra_bss_external=true
-		available_bss=`echo $available_bss fork`
 		available_launchers=`echo $available_launchers fork`
 		;;
 	slurm)
 		hydra_bss_external=true
-		available_bss=`echo $available_bss slurm`
 		available_launchers=`echo $available_launchers slurm`
 		available_rmks=`echo $available_rmks slurm`
 		;;
 	ll)
 		hydra_bss_external=true
-		available_bss=`echo $available_bss ll`
 		available_launchers=`echo $available_launchers ll`
 		available_rmks=`echo $available_rmks ll`
 		;;
 	lsf)
 		hydra_bss_external=true
-		available_bss=`echo $available_bss lsf`
 		available_launchers=`echo $available_launchers lsf`
 		available_rmks=`echo $available_rmks lsf`
 		;;
 	sge)
 		hydra_bss_external=true
-		available_bss=`echo $available_bss sge`
 		available_launchers=`echo $available_launchers sge`
 		available_rmks=`echo $available_rmks sge`
 		;;
 	persist)
 		hydra_bss_persist=true
-		available_bss=`echo $available_bss persist`
 		available_launchers=`echo $available_launchers persist`
 		;;
 	pbs)
 		hydra_bss_pbs=true
-		available_bss=`echo $available_bss pbs`
 		available_rmks=`echo $available_rmks pbs`
 		;;
-	none|no)
-		hydra_bss_none=true
-		available_bss=`echo $available_bss none`
-		available_launchers=`echo $available_launchers none`
-		available_rmks=`echo $available_rmks none`
-		;;
 	*)
 		;;
     esac
 done
 
+## Enable the "none" launcher at the end, so it is not picked as the
+## launcher, unless nothing else is found
+available_launchers=`echo $available_launchers none`
+
 ## Bootstrap servers
-AM_CONDITIONAL([hydra_bss_external], [$hydra_bss_external])
-AM_CONDITIONAL([hydra_bss_persist], [$hydra_bss_persist])
-AM_CONDITIONAL([hydra_bss_pbs], [$hydra_bss_pbs])
-AM_CONDITIONAL([hydra_bss_none], [$hydra_bss_none])
+if test "$hydra_bss_external" ; then
+   AM_CONDITIONAL([hydra_bss_external], [true])
+   AC_DEFINE(HAVE_BSS_EXTERNAL,1,[Define if external bss is enabled])
+fi
+if test "$hydra_bss_persist" ; then
+   AM_CONDITIONAL([hydra_bss_persist], [true])
+   AC_DEFINE(HAVE_BSS_PERSIST,1,[Define if persist bss is enabled])
+fi
 
 ## Launchers
 for hydra_launcher in ${available_launchers} ; do
@@ -651,6 +652,5 @@
 # Final output
 AC_OUTPUT(Makefile
 	tools/bootstrap/src/bsci_init.c
-	tools/bootstrap/include/bsci.h
 	hydra-doxygen.cfg
 )

Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/Makefile.mk
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/Makefile.mk	2011-02-02 00:37:57 UTC (rev 7886)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/Makefile.mk	2011-02-02 00:38:01 UTC (rev 7887)
@@ -4,7 +4,7 @@
 #     See COPYRIGHT in top-level directory.
 #
 
-AM_CPPFLAGS += -I$(top_srcdir)/tools/bootstrap/include -I$(top_builddir)/tools/bootstrap/include
+AM_CPPFLAGS += -I$(top_srcdir)/tools/bootstrap/include
 
 include tools/bootstrap/src/Makefile.mk
 include tools/bootstrap/utils/Makefile.mk

Copied: mpich2/trunk/src/pm/hydra/tools/bootstrap/include/bsci.h (from rev 7886, mpich2/trunk/src/pm/hydra/tools/bootstrap/include/bsci.h.in)
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/include/bsci.h	                        (rev 0)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/include/bsci.h	2011-02-02 00:38:01 UTC (rev 7887)
@@ -0,0 +1,239 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *  (C) 2008 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#ifndef BSCI_H_INCLUDED
+#define BSCI_H_INCLUDED
+
+/** @file bsci.h */
+
+/*! \addtogroup bootstrap Bootstrap Control Interface
+ * @{
+ */
+
+/**
+ * \brief BSCI internal structure to maintain persistent information.
+ */
+struct HYDT_bsci_info {
+    /** \brief RMK to use */
+    const char *rmk;
+
+    /** \brief Launcher to use */
+    const char *launcher;
+
+    /** \brief Launcher executable to use */
+    const char *launcher_exec;
+
+    /** \brief Enable/disable X-forwarding */
+    int  enablex;
+
+    /** \brief Enable/disable debugging */
+    int  debug;
+};
+
+/**
+ * \brief Function pointers for device specific implementations of
+ * different BSCI functions.
+ */
+struct HYDT_bsci_fns {
+    /* RMK functions */
+    /** \brief Query if the RMK integrates natively with the RM */
+    HYD_status(*query_native_int) (int *ret);
+
+    /** \brief Query for node list information */
+    HYD_status(*query_node_list) (struct HYD_node **node_list);
+
+    /** \brief Query for job ID information */
+    HYD_status(*query_jobid) (char **jobid);
+
+    /** \brief Finalize the RMK */
+    HYD_status(*rmk_finalize) (void);
+
+
+    /* Launcher functions */
+    /** \brief Launch processes */
+    HYD_status(*launch_procs) (char **args, struct HYD_node *node_list, int *control_fd);
+
+    /** \brief Finalize the bootstrap control device */
+    HYD_status(*launcher_finalize) (void);
+
+    /** \brief Wait for launched processes to complete */
+    HYD_status(*wait_for_completion) (int timeout);
+
+    /** \brief Query for the universe size */
+    HYD_status(*query_usize) (int *size);
+
+    /** \brief Query the ID of a proxy */
+    HYD_status(*query_proxy_id) (int *proxy_id);
+
+    /** \brief Query if an environment variable should be inherited */
+    HYD_status(*query_env_inherit) (const char *env_name, int *ret);
+};
+
+/** \cond */
+extern struct HYDT_bsci_fns HYDT_bsci_fns;
+extern struct HYDT_bsci_info HYDT_bsci_info;
+/** \endcond */
+
+/**
+ * \brief HYDT_bsci_init - Initialize the bootstrap control device
+ *
+ * \param[in]   rmk             Resource management kernel to use
+ * \param[in]   launcher        Launcher to use
+ * \param[in]   launcher_exec   Launcher executable to use (optional)
+ * \param[in]   enablex         Enable/disable X-forwarding (hint only)
+ * \param[in]   debug           Enable/disable debugging
+ *
+ * This function initializes the bootstrap control device. This needs
+ * to be called before any other BSCI function. Implementors are
+ * expected to set any bootstrap implementation specific function
+ * pointers in this function to be used by later BSCI calls.
+ */
+HYD_status HYDT_bsci_init(const char *rmk, const char *launcher,
+                          const char *launcher_exec, int enablex,
+                          int debug);
+
+
+/**
+ * \brief HYDT_bsci_launch_procs - Launch processes
+ *
+ * \param[in]   args            Arguments to be used for the launched processes
+ * \param[in]   node_list       List of nodes to launch processes on
+ * \param[out]  control_fd      Control socket to communicate with the launched process
+ * \param[in]   stdout_cb       Stdout callback function
+ * \param[in]   stderr_cb       Stderr callback function
+ *
+ * This function appends a proxy ID to the end of the args list and
+ * uses this combined list as the executable and its arguments to
+ * launch. Upper layers will need to account for this automatic
+ * addition of the proxy ID.
+ *
+ * Launchers that perform sequential launches (one process at a time),
+ * should set the proxy ID string in sequential order. Launchers that
+ * perform parallel launches should set the proxy ID string to "-1",
+ * but allow proxies to query their ID information on each node using
+ * the HYDT_bsci_query_proxy_id function.
+ */
+HYD_status HYDT_bsci_launch_procs(char **args, struct HYD_node *node_list, int *control_fd);
+
+
+/**
+ * \brief HYDT_bsci_finalize - Finalize the bootstrap control device
+ *
+ * This function cleans up any relevant state that the bootstrap
+ * device maintained.
+ */
+HYD_status HYDT_bsci_finalize(void);
+
+
+/**
+ * \brief HYDT_bsci_wait_for_completion - Wait for launched processes to complete
+ *
+ * \param[in]  timeout        Time to wait for
+ *
+ * \return HYD_TIMED_OUT if the timer expired
+ *
+ * This function waits for all processes it launched to finish. The
+ * launcher should keep track of the processes it is launching and
+ * wait for their completion.
+ */
+HYD_status HYDT_bsci_wait_for_completion(int timeout);
+
+
+/**
+ * \brief HYDT_bsci_query_node_list - Query for node list information
+ *
+ * \param[out] node_list       Lists of nodes available
+ *
+ * This function allows the upper layers to query the available
+ * nodes.
+ */
+HYD_status HYDT_bsci_query_node_list(struct HYD_node **node_list);
+
+
+/**
+ * \brief HYDT_bsci_query_jobid - Query for Job ID information
+ *
+ * \param[out] jobid       Job ID
+ *
+ * This function allows the upper layers to query the job ID.
+ */
+HYD_status HYDT_bsci_query_jobid(char **jobid);
+
+
+/**
+ * \brief HYDT_bsci_query_usize - Query for the universe size
+ *
+ * \param[out]  size       Maximum number of processes that can be launched
+ *
+ * If the underlying system allows for multitasking many processes on
+ * a single processing element, the launcher should return "-1"
+ * (representing infinite). If not, it should specify the number of
+ * processes that can be spawned.
+ */
+HYD_status HYDT_bsci_query_usize(int *size);
+
+
+/**
+ * \brief HYDT_bsci_query_proxy_id - Query the ID of a proxy
+ *
+ * \param[out]  proxy_id    My proxy ID
+ *
+ * This function is called by each proxy if the proxy_str_id is
+ * specified as "-1" during launch.
+ */
+HYD_status HYDT_bsci_query_proxy_id(int *proxy_id);
+
+/**
+ * \brief HYDT_bsci_query_env_inherit - Query if an environment
+ * variable is safe to be inherited
+ *
+ * \param[in]  env_name    Name of the environment variable
+ * \param[out] ret         Boolean for true (1) or false (0)
+ *
+ * This function is used to check if an environment variable inherited
+ * from the user's environment is safe to be propagated to the remote
+ * processes.
+ */
+HYD_status HYDT_bsci_query_env_inherit(const char *env_name, int *ret);
+
+/**
+ * \brief HYDT_bsci_query_native_int - Query if the RMK integrates
+ * natively with the RM
+ *
+ * \param[out] ret                    Boolean for true (1) or false (0)
+ *
+ * This function allows the upper layers to query whether the RMK
+ * in use integrates natively with the RM; the answer is returned
+ * through ret.
+ */
+HYD_status HYDT_bsci_query_native_int(int *ret);
+
+/*! @} */
+
+/* Each launcher has to expose an initialization function */
+#if defined HAVE_BSS_EXTERNAL
+HYD_status HYDT_bsci_launcher_ssh_init(void);
+HYD_status HYDT_bsci_launcher_rsh_init(void);
+HYD_status HYDT_bsci_launcher_fork_init(void);
+HYD_status HYDT_bsci_launcher_slurm_init(void);
+HYD_status HYDT_bsci_launcher_ll_init(void);
+HYD_status HYDT_bsci_launcher_lsf_init(void);
+HYD_status HYDT_bsci_launcher_sge_init(void);
+HYD_status HYDT_bsci_launcher_none_init(void);
+
+HYD_status HYDT_bsci_rmk_slurm_init(void);
+HYD_status HYDT_bsci_rmk_ll_init(void);
+HYD_status HYDT_bsci_rmk_lsf_init(void);
+HYD_status HYDT_bsci_rmk_sge_init(void);
+HYD_status HYDT_bsci_rmk_pbs_init(void);
+HYD_status HYDT_bsci_rmk_none_init(void);
+#endif /* HAVE_BSS_EXTERNAL */
+
+#if defined HAVE_BSS_PERSIST
+HYD_status HYDT_bsci_launcher_persist_init(void);
+#endif /* HAVE_BSS_PERSIST */
+
+#endif /* BSCI_H_INCLUDED */

Deleted: mpich2/trunk/src/pm/hydra/tools/bootstrap/include/bsci.h.in
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/include/bsci.h.in	2011-02-02 00:37:57 UTC (rev 7886)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/include/bsci.h.in	2011-02-02 00:38:01 UTC (rev 7887)
@@ -1,220 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- *  (C) 2008 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#ifndef BSCI_H_INCLUDED
-#define BSCI_H_INCLUDED
-
-/** @file bsci.h.in */
-
-/*! \addtogroup bootstrap Bootstrap Control Interface
- * @{
- */
-
-/**
- * \brief BSCI internal structure to maintain persistent information.
- */
-struct HYDT_bsci_info {
-    /** \brief RMK to use */
-    const char *rmk;
-
-    /** \brief Launcher to use */
-    const char *launcher;
-
-    /** \brief Launcher executable to use */
-    const char *launcher_exec;
-
-    /** \brief Enable/disable X-forwarding */
-    int  enablex;
-
-    /** \brief Enable/disable debugging */
-    int  debug;
-};
-
-/**
- * \brief Function pointers for device specific implementations of
- * different BSCI functions.
- */
-struct HYDT_bsci_fns {
-    /* RMK functions */
-    /** \brief Query if the RMK integrates natively with the RM */
-    HYD_status(*query_native_int) (int *ret);
-
-    /** \brief Query for node list information */
-    HYD_status(*query_node_list) (struct HYD_node **node_list);
-
-    /** \brief Query for job ID information */
-    HYD_status(*query_jobid) (char **jobid);
-
-    /** \brief Finalize the RMK */
-    HYD_status(*rmk_finalize) (void);
-
-
-    /* Launcher functions */
-    /** \brief Launch processes */
-    HYD_status(*launch_procs) (char **args, struct HYD_node *node_list, int *control_fd);
-
-    /** \brief Finalize the bootstrap control device */
-    HYD_status(*launcher_finalize) (void);
-
-    /** \brief Wait for launched processes to complete */
-    HYD_status(*wait_for_completion) (int timeout);
-
-    /** \brief Query for the universe size */
-    HYD_status(*query_usize) (int *size);
-
-    /** \brief Query the ID of a proxy */
-    HYD_status(*query_proxy_id) (int *proxy_id);
-
-    /** \brief Query if an environment variable should be inherited */
-    HYD_status(*query_env_inherit) (const char *env_name, int *ret);
-};
-
-/** \cond */
-extern struct HYDT_bsci_fns HYDT_bsci_fns;
-extern struct HYDT_bsci_info HYDT_bsci_info;
-/** \endcond */
-
-/**
- * \brief HYDT_bsci_init - Initialize the bootstrap control device
- *
- * \param[in]   rmk             Resource management kernel to use
- * \param[in]   launcher        Launcher to use
- * \param[in]   launcher_exec   Launcher executable to use (optional)
- * \param[in]   enablex         Enable/disable X-forwarding (hint only)
- * \param[in]   debug           Enable/disable debugging
- *
- * This function initializes the bootstrap control device. This needs
- * to be called before any other BSCI function. Implementors are
- * expected to set any bootstrap implementation specific function
- * pointers in this function to be used by later BSCI calls.
- */
-HYD_status HYDT_bsci_init(const char *rmk, const char *launcher,
-                          const char *launcher_exec, int enablex,
-                          int debug);
-
-
-/**
- * \brief HYDT_bsci_launch_procs - Launch processes
- *
- * \param[in]   args            Arguments to be used for the launched processes
- * \param[in]   node_list       List of nodes to launch processes on
- * \param[out]  control_fd      Control socket to communicate with the launched process
- * \param[in]   stdout_cb       Stdout callback function
- * \param[in]   stderr_cb       Stderr callback function
- *
- * This function appends a proxy ID to the end of the args list and
- * uses this combined list as the executable and its arguments to
- * launch. Upper layers will need to account for this automatic
- * addition of the proxy ID.
- *
- * Launchers that perform sequential launches (one process at a time),
- * should set the proxy ID string in sequential order. Launchers that
- * perform parallel launches should set the proxy ID string to "-1",
- * but allow proxies to query their ID information on each node using
- * the HYDT_bsci_query_proxy_id function.
- */
-HYD_status HYDT_bsci_launch_procs(char **args, struct HYD_node *node_list, int *control_fd);
-
-
-/**
- * \brief HYDT_bsci_finalize - Finalize the bootstrap control device
- *
- * This function cleans up any relevant state that the bootstrap
- * device maintained.
- */
-HYD_status HYDT_bsci_finalize(void);
-
-
-/**
- * \brief HYDT_bsci_wait_for_completion - Wait for launched processes to complete
- *
- * \param[in]  timeout        Time to wait for
- *
- * \param[ret] status         HYD_TIMED_OUT if the timer expired
- *
- * This function waits for all processes it launched to finish. The
- * launcher should keep track of the processes it is launching and
- * wait for their completion.
- */
-HYD_status HYDT_bsci_wait_for_completion(int timeout);
-
-
-/**
- * \brief HYDT_bsci_query_node_list - Query for node list information
- *
- * \param[out] node_list       Lists of nodes available
- *
- * This function allows the upper layers to query the available
- * nodes.
- */
-HYD_status HYDT_bsci_query_node_list(struct HYD_node **node_list);
-
-
-/**
- * \brief HYDT_bsci_query_jobid - Query for Job ID information
- *
- * \param[out] jobid       Job ID
- *
- * This function allows the upper layers to query the job ID.
- */
-HYD_status HYDT_bsci_query_jobid(char **jobid);
-
-
-/**
- * \brief HYDT_bsci_query_usize - Query for the universe size
- *
- * \param[out]  size       Maximum number of processes that can be launched
- *
- * If the underlying system allows for multitasking many processes on
- * a single processing element, the launcher should return "-1"
- * (representing infinite). If not, it should specify the number of
- * processes that can be spawned.
- */
-HYD_status HYDT_bsci_query_usize(int *size);
-
-
-/**
- * \brief HYDT_bsci_query_proxy_id - Query the ID of a proxy
- *
- * \param[out]  proxy_id    My proxy ID
- *
- * This function is called by each proxy if the proxy_str_id is
- * specified as "-1" during launch.
- */
-HYD_status HYDT_bsci_query_proxy_id(int *proxy_id);
-
-/**
- * \brief HYDT_bsci_query_env_inherit - Query if an environment
- * variable is safe to be inherited
- *
- * \param[in]  env_name    Name of the environment variable
- * \param[out] ret         Boolean for true (1) or false (0)
- *
- * This function is used to check if an environment variable inherited
- * from the user's environment is safe to be propagated to the remote
- * processes.
- */
-HYD_status HYDT_bsci_query_env_inherit(const char *env_name, int *ret);
-
-/**
- * \brief HYDT_bsci_query_native_int - Query if the RMK integrates
- * natively with the RM
- *
- * \param[out] ret                    Boolean for true (1) or false (0)
- *
- * This function is used to check if an environment variable inherited
- * from the user's environment is safe to be propagated to the remote
- * processes.
- */
-HYD_status HYDT_bsci_query_native_int(int *ret);
-
-/*! @} */
-
-/* Each launcher has to expose an initialization function */
-@hydra_launcher_init_decl@
-@hydra_rmk_init_decl@
-
-#endif /* BSCI_H_INCLUDED */



More information about the mpich2-commits mailing list