[mpich2-commits] r4156 - in mpich2/trunk/src/pm/hydra: bootstrap/fork bootstrap/slurm bootstrap/ssh bootstrap/utils control/consys demux include launcher/mpiexec launcher/utils pm/pmiserv pm/utils utils/args utils/env utils/launch utils/sock utils/string
balaji at mcs.anl.gov
balaji at mcs.anl.gov
Sun Mar 22 00:18:12 CDT 2009
Author: balaji
Date: 2009-03-22 00:18:11 -0500 (Sun, 22 Mar 2009)
New Revision: 4156
Modified:
mpich2/trunk/src/pm/hydra/bootstrap/fork/fork_launch.c
mpich2/trunk/src/pm/hydra/bootstrap/slurm/slurm_launch.c
mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_launch.c
mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_wait.c
mpich2/trunk/src/pm/hydra/control/consys/consys_close.c
mpich2/trunk/src/pm/hydra/control/consys/consys_launch.c
mpich2/trunk/src/pm/hydra/control/consys/consys_wait.c
mpich2/trunk/src/pm/hydra/demux/demux.c
mpich2/trunk/src/pm/hydra/include/hydra.h
mpich2/trunk/src/pm/hydra/include/hydra_base.h
mpich2/trunk/src/pm/hydra/include/hydra_utils.h
mpich2/trunk/src/pm/hydra/launcher/mpiexec/callback.c
mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c
mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c
mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c
mpich2/trunk/src/pm/hydra/launcher/utils/lchu.h
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle.c
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle.h
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle_v1.c
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.c
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.h
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy_cb.c
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy_utils.c
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_cb.c
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c
mpich2/trunk/src/pm/hydra/pm/utils/pmi.c
mpich2/trunk/src/pm/hydra/utils/args/args.c
mpich2/trunk/src/pm/hydra/utils/env/env.c
mpich2/trunk/src/pm/hydra/utils/launch/allocate.c
mpich2/trunk/src/pm/hydra/utils/launch/launch.c
mpich2/trunk/src/pm/hydra/utils/sock/sock.c
mpich2/trunk/src/pm/hydra/utils/string/string.c
Log:
Added support for allowing each proxy to handle non-contiguous
PMI_IDs. This lets us use a single proxy for multiple executables when
they are launched on the same node. This should fix ticket #464 and is
a big step towards tickets #445 (hierarchical proxies part) and #457
(process-core mapping).
Modified: mpich2/trunk/src/pm/hydra/bootstrap/fork/fork_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/fork/fork_launch.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/bootstrap/fork/fork_launch.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -13,8 +13,7 @@
HYD_Status HYD_BSCD_fork_launch_procs(void)
{
- struct HYD_Proc_params *proc_params;
- struct HYD_Partition_list *partition;
+ struct HYD_Partition *partition;
char *client_arg[HYD_EXEC_ARGS];
int i, arg, process_id;
HYD_Status status = HYD_SUCCESS;
@@ -26,33 +25,28 @@
* they want launched. Without this functionality, the proxy
* cannot use this and will have to perform its own launch. */
process_id = 0;
- for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
- for (partition = proc_params->partition; partition; partition = partition->next) {
- if (partition->group_rank) /* Only rank 0 is spawned */
- continue;
+ for (partition = handle.partition_list; partition; partition = partition->next) {
- /* Setup the executable arguments */
- arg = 0;
- for (i = 0; partition->args[i]; i++)
- client_arg[arg++] = MPIU_Strdup(partition->args[i]);
- client_arg[arg++] = NULL;
+ /* Setup the executable arguments */
+ arg = 0;
+ for (i = 0; partition->proxy_args[i]; i++)
+ client_arg[arg++] = MPIU_Strdup(partition->proxy_args[i]);
+ client_arg[arg++] = NULL;
- /* The stdin pointer will be some value for process_id 0;
- * for everyone else, it's NULL. */
- status = HYDU_create_process(client_arg, (process_id == 0 ? &handle.in : NULL),
- &partition->out, &partition->err, &partition->pid,
- -1);
- HYDU_ERR_POP(status, "create process returned error\n");
+ /* The stdin pointer will be some value for process_id 0; for
+ * everyone else, it's NULL. */
+ status = HYDU_create_process(client_arg, (process_id == 0 ? &handle.in : NULL),
+ &partition->out, &partition->err, &partition->pid, -1);
+ HYDU_ERR_POP(status, "create process returned error\n");
- for (arg = 0; client_arg[arg]; arg++)
- HYDU_FREE(client_arg[arg]);
+ for (arg = 0; client_arg[arg]; arg++)
+ HYDU_FREE(client_arg[arg]);
- /* For the remaining processes, set the stdin fd to -1 */
- if (process_id != 0)
- handle.in = -1;
+ /* For the remaining processes, set the stdin fd to -1 */
+ if (process_id != 0)
+ handle.in = -1;
- process_id++;
- }
+ process_id++;
}
fn_exit:
Modified: mpich2/trunk/src/pm/hydra/bootstrap/slurm/slurm_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/slurm/slurm_launch.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/bootstrap/slurm/slurm_launch.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -13,8 +13,7 @@
HYD_Status HYD_BSCD_slurm_launch_procs(void)
{
- struct HYD_Proc_params *proc_params;
- struct HYD_Partition_list *partition;
+ struct HYD_Partition *partition;
char *client_arg[HYD_EXEC_ARGS];
int i, arg, process_id;
HYD_Status status = HYD_SUCCESS;
@@ -26,52 +25,39 @@
* they want launched. Without this functionality, the proxy
* cannot use this and will have to perform its own launch. */
process_id = 0;
- for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
- for (partition = proc_params->partition; partition; partition = partition->next) {
- if (partition->group_rank) /* Only rank 0 is spawned */
- continue;
+ for (partition = handle.partition_list; partition; partition = partition->next) {
- /* Setup the executable arguments */
- arg = 0;
- /* FIXME: Get the path to srun */
- client_arg[arg++] = MPIU_Strdup("srun");
+ /* Setup the executable arguments */
+ arg = 0;
+ /* FIXME: Get the path to srun */
+ client_arg[arg++] = MPIU_Strdup("srun");
- /* Allow X forwarding only if explicitly requested */
- if (handle.enablex == 1)
- client_arg[arg++] = MPIU_Strdup("-X");
- else if (handle.enablex == 0)
- client_arg[arg++] = MPIU_Strdup("-x");
- else /* default mode is disable X */
- client_arg[arg++] = MPIU_Strdup("-x");
+ /* Currently, we do not support any partition names other than
+ * host names */
+ client_arg[arg++] = MPIU_Strdup(partition->name);
- /* Currently, we do not support any partition names other
- * than host names */
- client_arg[arg++] = MPIU_Strdup(partition->name);
+ for (i = 0; partition->proxy_args[i]; i++)
+ client_arg[arg++] = MPIU_Strdup(partition->proxy_args[i]);
- for (i = 0; partition->args[i]; i++)
- client_arg[arg++] = MPIU_Strdup(partition->args[i]);
+ client_arg[arg++] = NULL;
- client_arg[arg++] = NULL;
+ /* The stdin pointer will be some value for process_id 0; for
+ * everyone else, it's NULL. */
+ status = HYDU_create_process(client_arg, (process_id == 0 ? &handle.in : NULL),
+ &partition->out, &partition->err, &partition->pid, -1);
+ if (status != HYD_SUCCESS) {
+ HYDU_Error_printf("bootstrap spawn process returned error\n");
+ goto fn_fail;
+ }
- /* The stdin pointer will be some value for process_id 0;
- * for everyone else, it's NULL. */
- status = HYDU_create_process(client_arg, (process_id == 0 ? &handle.in : NULL),
- &partition->out, &partition->err, &partition->pid,
- -1);
- if (status != HYD_SUCCESS) {
- HYDU_Error_printf("bootstrap spawn process returned error\n");
- goto fn_fail;
- }
+ for (arg = 0; client_arg[arg]; arg++)
+ HYDU_FREE(client_arg[arg]);
- for (arg = 0; client_arg[arg]; arg++)
- HYDU_FREE(client_arg[arg]);
+ /* For the remaining processes, set the stdin fd to -1 */
+ if (process_id != 0)
+ handle.in = -1;
- /* For the remaining processes, set the stdin fd to -1 */
- if (process_id != 0)
- handle.in = -1;
-
- process_id++;
- }
+ process_id++;
}
fn_exit:
Modified: mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_launch.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_launch.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -19,8 +19,7 @@
*/
HYD_Status HYD_BSCD_ssh_launch_procs(void)
{
- struct HYD_Proc_params *proc_params;
- struct HYD_Partition_list *partition;
+ struct HYD_Partition *partition;
char *client_arg[HYD_EXEC_ARGS];
int i, arg, process_id;
HYD_Status status = HYD_SUCCESS;
@@ -32,47 +31,42 @@
* they want launched. Without this functionality, the proxy
* cannot use this and will have to perform its own launch. */
process_id = 0;
- for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
- for (partition = proc_params->partition; partition; partition = partition->next) {
- if (partition->group_rank) /* Only rank 0 is spawned */
- continue;
+ for (partition = handle.partition_list; partition; partition = partition->next) {
- /* Setup the executable arguments */
- arg = 0;
- client_arg[arg++] = MPIU_Strdup("/usr/bin/ssh");
+ /* Setup the executable arguments */
+ arg = 0;
+ client_arg[arg++] = MPIU_Strdup("/usr/bin/ssh");
- /* Allow X forwarding only if explicitly requested */
- if (handle.enablex == 1)
- client_arg[arg++] = MPIU_Strdup("-X");
- else if (handle.enablex == 0)
- client_arg[arg++] = MPIU_Strdup("-x");
- else /* default mode is disable X */
- client_arg[arg++] = MPIU_Strdup("-x");
+ /* Allow X forwarding only if explicitly requested */
+ if (handle.enablex == 1)
+ client_arg[arg++] = MPIU_Strdup("-X");
+ else if (handle.enablex == 0)
+ client_arg[arg++] = MPIU_Strdup("-x");
+ else /* default mode is disable X */
+ client_arg[arg++] = MPIU_Strdup("-x");
- /* ssh does not support any partition names other than host names */
- client_arg[arg++] = MPIU_Strdup(partition->name);
+ /* ssh does not support any partition names other than host names */
+ client_arg[arg++] = MPIU_Strdup(partition->name);
- for (i = 0; partition->args[i]; i++)
- client_arg[arg++] = MPIU_Strdup(partition->args[i]);
+ for (i = 0; partition->proxy_args[i]; i++)
+ client_arg[arg++] = MPIU_Strdup(partition->proxy_args[i]);
- client_arg[arg++] = NULL;
+ client_arg[arg++] = NULL;
- /* The stdin pointer will be some value for process_id 0;
- * for everyone else, it's NULL. */
- status = HYDU_create_process(client_arg, (process_id == 0 ? &handle.in : NULL),
- &partition->out, &partition->err, &partition->pid,
- -1);
- HYDU_ERR_POP(status, "create process returned error\n");
+ /* The stdin pointer will be some value for process_id 0; for
+ * everyone else, it's NULL. */
+ status = HYDU_create_process(client_arg, (process_id == 0 ? &handle.in : NULL),
+ &partition->out, &partition->err, &partition->pid, -1);
+ HYDU_ERR_POP(status, "create process returned error\n");
- for (arg = 0; client_arg[arg]; arg++)
- HYDU_FREE(client_arg[arg]);
+ for (arg = 0; client_arg[arg]; arg++)
+ HYDU_FREE(client_arg[arg]);
- /* For the remaining processes, set the stdin fd to -1 */
- if (process_id != 0)
- handle.in = -1;
+ /* For the remaining processes, set the stdin fd to -1 */
+ if (process_id != 0)
+ handle.in = -1;
- process_id++;
- }
+ process_id++;
}
fn_exit:
Modified: mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_wait.c
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_wait.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_wait.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -19,17 +19,15 @@
HYD_Status HYD_BSCU_wait_for_completion(void)
{
int pid, ret_status, not_completed;
- struct HYD_Proc_params *proc_params;
- struct HYD_Partition_list *partition;
+ struct HYD_Partition *partition;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
not_completed = 0;
- for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next)
- for (partition = proc_params->partition; partition; partition = partition->next)
- if (partition->exit_status == -1)
- not_completed++;
+ for (partition = handle.partition_list; partition; partition = partition->next)
+ if (partition->exit_status == -1)
+ not_completed++;
/* We get here only after the I/O sockets have been closed. If the
* application did not manually close its stdout and stderr
@@ -40,14 +38,10 @@
pid = waitpid(-1, &ret_status, WNOHANG);
if (pid > 0) {
/* Find the pid and mark it as complete. */
- for (proc_params = handle.proc_params; proc_params;
- proc_params = proc_params->next) {
- for (partition = proc_params->partition; partition;
- partition = partition->next) {
- if (partition->pid == pid) {
- partition->exit_status = WEXITSTATUS(ret_status);
- not_completed--;
- }
+ for (partition = handle.partition_list; partition; partition = partition->next) {
+ if (partition->pid == pid) {
+ partition->exit_status = WEXITSTATUS(ret_status);
+ not_completed--;
}
}
}
Modified: mpich2/trunk/src/pm/hydra/control/consys/consys_close.c
===================================================================
--- mpich2/trunk/src/pm/hydra/control/consys/consys_close.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/control/consys/consys_close.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -14,8 +14,7 @@
HYD_Status HYD_CSI_close_fd(int fd)
{
- struct HYD_Proc_params *proc_params;
- struct HYD_Partition_list *partition;
+ struct HYD_Partition *partition;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
@@ -26,17 +25,15 @@
close(fd);
/* Find the FD in the handle and remove it. */
- for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
- for (partition = proc_params->partition; partition; partition = partition->next) {
- if (partition->out == fd) {
- partition->out = -1;
- goto fn_exit;
- }
- if (partition->err == fd) {
- partition->err = -1;
- goto fn_exit;
- }
+ for (partition = handle.partition_list; partition; partition = partition->next) {
+ if (partition->out == fd) {
+ partition->out = -1;
+ goto fn_exit;
}
+ if (partition->err == fd) {
+ partition->err = -1;
+ goto fn_exit;
+ }
}
fn_exit:
Modified: mpich2/trunk/src/pm/hydra/control/consys/consys_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/control/consys/consys_launch.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/control/consys/consys_launch.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -14,8 +14,7 @@
HYD_Status HYD_CSI_launch_procs(void)
{
- struct HYD_Proc_params *proc_params;
- struct HYD_Partition_list *partition;
+ struct HYD_Partition *partition;
int stdin_fd;
HYD_Status status = HYD_SUCCESS;
@@ -24,16 +23,12 @@
status = HYD_PMCI_launch_procs();
HYDU_ERR_POP(status, "PM returned error while launching processes\n");
- for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
- for (partition = proc_params->partition; partition; partition = partition->next) {
- status =
- HYD_DMX_register_fd(1, &partition->out, HYD_STDOUT, proc_params->stdout_cb);
- HYDU_ERR_POP(status, "demux returned error registering fd\n");
+ for (partition = handle.partition_list; partition; partition = partition->next) {
+ status = HYD_DMX_register_fd(1, &partition->out, HYD_STDOUT, handle.stdout_cb);
+ HYDU_ERR_POP(status, "demux returned error registering fd\n");
- status =
- HYD_DMX_register_fd(1, &partition->err, HYD_STDOUT, proc_params->stderr_cb);
- HYDU_ERR_POP(status, "demux returned error registering fd\n");
- }
+ status = HYD_DMX_register_fd(1, &partition->err, HYD_STDOUT, handle.stderr_cb);
+ HYDU_ERR_POP(status, "demux returned error registering fd\n");
}
if (handle.in != -1) { /* Only process_id 0 */
Modified: mpich2/trunk/src/pm/hydra/control/consys/consys_wait.c
===================================================================
--- mpich2/trunk/src/pm/hydra/control/consys/consys_wait.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/control/consys/consys_wait.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -15,8 +15,7 @@
HYD_Status HYD_CSI_wait_for_completion(void)
{
int sockets_open;
- struct HYD_Proc_params *proc_params;
- struct HYD_Partition_list *partition;
+ struct HYD_Partition *partition;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
@@ -29,15 +28,11 @@
/* Check to see if there's any open read socket left; if there
* are, we will just wait for more events. */
sockets_open = 0;
- for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
- for (partition = proc_params->partition; partition; partition = partition->next) {
- if (partition->out != -1 || partition->err != -1) {
- sockets_open++;
- break;
- }
- }
- if (sockets_open)
+ for (partition = handle.partition_list; partition; partition = partition->next) {
+ if (partition->out != -1 || partition->err != -1) {
+ sockets_open++;
break;
+ }
}
if (sockets_open && HYDU_time_left(handle.start, handle.timeout))
Modified: mpich2/trunk/src/pm/hydra/demux/demux.c
===================================================================
--- mpich2/trunk/src/pm/hydra/demux/demux.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/demux/demux.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -32,8 +32,7 @@
for (i = 0; i < num_fds; i++)
if (fd[i] < 0)
- HYDU_ERR_SETANDJUMP1(status, HYD_INTERNAL_ERROR, "registering bad fd %d\n",
- fd[i]);
+ HYDU_ERR_SETANDJUMP1(status, HYD_INTERNAL_ERROR, "registering bad fd %d\n", fd[i]);
HYDU_MALLOC(cb_element, HYD_DMXI_callback_t *, sizeof(HYD_DMXI_callback_t), status);
cb_element->num_fds = num_fds;
Modified: mpich2/trunk/src/pm/hydra/include/hydra.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra.h 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/include/hydra.h 2009-03-22 05:18:11 UTC (rev 4156)
@@ -30,6 +30,8 @@
int in;
HYD_Status(*stdin_cb) (int fd, HYD_Event_t events);
+ HYD_Status(*stdout_cb) (int fd, HYD_Event_t events);
+ HYD_Status(*stderr_cb) (int fd, HYD_Event_t events);
/* Start time and timeout. These are filled in by the launcher,
* but are utilized by the demux engine and the boot-strap server
@@ -37,27 +39,11 @@
HYD_Time start;
HYD_Time timeout;
- /* Each structure will contain all hosts/cores that use the same
- * executable and environment. */
- struct HYD_Proc_params {
- int exec_proc_count;
- char *exec[HYD_EXEC_ARGS];
+ int one_pass_count;
- struct HYD_Partition_list *partition;
+ struct HYD_Exec_info *exec_info_list;
+ struct HYD_Partition *partition_list;
- /* Local environment */
- HYD_Env_t *user_env;
- HYD_Env_prop_t prop;
- HYD_Env_t *prop_env;
-
- /* Callback functions for the stdout/stderr events. These can
- * be the same. */
- HYD_Status(*stdout_cb) (int fd, HYD_Event_t events);
- HYD_Status(*stderr_cb) (int fd, HYD_Event_t events);
-
- struct HYD_Proc_params *next;
- } *proc_params;
-
/* Random parameters used for internal code */
int func_depth;
char stdin_tmp_buf[HYD_TMPBUF_SIZE];
@@ -67,20 +53,6 @@
typedef struct HYD_Handle_ HYD_Handle;
-/* We'll use this as the central handle that has most of the
- * information needed by everyone. All data to be written has to be
- * done before the HYD_CSI_wait_for_completion() function is called,
- * except for two exceptions:
- *
- * 1. The timeout value is initially added by the launcher before the
- * HYD_CSI_wait_for_completion() function is called, but can be edited
- * by the control system within this call. There's no guarantee on
- * what value it will contain for the other layers.
- *
- * 2. There is no guarantee on what the exit status will contain till
- * the HYD_CSI_wait_for_completion() function returns (where the
- * bootstrap server can fill out these values).
- */
extern HYD_Handle handle;
#endif /* HYDRA_H_INCLUDED */
Modified: mpich2/trunk/src/pm/hydra/include/hydra_base.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra_base.h 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/include/hydra_base.h 2009-03-22 05:18:11 UTC (rev 4156)
@@ -58,6 +58,16 @@
#define HYD_DEFAULT_PROXY_PORT 9899
+#define HYD_STDOUT (1)
+#define HYD_STDIN (2)
+
+typedef unsigned short HYD_Event_t;
+
+#define HYD_TMPBUF_SIZE (64 * 1024)
+#define HYD_EXEC_ARGS 200
+
+
+/* Status information */
typedef enum {
HYD_SUCCESS = 0,
HYD_GRACEFUL_ABORT,
@@ -67,6 +77,74 @@
HYD_INTERNAL_ERROR
} HYD_Status;
+
+/* Environment information */
+typedef struct HYD_Env {
+ char *env_name;
+ char *env_value;
+ struct HYD_Env *next;
+} HYD_Env_t;
+
+typedef enum {
+ HYD_ENV_PROP_UNSET,
+ HYD_ENV_PROP_ALL,
+ HYD_ENV_PROP_NONE,
+ HYD_ENV_PROP_LIST
+} HYD_Env_prop_t;
+
+/* List of contiguous segments of processes on a partition */
+struct HYD_Partition_segment {
+ int start_pid;
+ int proc_count;
+ char **mapping;
+ struct HYD_Partition_segment *next;
+};
+
+/* Executables on a partition */
+struct HYD_Partition_exec {
+ char *exec[HYD_EXEC_ARGS];
+ int proc_count;
+ HYD_Env_prop_t prop;
+ HYD_Env_t *prop_env;
+ struct HYD_Partition_exec *next;
+};
+
+/* Partition information */
+struct HYD_Partition {
+ char *name;
+ int total_proc_count;
+
+ /* Segment list will contain one-pass of the hosts file */
+ struct HYD_Partition_segment *segment_list;
+ struct HYD_Partition_exec *exec_list;
+
+ /* Spawn information: each partition can have one or more
+ * proxies. For the time being, we only support one proxy per
+ * partition, but this can be easily extended later. We will also
+ * need to give different ports for the proxies to listen on in
+ * that case. */
+ int pid;
+ int out;
+ int err;
+ int exit_status;
+ char *proxy_args[HYD_EXEC_ARGS]; /* Full argument list */
+
+ struct HYD_Partition *next;
+};
+
+struct HYD_Exec_info {
+ int exec_proc_count;
+ char *exec[HYD_EXEC_ARGS];
+
+ /* Local environment */
+ HYD_Env_t *user_env;
+ HYD_Env_prop_t prop;
+ HYD_Env_t *prop_env;
+
+ struct HYD_Exec_info *next;
+} *exec_info;
+
+
#define HYDU_ERR_POP(status, message) \
{ \
if (status != HYD_SUCCESS && status != HYD_GRACEFUL_ABORT) { \
@@ -124,27 +202,7 @@
} \
}
-#define HYD_STDOUT (1)
-#define HYD_STDIN (2)
-typedef unsigned short HYD_Event_t;
-
-#define HYD_TMPBUF_SIZE (64 * 1024)
-#define HYD_EXEC_ARGS 200
-
-typedef struct HYD_Env {
- char *env_name;
- char *env_value;
- struct HYD_Env *next;
-} HYD_Env_t;
-
-typedef enum {
- HYD_ENV_PROP_UNSET,
- HYD_ENV_PROP_ALL,
- HYD_ENV_PROP_NONE,
- HYD_ENV_PROP_LIST
-} HYD_Env_prop_t;
-
#if defined ENABLE_WARNINGS
#define HYDU_Warn_printf HYDU_Error_printf
#else
Modified: mpich2/trunk/src/pm/hydra/include/hydra_utils.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra_utils.h 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/include/hydra_utils.h 2009-03-22 05:18:11 UTC (rev 4156)
@@ -24,6 +24,7 @@
/* env */
+HYD_Env_t *HYDU_str_to_env(char *str);
HYD_Status HYDU_list_append_env_to_str(HYD_Env_t * env_list, char **str_list);
HYD_Status HYDU_list_global_env(HYD_Env_t ** env_list);
HYD_Env_t *HYDU_env_list_dup(HYD_Env_t * env);
@@ -33,28 +34,19 @@
HYD_Env_t *HYDU_env_lookup(HYD_Env_t env, HYD_Env_t * env_list);
HYD_Status HYDU_append_env_to_list(HYD_Env_t env, HYD_Env_t ** env_list);
void HYDU_putenv(char *env_str);
-HYD_Status HYDU_comma_list_to_env_list(char *str, HYD_Env_t **env_list);
+HYD_Status HYDU_comma_list_to_env_list(char *str, HYD_Env_t ** env_list);
/* launch */
-struct HYD_Partition_list {
- char *name;
- int proc_count;
- char **mapping; /* Can be core IDs or something else */
-
- int group_id; /* Assumed to be in ascending order */
- int group_rank; /* Rank within the group */
- int pid;
- int out;
- int err;
- int exit_status;
- char *args[HYD_EXEC_ARGS];
-
- struct HYD_Partition_list *next;
-};
-
-HYD_Status HYDU_alloc_partition(struct HYD_Partition_list **partition);
-void HYDU_free_partition_list(struct HYD_Partition_list *partition);
+HYD_Status HYDU_alloc_partition(struct HYD_Partition **partition);
+void HYDU_free_partition_list(struct HYD_Partition *partition);
+HYD_Status HYDU_alloc_exec_info(struct HYD_Exec_info **exec_info);
+void HYDU_free_exec_info_list(struct HYD_Exec_info *exec_info_list);
+HYD_Status HYDU_alloc_partition_segment(struct HYD_Partition_segment **segment);
+HYD_Status HYDU_merge_partition_segment(char *name, struct HYD_Partition_segment *segment,
+ struct HYD_Partition **partition_list);
+HYD_Status HYDU_alloc_partition_exec(struct HYD_Partition_exec **exec);
+HYD_Status HYDU_create_host_list(char *host_file, struct HYD_Partition **partition_list);
HYD_Status HYDU_create_process(char **client_arg, int *in, int *out, int *err,
int *pid, int core);
Modified: mpich2/trunk/src/pm/hydra/launcher/mpiexec/callback.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/mpiexec/callback.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/launcher/mpiexec/callback.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -51,7 +51,7 @@
HYDU_ERR_SETANDJUMP2(status, status, "stdout callback error on %d (%s)\n",
fd, HYDU_strerror(errno))
- if (closed) {
+ if (closed) {
status = HYD_CSI_close_fd(fd);
HYDU_ERR_SETANDJUMP2(status, status, "socket close error on fd %d (%s)\n",
fd, HYDU_strerror(errno));
Modified: mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -47,8 +47,7 @@
int main(int argc, char **argv)
{
- struct HYD_Proc_params *proc_params;
- struct HYD_Partition_list *partition;
+ struct HYD_Partition *partition;
int exit_status = 0;
int timeout;
HYD_Status status = HYD_SUCCESS;
@@ -64,23 +63,22 @@
goto fn_fail;
}
- if (handle.debug)
- HYD_LCHU_print_params();
-
/* Convert the host file to a host list */
- status = HYD_LCHU_create_host_list();
+ status = HYDU_create_host_list(handle.host_file, &handle.partition_list);
HYDU_ERR_POP(status, "unable to create host list\n");
/* Consolidate the environment list that we need to propagate */
status = HYD_LCHU_create_env_list();
HYDU_ERR_POP(status, "unable to create env list\n");
- proc_params = handle.proc_params;
- while (proc_params) {
- proc_params->stdout_cb = HYD_LCHI_stdout_cb;
- proc_params->stderr_cb = HYD_LCHI_stderr_cb;
- proc_params = proc_params->next;
- }
+ status = HYD_LCHU_merge_exec_info_to_partition();
+ HYDU_ERR_POP(status, "unable to merge exec info\n");
+
+ if (handle.debug)
+ HYD_LCHU_print_params();
+
+ handle.stdout_cb = HYD_LCHI_stdout_cb;
+ handle.stderr_cb = HYD_LCHI_stderr_cb;
handle.stdin_cb = HYD_LCHI_stdin_cb;
HYDU_time_set(&handle.start, NULL); /* NULL implies right now */
@@ -101,9 +99,8 @@
/* Check for the exit status for all the processes */
exit_status = 0;
- for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next)
- for (partition = proc_params->partition; partition; partition = partition->next)
- exit_status |= partition->exit_status;
+ for (partition = handle.partition_list; partition; partition = partition->next)
+ exit_status |= partition->exit_status;
/* Call finalize functions for lower layers to cleanup their resources */
status = HYD_CSI_finalize();
Modified: mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -32,11 +32,11 @@
HYD_Status HYD_LCHI_get_parameters(char **t_argv)
{
- int i;
+ int i, env_pref;
char **argv = t_argv;
char *env_name, *env_value, *str1, *str2, *progname = *argv;
HYD_Env_t *env;
- struct HYD_Proc_params *proc_params;
+ struct HYD_Exec_info *exec_info;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
@@ -108,35 +108,35 @@
}
if (!strcmp(*argv, "-envall")) {
- status = HYD_LCHU_get_current_proc_params(&proc_params);
- HYDU_ERR_POP(status, "get_current_proc_params returned error\n");
+ status = HYD_LCHU_get_current_exec_info(&exec_info);
+ HYDU_ERR_POP(status, "get_current_exec_info returned error\n");
- HYDU_ERR_CHKANDJUMP(status, proc_params->prop != HYD_ENV_PROP_UNSET,
+ HYDU_ERR_CHKANDJUMP(status, exec_info->prop != HYD_ENV_PROP_UNSET,
HYD_INTERNAL_ERROR, "duplicate prop setting\n");
- proc_params->prop = HYD_ENV_PROP_ALL;
+ exec_info->prop = HYD_ENV_PROP_ALL;
continue;
}
if (!strcmp(*argv, "-envnone")) {
- status = HYD_LCHU_get_current_proc_params(&proc_params);
- HYDU_ERR_POP(status, "get_current_proc_params returned error\n");
+ status = HYD_LCHU_get_current_exec_info(&exec_info);
+ HYDU_ERR_POP(status, "get_current_exec_info returned error\n");
- HYDU_ERR_CHKANDJUMP(status, proc_params->prop != HYD_ENV_PROP_UNSET,
+ HYDU_ERR_CHKANDJUMP(status, exec_info->prop != HYD_ENV_PROP_UNSET,
HYD_INTERNAL_ERROR, "duplicate prop setting\n");
- proc_params->prop = HYD_ENV_PROP_NONE;
+ exec_info->prop = HYD_ENV_PROP_NONE;
continue;
}
if (!strcmp(*argv, "-envlist")) {
- status = HYD_LCHU_get_current_proc_params(&proc_params);
- HYDU_ERR_POP(status, "get_current_proc_params returned error\n");
+ status = HYD_LCHU_get_current_exec_info(&exec_info);
+ HYDU_ERR_POP(status, "get_current_exec_info returned error\n");
- HYDU_ERR_CHKANDJUMP(status, proc_params->prop != HYD_ENV_PROP_UNSET,
+ HYDU_ERR_CHKANDJUMP(status, exec_info->prop != HYD_ENV_PROP_UNSET,
HYD_INTERNAL_ERROR, "duplicate prop setting\n");
- proc_params->prop = HYD_ENV_PROP_LIST;
+ exec_info->prop = HYD_ENV_PROP_LIST;
INCREMENT_ARGV(status);
- HYDU_comma_list_to_env_list(*argv, &proc_params->user_env);
+ HYDU_comma_list_to_env_list(*argv, &exec_info->user_env);
continue;
}
@@ -149,10 +149,10 @@
status = HYDU_env_create(&env, env_name, env_value);
HYDU_ERR_POP(status, "unable to create env struct\n");
- status = HYD_LCHU_get_current_proc_params(&proc_params);
- HYDU_ERR_POP(status, "get_current_proc_params returned error\n");
+ status = HYD_LCHU_get_current_exec_info(&exec_info);
+ HYDU_ERR_POP(status, "get_current_exec_info returned error\n");
- HYDU_append_env_to_list(*env, &proc_params->user_env);
+ HYDU_append_env_to_list(*env, &exec_info->user_env);
continue;
}
@@ -165,13 +165,13 @@
if (!strcmp(*argv, "-n") || !strcmp(*argv, "-np")) {
INCREMENT_ARGV(status);
- status = HYD_LCHU_get_current_proc_params(&proc_params);
- HYDU_ERR_POP(status, "get_current_proc_params returned error\n");
+ status = HYD_LCHU_get_current_exec_info(&exec_info);
+ HYDU_ERR_POP(status, "get_current_exec_info returned error\n");
- if (proc_params->exec_proc_count != 0)
+ if (exec_info->exec_proc_count != 0)
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "duplicate proc count\n");
- proc_params->exec_proc_count = atoi(*argv);
+ exec_info->exec_proc_count = atoi(*argv);
continue;
}
@@ -211,22 +211,22 @@
if (*argv[0] == '-')
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "unrecognized argument\n");
- status = HYD_LCHU_get_current_proc_params(&proc_params);
- HYDU_ERR_POP(status, "get_current_proc_params returned error\n");
+ status = HYD_LCHU_get_current_exec_info(&exec_info);
+ HYDU_ERR_POP(status, "get_current_exec_info returned error\n");
/* Read the executable till you hit the end of a ":" */
do {
if (!strcmp(*argv, ":")) { /* Next executable */
- status = HYD_LCHU_allocate_proc_params(&proc_params->next);
- HYDU_ERR_POP(status, "allocate_proc_params returned error\n");
+ status = HYDU_alloc_exec_info(&exec_info->next);
+ HYDU_ERR_POP(status, "allocate_exec_info returned error\n");
break;
}
i = 0;
- while (proc_params->exec[i] != NULL)
+ while (exec_info->exec[i] != NULL)
i++;
- proc_params->exec[i] = MPIU_Strdup(*argv);
- proc_params->exec[i + 1] = NULL;
+ exec_info->exec[i] = MPIU_Strdup(*argv);
+ exec_info->exec[i + 1] = NULL;
} while (++argv && *argv);
if (!(*argv))
@@ -238,7 +238,7 @@
if (handle.debug == -1)
handle.debug = 0;
- if (handle.proc_params == NULL)
+ if (handle.exec_info_list == NULL)
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "no local options set\n");
if (handle.wdir == NULL) {
@@ -261,24 +261,38 @@
status = HYDU_get_base_path(progname, handle.wdir, &handle.base_path);
HYDU_ERR_POP(status, "unable to get base path\n");
- proc_params = handle.proc_params;
- while (proc_params) {
- if (proc_params->exec[0] == NULL)
+ /* Check if any individual app has an environment preference */
+ env_pref = 0;
+ for (exec_info = handle.exec_info_list; exec_info; exec_info = exec_info->next) {
+ if (exec_info->exec[0] == NULL)
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "no executable specified\n");
- if (proc_params->exec_proc_count == 0)
- proc_params->exec_proc_count = 1;
+ if (exec_info->exec_proc_count == 0)
+ exec_info->exec_proc_count = 1;
- if (handle.prop == HYD_ENV_PROP_UNSET && proc_params->prop == HYD_ENV_PROP_UNSET)
- proc_params->prop = HYD_ENV_PROP_ALL;
+ if (handle.prop == HYD_ENV_PROP_UNSET && exec_info->prop != HYD_ENV_PROP_UNSET)
+ env_pref = 1;
+ }
- proc_params = proc_params->next;
+ /* Only if someone has an individual preference, assign executable
+ * specific environment. Otherwise, just optimize by setting one
+ * global environment (common case). */
+ if (env_pref) {
+ for (exec_info = handle.exec_info_list; exec_info; exec_info = exec_info->next)
+ if (exec_info->prop == HYD_ENV_PROP_UNSET)
+ exec_info->prop = HYD_ENV_PROP_ALL;
}
+ else if (handle.prop == HYD_ENV_PROP_UNSET)
+ handle.prop = HYD_ENV_PROP_ALL;
if (handle.proxy_port == -1)
handle.proxy_port = HYD_DEFAULT_PROXY_PORT;
fn_exit:
+ if (str1)
+ HYDU_FREE(str1);
+ if (str2)
+ HYDU_FREE(str2);
HYDU_FUNC_EXIT();
return status;
Modified: mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -29,10 +29,15 @@
handle.in = -1;
handle.stdin_cb = NULL;
+ handle.stdout_cb = NULL;
+ handle.stderr_cb = NULL;
/* FIXME: Should the timers be initialized? */
- handle.proc_params = NULL;
+ handle.one_pass_count = 0;
+ handle.exec_info_list = NULL;
+ handle.partition_list = NULL;
+
handle.func_depth = 0;
handle.stdin_buf_offset = 0;
handle.stdin_buf_count = 0;
@@ -45,6 +50,7 @@
HYDU_FREE(handle.base_path);
handle.base_path = NULL;
}
+
if (handle.bootstrap) {
HYDU_FREE(handle.bootstrap);
handle.bootstrap = NULL;
@@ -79,138 +85,65 @@
handle.prop_env = NULL;
}
- if (handle.proc_params) {
- HYD_LCHU_free_proc_params();
- handle.proc_params = NULL;
+ if (handle.exec_info_list) {
+ HYDU_free_exec_info_list(handle.exec_info_list);
+ handle.exec_info_list = NULL;
}
-}
-
-void HYD_LCHU_free_proc_params(void)
-{
- struct HYD_Proc_params *proc_params, *run;
-
- HYDU_FUNC_ENTER();
-
- proc_params = handle.proc_params;
- while (proc_params) {
- run = proc_params->next;
- HYDU_free_strlist(proc_params->exec);
- HYDU_free_partition_list(proc_params->partition);
- proc_params->partition = NULL;
-
- HYDU_env_free_list(proc_params->user_env);
- proc_params->user_env = NULL;
- HYDU_env_free_list(proc_params->prop_env);
- proc_params->prop_env = NULL;
-
- HYDU_FREE(proc_params);
- proc_params = run;
+ if (handle.partition_list) {
+ HYDU_free_partition_list(handle.partition_list);
+ handle.partition_list = NULL;
}
-
- HYDU_FUNC_EXIT();
}
-HYD_Status HYD_LCHU_create_host_list(void)
+HYD_Status HYD_LCHU_create_env_list(void)
{
- FILE *fp = NULL;
- char line[2 * MAX_HOSTNAME_LEN], *hostname, *procs;
- struct HYD_Proc_params *proc_params;
- struct HYD_Partition_list *partition, *run;
- int num_procs, total_procs;
+ struct HYD_Exec_info *exec_info;
+ HYD_Env_t *env, *run;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
- if (strcmp(handle.host_file, "HYDRA_USE_LOCALHOST")) {
- fp = fopen(handle.host_file, "r");
- if (fp == NULL)
- HYDU_ERR_SETANDJUMP1(status, HYD_INTERNAL_ERROR,
- "unable to open host file: %s\n", handle.host_file);
- }
-
- HYDU_Debug("Partition list: ");
- proc_params = handle.proc_params;
- while (proc_params) {
- if (!strcmp(handle.host_file, "HYDRA_USE_LOCALHOST")) {
- HYDU_alloc_partition(&proc_params->partition);
- proc_params->partition->name = MPIU_Strdup("localhost");
- proc_params->partition->proc_count = proc_params->exec_proc_count;
- total_procs = proc_params->exec_proc_count;
- HYDU_Debug("%s:%d ", proc_params->partition->name, proc_params->exec_proc_count);
+ if (handle.prop == HYD_ENV_PROP_ALL) {
+ handle.prop_env = HYDU_env_list_dup(handle.global_env);
+ for (env = handle.user_env; env; env = env->next) {
+ status = HYDU_append_env_to_list(*env, &handle.prop_env);
+ HYDU_ERR_POP(status, "unable to add env to list\n");
}
- else {
- total_procs = 0;
- while (!feof(fp)) {
- if ((fscanf(fp, "%s", line) < 0) && errno)
- HYDU_ERR_SETANDJUMP1(status, HYD_INTERNAL_ERROR,
- "unable to read input line (errno: %d)\n", errno);
- if (feof(fp))
- break;
-
- hostname = strtok(line, ":");
- procs = strtok(NULL, ":");
-
- num_procs = procs ? atoi(procs) : 1;
- if (num_procs > (proc_params->exec_proc_count - total_procs))
- num_procs = (proc_params->exec_proc_count - total_procs);
-
- if (!proc_params->partition) {
- HYDU_alloc_partition(&proc_params->partition);
- partition = proc_params->partition;
- }
- else {
- for (partition = proc_params->partition;
- partition->next; partition = partition->next);
- HYDU_alloc_partition(&partition->next);
- partition = partition->next;
- }
- partition->name = MPIU_Strdup(hostname);
- partition->proc_count = num_procs;
-
- total_procs += num_procs;
-
- HYDU_Debug("%s:%d ", partition->name, partition->proc_count);
-
- if (total_procs == proc_params->exec_proc_count)
- break;
+ }
+ else if (handle.prop == HYD_ENV_PROP_LIST) {
+ for (env = handle.user_env; env; env = env->next) {
+ run = HYDU_env_lookup(*env, handle.global_env);
+ if (run) {
+ status = HYDU_append_env_to_list(*run, &handle.prop_env);
+ HYDU_ERR_POP(status, "unable to add env to list\n");
}
}
+ }
- /* We don't have enough processes; use whatever we have */
- if (total_procs != proc_params->exec_proc_count) {
- for (partition = proc_params->partition;
- partition->next; partition = partition->next);
- run = proc_params->partition;
-
- /* Optimize the case where there is only one node */
- if (run->next == NULL) {
- run->proc_count = proc_params->exec_proc_count;
- HYDU_Debug("%s:%d ", run->name, run->proc_count);
+ exec_info = handle.exec_info_list;
+ while (exec_info) {
+ if (exec_info->prop == HYD_ENV_PROP_ALL) {
+ exec_info->prop_env = HYDU_env_list_dup(handle.global_env);
+ for (env = exec_info->user_env; env; env = env->next) {
+ status = HYDU_append_env_to_list(*env, &exec_info->prop_env);
+ HYDU_ERR_POP(status, "unable to add env to list\n");
}
- else {
- while (total_procs != proc_params->exec_proc_count) {
- HYDU_alloc_partition(&partition->next);
- partition = partition->next;
- partition->name = MPIU_Strdup(run->name);
- partition->proc_count = run->proc_count;
-
- HYDU_Debug("%s:%d ", partition->name, partition->proc_count);
-
- total_procs += partition->proc_count;
- run = run->next;
+ }
+ else if (exec_info->prop == HYD_ENV_PROP_LIST) {
+ for (env = exec_info->user_env; env; env = env->next) {
+ run = HYDU_env_lookup(*env, handle.global_env);
+ if (run) {
+ status = HYDU_append_env_to_list(*run, &exec_info->prop_env);
+ HYDU_ERR_POP(status, "unable to add env to list\n");
}
}
}
-
- proc_params = proc_params->next;
+ exec_info = exec_info->next;
}
- HYDU_Debug("\n");
fn_exit:
- if (fp)
- fclose(fp);
HYDU_FUNC_EXIT();
return status;
@@ -219,82 +152,96 @@
}
-HYD_Status HYD_LCHU_free_host_list(void)
+HYD_Status HYD_LCHU_get_current_exec_info(struct HYD_Exec_info **info)
{
- struct HYD_Proc_params *proc_params;
- struct HYD_Partition_list *partition, *next;
- int i;
+ struct HYD_Exec_info *exec_info;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
- for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
- for (partition = proc_params->partition; partition;) {
- HYDU_FREE(partition->name);
- if (partition->mapping) {
- for (i = 0;; i++)
- if (partition->mapping[i])
- HYDU_FREE(partition->mapping[i]);
- HYDU_FREE(partition->mapping);
- }
- for (i = 0; partition->args[i]; i++)
- HYDU_FREE(partition->args[i]);
- next = partition->next;
- HYDU_FREE(partition);
- partition = next;
- }
+ if (handle.exec_info_list == NULL) {
+ status = HYDU_alloc_exec_info(&handle.exec_info_list);
+ HYDU_ERR_POP(status, "unable to allocate exec_info\n");
}
- HYDU_FREE(handle.host_file);
+ exec_info = handle.exec_info_list;
+ while (exec_info->next)
+ exec_info = exec_info->next;
+
+ *info = exec_info;
+
+ fn_exit:
HYDU_FUNC_EXIT();
return status;
+
+ fn_fail:
+ goto fn_exit;
}
-HYD_Status HYD_LCHU_create_env_list(void)
+HYD_Status HYD_LCHU_merge_exec_info_to_partition(void)
{
- struct HYD_Proc_params *proc_params;
- HYD_Env_t *env, *run;
+ int total_procs, run_count, i, rem;
+ struct HYD_Partition *partition;
+ struct HYD_Exec_info *exec_info;
+ struct HYD_Partition_exec *exec;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
- if (handle.prop == HYD_ENV_PROP_ALL) {
- handle.prop_env = HYDU_env_list_dup(handle.global_env);
- for (env = handle.user_env; env; env = env->next) {
- status = HYDU_append_env_to_list(*env, &handle.prop_env);
- HYDU_ERR_POP(status, "unable to add env to list\n");
- }
- }
- else if (handle.prop == HYD_ENV_PROP_LIST) {
- for (env = handle.user_env; env; env = env->next) {
- run = HYDU_env_lookup(*env, handle.global_env);
- if (run) {
- status = HYDU_append_env_to_list(*run, &handle.prop_env);
- HYDU_ERR_POP(status, "unable to add env to list\n");
+ total_procs = 0;
+ for (partition = handle.partition_list; partition; partition = partition->next)
+ total_procs += partition->total_proc_count;
+
+ for (exec_info = handle.exec_info_list; exec_info; exec_info = exec_info->next) {
+ /* The run_count tells us how many processes the partitions
+ * before us can host */
+ run_count = 0;
+ for (partition = handle.partition_list; partition; partition = partition->next) {
+ if (run_count >= exec_info->exec_proc_count)
+ break;
+
+ if (partition->exec_list == NULL) {
+ status = HYDU_alloc_partition_exec(&partition->exec_list);
+ HYDU_ERR_POP(status, "unable to allocate partition exec\n");
+
+ for (i = 0; exec_info->exec[i]; i++)
+ partition->exec_list->exec[i] = MPIU_Strdup(exec_info->exec[i]);
+ partition->exec_list->exec[i] = NULL;
+
+ partition->exec_list->proc_count =
+ ((exec_info->exec_proc_count / total_procs) * partition->total_proc_count);
+ rem = (exec_info->exec_proc_count % total_procs);
+ if (rem > run_count + partition->total_proc_count)
+ rem = run_count + partition->total_proc_count;
+ partition->exec_list->proc_count += (rem > run_count) ? (rem - run_count) : 0;
+
+ partition->exec_list->prop = exec_info->prop;
+ partition->exec_list->prop_env = HYDU_env_list_dup(exec_info->prop_env);
}
- }
- }
+ else {
+ for (exec = partition->exec_list; exec->next; exec = exec->next);
+ status = HYDU_alloc_partition_exec(&exec->next);
+ HYDU_ERR_POP(status, "unable to allocate partition exec\n");
- proc_params = handle.proc_params;
- while (proc_params) {
- if (proc_params->prop == HYD_ENV_PROP_ALL) {
- proc_params->prop_env = HYDU_env_list_dup(handle.global_env);
- for (env = proc_params->user_env; env; env = env->next) {
- status = HYDU_append_env_to_list(*env, &proc_params->prop_env);
- HYDU_ERR_POP(status, "unable to add env to list\n");
+ exec = exec->next;
+ for (i = 0; exec_info->exec[i]; i++)
+ exec->exec[i] = MPIU_Strdup(exec_info->exec[i]);
+ exec->exec[i] = NULL;
+
+ exec->proc_count =
+ ((exec_info->exec_proc_count / total_procs) * partition->total_proc_count);
+ rem = (exec_info->exec_proc_count % total_procs);
+ if (rem > run_count + partition->total_proc_count)
+ rem = run_count + partition->total_proc_count;
+ exec->proc_count += (rem > run_count) ? (rem - run_count) : 0;
+
+ exec->prop = exec_info->prop;
+ exec->prop_env = HYDU_env_list_dup(exec_info->prop_env);
}
+
+ run_count += partition->total_proc_count;
}
- else if (proc_params->prop == HYD_ENV_PROP_LIST) {
- for (env = proc_params->user_env; env; env = env->next) {
- run = HYDU_env_lookup(*env, handle.global_env);
- if (run) {
- status = HYDU_append_env_to_list(*run, &proc_params->prop_env);
- HYDU_ERR_POP(status, "unable to add env to list\n");
- }
- }
- }
- proc_params = proc_params->next;
}
fn_exit:
@@ -309,9 +256,11 @@
void HYD_LCHU_print_params(void)
{
HYD_Env_t *env;
- int i, j;
- struct HYD_Proc_params *proc_params;
- struct HYD_Partition_list *partition;
+ int i;
+ struct HYD_Partition *partition;
+ struct HYD_Partition_segment *segment;
+ struct HYD_Partition_exec *exec;
+ struct HYD_Exec_info *exec_info;
HYDU_FUNC_ENTER();
@@ -353,35 +302,46 @@
HYDU_Debug("\n\n");
- HYDU_Debug(" Process parameters:\n");
- HYDU_Debug(" *******************\n");
+ HYDU_Debug(" Executable information:\n");
+ HYDU_Debug(" **********************\n");
i = 1;
- for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
+ for (exec_info = handle.exec_info_list; exec_info; exec_info = exec_info->next) {
HYDU_Debug(" Executable ID: %2d\n", i++);
HYDU_Debug(" -----------------\n");
- HYDU_Debug(" Process count: %d\n", proc_params->exec_proc_count);
+ HYDU_Debug(" Process count: %d\n", exec_info->exec_proc_count);
HYDU_Debug(" Executable: ");
- for (j = 0; proc_params->exec[j]; j++)
- HYDU_Debug("%s ", proc_params->exec[j]);
+ HYDU_print_strlist(exec_info->exec);
HYDU_Debug("\n");
- if (proc_params->user_env) {
+ if (exec_info->user_env) {
HYDU_Debug("\n");
HYDU_Debug(" User set environment:\n");
HYDU_Debug(" .....................\n");
- for (env = proc_params->user_env; env; env = env->next)
+ for (env = exec_info->user_env; env; env = env->next)
HYDU_Debug(" %s=%s\n", env->env_name, env->env_value);
}
+ }
- j = 0;
- for (partition = proc_params->partition; partition; partition = partition->next) {
- HYDU_Debug("\n");
- HYDU_Debug(" Partition ID: %2d\n", j++);
- HYDU_Debug(" ----------------\n");
- HYDU_Debug(" Partition name: %s\n", partition->name);
- HYDU_Debug(" Partition process count: %d\n", partition->proc_count);
- HYDU_Debug("\n");
- }
+ HYDU_Debug(" Partition information:\n");
+ HYDU_Debug(" *********************\n");
+ i = 1;
+ for (partition = handle.partition_list; partition; partition = partition->next) {
+ HYDU_Debug(" Partition ID: %2d\n", i++);
+ HYDU_Debug(" -----------------\n");
+ HYDU_Debug(" Partition name: %s\n", partition->name);
+ HYDU_Debug(" Process count: %d\n", partition->total_proc_count);
+ HYDU_Debug("\n");
+ HYDU_Debug(" Partition segment list:\n");
+ HYDU_Debug(" .......................\n");
+ for (segment = partition->segment_list; segment; segment = segment->next)
+ HYDU_Debug(" Start PID: %d; Process count: %d\n",
+ segment->start_pid, segment->proc_count);
+ HYDU_Debug("\n");
+ HYDU_Debug(" Partition exec list:\n");
+ HYDU_Debug(" ....................\n");
+ for (exec = partition->exec_list; exec; exec = exec->next)
+ HYDU_Debug(" Exec: %s; Process count: %d\n", exec->exec[0],
+ exec->proc_count);
}
HYDU_Debug("\n");
@@ -393,61 +353,3 @@
return;
}
-
-
-HYD_Status HYD_LCHU_allocate_proc_params(struct HYD_Proc_params **params)
-{
- struct HYD_Proc_params *proc_params;
- HYD_Status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- HYDU_MALLOC(proc_params, struct HYD_Proc_params *, sizeof(struct HYD_Proc_params), status);
-
- proc_params->exec_proc_count = 0;
- proc_params->partition = NULL;
-
- proc_params->exec[0] = NULL;
- proc_params->user_env = NULL;
- proc_params->prop = HYD_ENV_PROP_UNSET;
- proc_params->prop_env = NULL;
- proc_params->stdout_cb = NULL;
- proc_params->stderr_cb = NULL;
- proc_params->next = NULL;
-
- *params = proc_params;
-
- fn_exit:
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- goto fn_exit;
-}
-
-
-HYD_Status HYD_LCHU_get_current_proc_params(struct HYD_Proc_params **params)
-{
- struct HYD_Proc_params *proc_params;
- HYD_Status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- if (handle.proc_params == NULL) {
- status = HYD_LCHU_allocate_proc_params(&handle.proc_params);
- HYDU_ERR_POP(status, "unable to allocate proc_params\n");
- }
-
- proc_params = handle.proc_params;
- while (proc_params->next)
- proc_params = proc_params->next;
-
- *params = proc_params;
-
- fn_exit:
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- goto fn_exit;
-}
Modified: mpich2/trunk/src/pm/hydra/launcher/utils/lchu.h
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/utils/lchu.h 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/launcher/utils/lchu.h 2009-03-22 05:18:11 UTC (rev 4156)
@@ -11,11 +11,9 @@
void HYD_LCHU_init_params(void);
void HYD_LCHU_free_params(void);
-void HYD_LCHU_free_proc_params(void);
-HYD_Status HYD_LCHU_create_host_list(void);
+HYD_Status HYD_LCHU_merge_exec_info_to_partition(void);
HYD_Status HYD_LCHU_create_env_list(void);
+HYD_Status HYD_LCHU_get_current_exec_info(struct HYD_Exec_info **info);
void HYD_LCHU_print_params(void);
-HYD_Status HYD_LCHU_allocate_proc_params(struct HYD_Proc_params **params);
-HYD_Status HYD_LCHU_get_current_proc_params(struct HYD_Proc_params **params);
#endif /* LCHU_H_INCLUDED */
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -99,7 +99,8 @@
HYD_Status HYD_PMCD_pmi_create_pg(void)
{
- struct HYD_Proc_params *proc_params;
+ struct HYD_Partition *partition;
+ struct HYD_Partition_exec *exec;
int num_procs;
HYD_Status status = HYD_SUCCESS;
@@ -107,11 +108,9 @@
/* Find the number of processes in the PG */
num_procs = 0;
- proc_params = handle.proc_params;
- while (proc_params) {
- num_procs += proc_params->exec_proc_count;
- proc_params = proc_params->next;
- }
+ for (partition = handle.partition_list; partition; partition = partition->next)
+ for (exec = partition->exec_list; exec; exec = exec->next)
+ num_procs += exec->proc_count;
status = create_pg(&pg_list, 0);
HYDU_ERR_POP(status, "unable to create pg\n");
@@ -182,10 +181,10 @@
"PMI version mismatch; %d.%d\n", pmi_version, pmi_subversion);
}
-fn_exit:
+ fn_exit:
HYDU_FUNC_EXIT();
return status;
-fn_fail:
+ fn_fail:
goto fn_exit;
}
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle.h
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle.h 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle.h 2009-03-22 05:18:11 UTC (rev 4156)
@@ -58,7 +58,7 @@
struct HYD_PMCD_pmi_handle {
char *cmd;
- HYD_Status (*handler)(int fd, char *args[]);
+ HYD_Status(*handler) (int fd, char *args[]);
};
extern struct HYD_PMCD_pmi_handle *HYD_PMCD_pmi_handle_list;
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle_v1.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle_v1.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle_v1.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -7,6 +7,7 @@
#include "hydra.h"
#include "hydra_utils.h"
#include "bsci.h"
+#include "demux.h"
#include "pmi_handle.h"
#include "pmi_handle_v1.h"
@@ -15,17 +16,18 @@
/* TODO: abort, create_kvs, destroy_kvs, getbyidx, spawn */
struct HYD_PMCD_pmi_handle HYD_PMCD_pmi_v1_foo[] = {
- { "initack", HYD_PMCD_pmi_handle_v1_initack },
- { "get_maxes", HYD_PMCD_pmi_handle_v1_get_maxes },
- { "get_appnum", HYD_PMCD_pmi_handle_v1_get_appnum },
- { "get_my_kvsname", HYD_PMCD_pmi_handle_v1_get_my_kvsname },
- { "barrier_in", HYD_PMCD_pmi_handle_v1_barrier_in },
- { "put", HYD_PMCD_pmi_handle_v1_put },
- { "get", HYD_PMCD_pmi_handle_v1_get },
- { "get_universe_size", HYD_PMCD_pmi_handle_v1_get_usize },
- { "finalize", HYD_PMCD_pmi_handle_v1_finalize },
- { "\0", NULL }
+ {"initack", HYD_PMCD_pmi_handle_v1_initack},
+ {"get_maxes", HYD_PMCD_pmi_handle_v1_get_maxes},
+ {"get_appnum", HYD_PMCD_pmi_handle_v1_get_appnum},
+ {"get_my_kvsname", HYD_PMCD_pmi_handle_v1_get_my_kvsname},
+ {"barrier_in", HYD_PMCD_pmi_handle_v1_barrier_in},
+ {"put", HYD_PMCD_pmi_handle_v1_put},
+ {"get", HYD_PMCD_pmi_handle_v1_get},
+ {"get_universe_size", HYD_PMCD_pmi_handle_v1_get_usize},
+ {"finalize", HYD_PMCD_pmi_handle_v1_finalize},
+ {"\0", NULL}
};
+
struct HYD_PMCD_pmi_handle *HYD_PMCD_pmi_v1 = HYD_PMCD_pmi_v1_foo;
static HYD_Status add_process_to_pg(HYD_PMCD_pmi_pg_t * pg, int fd)
@@ -77,31 +79,12 @@
}
-static HYD_Status free_pmi_kvs_list(HYD_PMCD_pmi_kvs_t * kvs_list)
-{
- HYD_PMCD_pmi_kvs_pair_t *key_pair, *tmp;
- HYD_Status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- key_pair = kvs_list->key_pair;
- while (key_pair) {
- tmp = key_pair->next;
- HYDU_FREE(key_pair);
- key_pair = tmp;
- }
- HYDU_FREE(kvs_list);
-
- HYDU_FUNC_EXIT();
- return status;
-}
-
-
HYD_Status HYD_PMCD_pmi_handle_v1_initack(int fd, char *args[])
{
int id, size, debug, i;
char *ssize, *srank, *sdebug, *tmp[HYDU_NUM_JOIN_STR], *cmd;
- struct HYD_Proc_params *proc_params;
+ struct HYD_Partition *partition;
+ struct HYD_Partition_exec *exec;
HYD_PMCD_pmi_pg_t *run;
HYD_Status status = HYD_SUCCESS;
@@ -111,11 +94,10 @@
id = atoi(strtok(NULL, "="));
size = 0;
- proc_params = handle.proc_params;
- while (proc_params) {
- size += proc_params->exec_proc_count;
- proc_params = proc_params->next;
- }
+ for (partition = handle.partition_list; partition; partition = partition->next)
+ for (exec = partition->exec_list; exec; exec = exec->next)
+ size += exec->proc_count;
+
debug = handle.debug;
status = HYDU_int_to_str(size, &ssize);
@@ -165,40 +147,6 @@
}
-HYD_Status HYD_PMCD_pmi_handle_v1_init(int fd, char *args[])
-{
- int pmi_version, pmi_subversion;
- char *tmp[HYDU_NUM_JOIN_STR];
- HYD_Status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- strtok(args[0], "=");
- pmi_version = atoi(strtok(NULL, "="));
- strtok(args[1], "=");
- pmi_subversion = atoi(strtok(NULL, "="));
-
- if (pmi_version == 1 && pmi_subversion <= 1) {
- /* We support PMI v1.0 and 1.1 */
- tmp[0] = "cmd=response_to_init pmi_version=1 pmi_subversion=1 rc=0\n";
- status = HYDU_sock_writeline(fd, tmp[0], strlen(tmp[0]));
- HYDU_ERR_POP(status, "error writing PMI line\n");
- }
- else {
- /* PMI version mismatch */
- HYDU_ERR_SETANDJUMP2(status, HYD_INTERNAL_ERROR,
- "PMI version mismatch; %d.%d\n", pmi_version, pmi_subversion);
- }
-
- fn_exit:
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- goto fn_exit;
-}
-
-
HYD_Status HYD_PMCD_pmi_handle_v1_get_maxes(int fd, char *args[])
{
int i;
@@ -259,9 +207,8 @@
/* Find the group id corresponding to this fd */
process = find_process(fd);
- if (process == NULL) /* We didn't find the process */
- HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
- "unable to find process structure\n");
+ if (process == NULL) /* We didn't find the process */
+ HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "unable to find process structure\n");
status = HYDU_int_to_str(process->pg->id, &sapp_num);
HYDU_ERR_POP(status, "unable to convert int to string\n");
@@ -301,7 +248,7 @@
/* Find the group id corresponding to this fd */
process = find_process(fd);
- if (process == NULL) /* We didn't find the process */
+ if (process == NULL) /* We didn't find the process */
HYDU_ERR_SETANDJUMP1(status, HYD_INTERNAL_ERROR,
"unable to find process structure for fd %d\n", fd);
@@ -337,7 +284,7 @@
/* Find the group id corresponding to this fd */
process = find_process(fd);
- if (process == NULL) /* We didn't find the process */
+ if (process == NULL) /* We didn't find the process */
HYDU_ERR_SETANDJUMP1(status, HYD_INTERNAL_ERROR,
"unable to find process structure for fd %d\n", fd);
@@ -386,7 +333,7 @@
/* Find the group id corresponding to this fd */
process = find_process(fd);
- if (process == NULL) /* We didn't find the process */
+ if (process == NULL) /* We didn't find the process */
HYDU_ERR_SETANDJUMP1(status, HYD_INTERNAL_ERROR,
"unable to find process structure for fd %d\n", fd);
@@ -458,7 +405,7 @@
/* Find the group id corresponding to this fd */
process = find_process(fd);
- if (process == NULL) /* We didn't find the process */
+ if (process == NULL) /* We didn't find the process */
HYDU_ERR_SETANDJUMP1(status, HYD_INTERNAL_ERROR,
"unable to find process structure for fd %d\n", fd);
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -15,10 +15,12 @@
int main(int argc, char **argv)
{
int i, j, arg, count, pid, ret_status;
- int stdin_fd, timeout;
+ int stdin_fd, timeout, process_id;
char *str, *timeout_str;
char *client_args[HYD_EXEC_ARGS];
char *tmp[HYDU_NUM_JOIN_STR];
+ HYD_Env_t *env;
+ struct HYD_Partition_exec *exec;
HYD_Status status = HYD_SUCCESS;
status = HYD_PMCD_pmi_proxy_get_params(argc, argv);
@@ -41,73 +43,93 @@
* local processes. That is, we can only have a single-level
* hierarchy of proxies. */
+ HYD_PMCD_pmi_proxy_params.partition_proc_count = 0;
+ for (exec = HYD_PMCD_pmi_proxy_params.exec_list; exec; exec = exec->next)
+ HYD_PMCD_pmi_proxy_params.partition_proc_count += exec->proc_count;
+
HYDU_MALLOC(HYD_PMCD_pmi_proxy_params.out, int *,
- HYD_PMCD_pmi_proxy_params.proc_count * sizeof(int), status);
+ HYD_PMCD_pmi_proxy_params.partition_proc_count * sizeof(int), status);
HYDU_MALLOC(HYD_PMCD_pmi_proxy_params.err, int *,
- HYD_PMCD_pmi_proxy_params.proc_count * sizeof(int), status);
+ HYD_PMCD_pmi_proxy_params.partition_proc_count * sizeof(int), status);
HYDU_MALLOC(HYD_PMCD_pmi_proxy_params.pid, int *,
- HYD_PMCD_pmi_proxy_params.proc_count * sizeof(int), status);
+ HYD_PMCD_pmi_proxy_params.partition_proc_count * sizeof(int), status);
HYDU_MALLOC(HYD_PMCD_pmi_proxy_params.exit_status, int *,
- HYD_PMCD_pmi_proxy_params.proc_count * sizeof(int), status);
+ HYD_PMCD_pmi_proxy_params.partition_proc_count * sizeof(int), status);
/* Initialize the exit status */
- for (i = 0; i < HYD_PMCD_pmi_proxy_params.proc_count; i++)
+ for (i = 0; i < HYD_PMCD_pmi_proxy_params.partition_proc_count; i++)
HYD_PMCD_pmi_proxy_params.exit_status[i] = -1;
- /* Spawn the processes */
- for (i = 0; i < HYD_PMCD_pmi_proxy_params.proc_count; i++) {
-
+ /* For local spawning, set the global environment here itself */
+ for (env = HYD_PMCD_pmi_proxy_params.global_env; env; env = env->next) {
j = 0;
- tmp[j++] = MPIU_Strdup("PMI_ID=");
- status = HYDU_int_to_str(HYD_PMCD_pmi_proxy_params.pmi_id + i, &str);
- HYDU_ERR_POP(status, "unable to convert int to string\n");
-
- tmp[j++] = MPIU_Strdup(str);
- HYDU_FREE(str);
+ tmp[j++] = MPIU_Strdup(env->env_name);
+ tmp[j++] = MPIU_Strdup("=");
+ tmp[j++] = MPIU_Strdup(env->env_value);
tmp[j++] = NULL;
status = HYDU_str_alloc_and_join(tmp, &str);
HYDU_ERR_POP(status, "unable to join strings\n");
-
HYDU_putenv(str);
for (j = 0; tmp[j]; j++)
HYDU_FREE(tmp[j]);
+ }
- if (chdir(HYD_PMCD_pmi_proxy_params.wdir) < 0)
- HYDU_ERR_SETANDJUMP1(status, HYD_INTERNAL_ERROR,
- "unable to change wdir (%s)\n", HYDU_strerror(errno));
+ /* Spawn the processes */
+ process_id = 0;
+ for (exec = HYD_PMCD_pmi_proxy_params.exec_list; exec; exec = exec->next) {
+ for (i = 0; i < exec->proc_count; i++) {
+ j = 0;
+ tmp[j++] = MPIU_Strdup("PMI_ID=");
+ status = HYDU_int_to_str(HYD_PMCD_pmi_proxy_params.pmi_id + process_id, &str);
+ HYDU_ERR_POP(status, "unable to convert int to string\n");
+ tmp[j++] = MPIU_Strdup(str);
+ HYDU_FREE(str);
+ tmp[j++] = NULL;
- for (j = 0, arg = 0; HYD_PMCD_pmi_proxy_params.args[j]; j++)
- client_args[arg++] = MPIU_Strdup(HYD_PMCD_pmi_proxy_params.args[j]);
- client_args[arg++] = NULL;
+ status = HYDU_str_alloc_and_join(tmp, &str);
+ HYDU_ERR_POP(status, "unable to join strings\n");
- /* FIXME: We need to figure out how many total number of
- * processes are there on this partition, and appropriately
- * bind them. */
- if ((i + HYD_PMCD_pmi_proxy_params.pmi_id) == 0) {
- status = HYDU_create_process(client_args, &HYD_PMCD_pmi_proxy_params.in,
- &HYD_PMCD_pmi_proxy_params.out[i],
- &HYD_PMCD_pmi_proxy_params.err[i],
- &HYD_PMCD_pmi_proxy_params.pid[i], i);
+ HYDU_putenv(str);
+ for (j = 0; tmp[j]; j++)
+ HYDU_FREE(tmp[j]);
+
+ if (chdir(HYD_PMCD_pmi_proxy_params.wdir) < 0)
+ HYDU_ERR_SETANDJUMP1(status, HYD_INTERNAL_ERROR,
+ "unable to change wdir (%s)\n", HYDU_strerror(errno));
+
+ for (j = 0, arg = 0; exec->exec[j]; j++)
+ client_args[arg++] = MPIU_Strdup(exec->exec[j]);
+ client_args[arg++] = NULL;
+
+ if ((process_id + HYD_PMCD_pmi_proxy_params.pmi_id) == 0) {
+ status = HYDU_create_process(client_args, &HYD_PMCD_pmi_proxy_params.in,
+ &HYD_PMCD_pmi_proxy_params.out[process_id],
+ &HYD_PMCD_pmi_proxy_params.err[process_id],
+ &HYD_PMCD_pmi_proxy_params.pid[process_id],
+ process_id);
+ }
+ else {
+ status = HYDU_create_process(client_args, NULL,
+ &HYD_PMCD_pmi_proxy_params.out[process_id],
+ &HYD_PMCD_pmi_proxy_params.err[process_id],
+ &HYD_PMCD_pmi_proxy_params.pid[process_id],
+ process_id);
+ }
+ HYDU_ERR_POP(status, "spawn process returned error\n");
+
+ HYDU_FREE(str);
+
+ process_id++;
}
- else {
- status = HYDU_create_process(client_args, NULL,
- &HYD_PMCD_pmi_proxy_params.out[i],
- &HYD_PMCD_pmi_proxy_params.err[i],
- &HYD_PMCD_pmi_proxy_params.pid[i],
- i);
- }
- HYDU_ERR_POP(status, "spawn process returned error\n");
-
- HYDU_FREE(str);
}
/* Everything is spawned, now wait for I/O */
- status = HYD_DMX_register_fd(HYD_PMCD_pmi_proxy_params.proc_count,
+ status = HYD_DMX_register_fd(HYD_PMCD_pmi_proxy_params.partition_proc_count,
HYD_PMCD_pmi_proxy_params.out,
HYD_STDOUT, HYD_PMCD_pmi_proxy_stdout_cb);
HYDU_ERR_POP(status, "unable to register fd\n");
- status = HYD_DMX_register_fd(HYD_PMCD_pmi_proxy_params.proc_count,
+ status = HYD_DMX_register_fd(HYD_PMCD_pmi_proxy_params.partition_proc_count,
HYD_PMCD_pmi_proxy_params.err,
HYD_STDOUT, HYD_PMCD_pmi_proxy_stderr_cb);
HYDU_ERR_POP(status, "unable to register fd\n");
@@ -140,8 +162,9 @@
/* Check to see if there's any open read socket left; if there
* are, we will just wait for more events. */
count = 0;
- for (i = 0; i < HYD_PMCD_pmi_proxy_params.proc_count; i++) {
- if (HYD_PMCD_pmi_proxy_params.out[i] != -1 || HYD_PMCD_pmi_proxy_params.err[i] != -1) {
+ for (i = 0; i < HYD_PMCD_pmi_proxy_params.partition_proc_count; i++) {
+ if (HYD_PMCD_pmi_proxy_params.out[i] != -1 ||
+ HYD_PMCD_pmi_proxy_params.err[i] != -1) {
count++;
break;
}
@@ -164,13 +187,13 @@
/* Find the pid and mark it as complete. */
if (pid > 0)
- for (i = 0; i < HYD_PMCD_pmi_proxy_params.proc_count; i++)
+ for (i = 0; i < HYD_PMCD_pmi_proxy_params.partition_proc_count; i++)
if (HYD_PMCD_pmi_proxy_params.pid[i] == pid)
HYD_PMCD_pmi_proxy_params.exit_status[i] = WEXITSTATUS(ret_status);
/* Check how many more processes are pending */
count = 0;
- for (i = 0; i < HYD_PMCD_pmi_proxy_params.proc_count; i++) {
+ for (i = 0; i < HYD_PMCD_pmi_proxy_params.partition_proc_count; i++) {
if (HYD_PMCD_pmi_proxy_params.exit_status[i] == -1) {
count++;
break;
@@ -186,7 +209,7 @@
} while (1);
ret_status = 0;
- for (i = 0; i < HYD_PMCD_pmi_proxy_params.proc_count; i++)
+ for (i = 0; i < HYD_PMCD_pmi_proxy_params.partition_proc_count; i++)
ret_status |= HYD_PMCD_pmi_proxy_params.exit_status[i];
fn_exit:
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.h
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.h 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.h 2009-03-22 05:18:11 UTC (rev 4156)
@@ -11,15 +11,18 @@
#include "hydra_utils.h"
struct HYD_PMCD_pmi_proxy_params {
- HYD_Env_t *global_env;
- HYD_Env_t *env_list;
- int proc_count;
int proxy_port;
int pmi_id;
- char *args[HYD_EXEC_ARGS];
char *wdir;
- struct HYD_Partition_list *partition;
+ HYD_Env_t *global_env;
+ int one_pass_count;
+ int partition_proc_count;
+
+ /* Process segmentation information for this partition */
+ struct HYD_Partition_segment segment;
+ struct HYD_Partition_exec *exec_list;
+
int *pid;
int *out;
int *err;
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy_cb.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy_cb.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy_cb.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -24,7 +24,7 @@
if (events & HYD_STDIN)
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "stdout handler got stdin event\n");
- if (fd == HYD_PMCD_pmi_proxy_listenfd) { /* mpiexec is trying to connect */
+ if (fd == HYD_PMCD_pmi_proxy_listenfd) { /* mpiexec is trying to connect */
status = HYDU_sock_accept(fd, &accept_fd);
HYDU_ERR_POP(status, "accept error\n");
@@ -46,7 +46,7 @@
}
if (cmd == KILLALL_PROCS) { /* Got the killall command */
- for (i = 0; i < HYD_PMCD_pmi_proxy_params.proc_count; i++)
+ for (i = 0; i < HYD_PMCD_pmi_proxy_params.partition_proc_count; i++)
if (HYD_PMCD_pmi_proxy_params.pid[i] != -1)
kill(HYD_PMCD_pmi_proxy_params.pid[i], SIGKILL);
@@ -84,7 +84,7 @@
status = HYD_DMX_deregister_fd(fd);
HYDU_ERR_POP(status, "unable to deregister fd\n");
- for (i = 0; i < HYD_PMCD_pmi_proxy_params.proc_count; i++)
+ for (i = 0; i < HYD_PMCD_pmi_proxy_params.partition_proc_count; i++)
if (HYD_PMCD_pmi_proxy_params.out[i] == fd)
HYD_PMCD_pmi_proxy_params.out[i] = -1;
}
@@ -113,7 +113,7 @@
status = HYD_DMX_deregister_fd(fd);
HYDU_ERR_POP(status, "unable to deregister fd\n");
- for (i = 0; i < HYD_PMCD_pmi_proxy_params.proc_count; i++)
+ for (i = 0; i < HYD_PMCD_pmi_proxy_params.partition_proc_count; i++)
if (HYD_PMCD_pmi_proxy_params.err[i] == fd)
HYD_PMCD_pmi_proxy_params.err[i] = -1;
}
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy_utils.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy_utils.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy_utils.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -11,37 +11,63 @@
HYD_Status HYD_PMCD_pmi_proxy_get_params(int t_argc, char **t_argv)
{
- int argc = t_argc;
char **argv = t_argv, *str;
int arg, i, count;
- struct HYD_Partition_list *partition, *run;
+ HYD_Env_t *env;
+ struct HYD_Partition_exec *exec = NULL;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
+ HYD_PMCD_pmi_proxy_params.exec_list = NULL;
HYD_PMCD_pmi_proxy_params.global_env = NULL;
- HYD_PMCD_pmi_proxy_params.env_list = NULL;
- HYD_PMCD_pmi_proxy_params.partition = NULL;
- status = HYDU_list_global_env(&HYD_PMCD_pmi_proxy_params.global_env);
- HYDU_ERR_POP(status, "unable to get the global env list\n");
+ while (*argv) {
+ ++argv;
- while (--argc && ++argv) {
+ /* Proxy port */
+ if (!strcmp(*argv, "--proxy-port")) {
+ argv++;
+ HYD_PMCD_pmi_proxy_params.proxy_port = atoi(*argv);
+ continue;
+ }
- /* Process count */
- if (!strcmp(*argv, "--proc-count")) {
+ /* Working directory */
+ if (!strcmp(*argv, "--wdir")) {
argv++;
- HYD_PMCD_pmi_proxy_params.proc_count = atoi(*argv);
+ HYD_PMCD_pmi_proxy_params.wdir = MPIU_Strdup(*argv);
continue;
}
- /* Proxy port */
- if (!strcmp(*argv, "--proxy-port")) {
+ /* Global env */
+ if (!strcmp(*argv, "--global-env")) {
argv++;
- HYD_PMCD_pmi_proxy_params.proxy_port = atoi(*argv);
+ count = atoi(*argv);
+ for (i = 0; i < count; i++) {
+ argv++;
+ str = *argv;
+
+ /* Some bootstrap servers remove the quotes that we
+ * added, while some others do not. For the cases
+ * where they are not removed, we do it ourselves. */
+ if (*str == '\'') {
+ str++;
+ str[strlen(str) - 1] = 0;
+ }
+ env = HYDU_str_to_env(str);
+ HYDU_append_env_to_list(*env, &HYD_PMCD_pmi_proxy_params.global_env);
+ HYDU_FREE(env);
+ }
continue;
}
+ /* One-pass Count */
+ if (!strcmp(*argv, "--one-pass-count")) {
+ argv++;
+ HYD_PMCD_pmi_proxy_params.one_pass_count = atoi(*argv);
+ continue;
+ }
+
/* PMI_ID: This is the PMI_ID for the first process;
* everything else is incremented from here. */
if (!strcmp(*argv, "--pmi-id")) {
@@ -50,35 +76,31 @@
continue;
}
- /* Partition information is passed as two parameters; name
- * followed by proc count. Multiple partitions are specified
- * as multiple parameters. */
- if (!strcmp(*argv, "--partition")) {
- argv++;
- HYDU_alloc_partition(&partition);
- partition->name = MPIU_Strdup(*argv);
- argv++;
- partition->proc_count = atoi(*argv);
-
- if (!HYD_PMCD_pmi_proxy_params.partition)
- HYD_PMCD_pmi_proxy_params.partition = partition;
+ /* New executable */
+ if (!strcmp(*argv, "--exec")) {
+ if (HYD_PMCD_pmi_proxy_params.exec_list == NULL) {
+ status = HYDU_alloc_partition_exec(&HYD_PMCD_pmi_proxy_params.exec_list);
+ HYDU_ERR_POP(status, "unable to allocate partition exec\n");
+ }
else {
- for (run = HYD_PMCD_pmi_proxy_params.partition; run->next; run = run->next);
- run->next = partition;
+ for (exec = HYD_PMCD_pmi_proxy_params.exec_list; exec->next;
+ exec = exec->next);
+ status = HYDU_alloc_partition_exec(&exec->next);
+ HYDU_ERR_POP(status, "unable to allocate partition exec\n");
}
continue;
}
- /* Working directory */
- if (!strcmp(*argv, "--wdir")) {
+ /* Process count */
+ if (!strcmp(*argv, "--proc-count")) {
argv++;
- HYD_PMCD_pmi_proxy_params.wdir = MPIU_Strdup(*argv);
+ for (exec = HYD_PMCD_pmi_proxy_params.exec_list; exec->next; exec = exec->next);
+ exec->proc_count = atoi(*argv);
continue;
}
- /* Environment information is passed as a list of names; we
- * need to find the values from our environment. */
- if (!strcmp(*argv, "--environment")) {
+ /* Local env */
+ if (!strcmp(*argv, "--local-env")) {
argv++;
count = atoi(*argv);
for (i = 0; i < count; i++) {
@@ -92,20 +114,25 @@
str++;
str[strlen(str) - 1] = 0;
}
- HYDU_putenv(str);
+ env = HYDU_str_to_env(str);
+ HYDU_append_env_to_list(*env, &exec->prop_env);
+ HYDU_FREE(env);
}
continue;
}
/* Fall through case is application parameters. Load
* everything into the args variable. */
- for (arg = 0; *argv;) {
- HYD_PMCD_pmi_proxy_params.args[arg++] = MPIU_Strdup(*argv);
+ for (exec = HYD_PMCD_pmi_proxy_params.exec_list; exec->next; exec = exec->next);
+ for (arg = 0; *argv && strcmp(*argv, "--exec");) {
+ exec->exec[arg++] = MPIU_Strdup(*argv);
++argv;
- --argc;
}
- HYD_PMCD_pmi_proxy_params.args[arg++] = NULL;
- break;
+ exec->exec[arg++] = NULL;
+
+ /* If we already touched the next --exec, step back */
+ if (*argv && !strcmp(*argv, "--exec"))
+ argv--;
}
fn_exit:
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_cb.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_cb.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_cb.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -30,16 +30,14 @@
HYD_Status HYD_PMCD_pmi_serv_cb(int fd, HYD_Event_t events)
{
int accept_fd, linelen, i;
- char *buf, *cmd, *args[HYD_EXEC_ARGS];
- char *str1, *str2;
+ char *buf = NULL, *cmd, *args[HYD_EXEC_ARGS];
+ char *str1 = NULL, *str2 = NULL;
struct HYD_PMCD_pmi_handle *h;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
- HYDU_MALLOC(buf, char *, HYD_TMPBUF_SIZE, status);
-
- if (fd == HYD_PMCD_pmi_serv_listenfd) { /* Someone is trying to connect to us */
+ if (fd == HYD_PMCD_pmi_serv_listenfd) { /* Someone is trying to connect to us */
status = HYDU_sock_accept(fd, &accept_fd);
HYDU_ERR_POP(status, "accept error\n");
@@ -47,6 +45,8 @@
HYDU_ERR_POP(status, "unable to register fd\n");
}
else {
+ HYDU_MALLOC(buf, char *, HYD_TMPBUF_SIZE, status);
+
status = HYDU_sock_readline(fd, buf, HYD_TMPBUF_SIZE, &linelen);
HYDU_ERR_POP(status, "PMI read line error\n");
@@ -125,7 +125,10 @@
}
fn_exit:
- HYDU_FREE(buf);
+ if (buf)
+ HYDU_FREE(buf);
+ if (str1)
+ HYDU_FREE(str1);
HYDU_FUNC_EXIT();
return status;
@@ -136,8 +139,7 @@
HYD_Status HYD_PMCD_pmi_serv_cleanup(void)
{
- struct HYD_Proc_params *proc_params;
- struct HYD_Partition_list *partition;
+ struct HYD_Partition *partition;
int fd;
enum HYD_PMCD_pmi_proxy_cmds cmd;
HYD_Status status = HYD_SUCCESS, overall_status = HYD_SUCCESS;
@@ -148,24 +150,22 @@
* bunch of processes to do this. */
/* Connect to all proxies and send a KILL command */
cmd = KILLALL_PROCS;
- for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
- for (partition = proc_params->partition; partition; partition = partition->next) {
- status = HYDU_sock_connect(partition->name, handle.proxy_port, &fd);
- if (status != HYD_SUCCESS) {
- HYDU_Warn_printf("unable to connect to the proxy on %s\n", partition->name);
- overall_status = HYD_INTERNAL_ERROR;
- continue; /* Move on to the next proxy */
- }
+ for (partition = handle.partition_list; partition; partition = partition->next) {
+ status = HYDU_sock_connect(partition->name, handle.proxy_port, &fd);
+ if (status != HYD_SUCCESS) {
+ HYDU_Warn_printf("unable to connect to the proxy on %s\n", partition->name);
+ overall_status = HYD_INTERNAL_ERROR;
+ continue; /* Move on to the next proxy */
+ }
- status = HYDU_sock_write(fd, &cmd, sizeof(cmd));
- if (status != HYD_SUCCESS) {
- HYDU_Warn_printf("unable to send data to the proxy on %s\n", partition->name);
- overall_status = HYD_INTERNAL_ERROR;
- continue; /* Move on to the next proxy */
- }
+ status = HYDU_sock_write(fd, &cmd, sizeof(cmd));
+ if (status != HYD_SUCCESS) {
+ HYDU_Warn_printf("unable to send data to the proxy on %s\n", partition->name);
+ overall_status = HYD_INTERNAL_ERROR;
+ continue; /* Move on to the next proxy */
+ }
- close(fd);
- }
+ close(fd);
}
HYDU_FUNC_EXIT();
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -38,13 +38,12 @@
{
char *port_range, *port_str, *sport, *str;
uint16_t port;
- int i, arg;
- int process_id, group_id;
+ int i, arg, process_id;
char hostname[MAX_HOSTNAME_LEN];
HYD_Env_t *env;
char *path_str[HYDU_NUM_JOIN_STR];
- struct HYD_Proc_params *proc_params;
- struct HYD_Partition_list *partition;
+ struct HYD_Partition *partition;
+ struct HYD_Partition_exec *exec;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
@@ -73,8 +72,7 @@
/* Create a port string for MPI processes to use to connect to */
if (gethostname(hostname, MAX_HOSTNAME_LEN) < 0)
HYDU_ERR_SETANDJUMP2(status, HYD_SOCK_ERROR,
- "gethostname error (hostname: %s; errno: %d)\n", hostname,
- errno);
+ "gethostname error (hostname: %s; errno: %d)\n", hostname, errno);
status = HYDU_int_to_str(port, &sport);
HYDU_ERR_POP(status, "cannot convert int to string\n");
@@ -99,81 +97,89 @@
status = HYD_PMCD_pmi_create_pg();
HYDU_ERR_POP(status, "unable to create process group\n");
+ handle.one_pass_count = 0;
+ for (partition = handle.partition_list; partition; partition = partition->next)
+ handle.one_pass_count += partition->total_proc_count;
+
/* Create the arguments list for each proxy */
process_id = 0;
- group_id = 0;
- for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
- for (partition = proc_params->partition; partition; partition = partition->next) {
+ for (partition = handle.partition_list; partition; partition = partition->next) {
- partition->group_id = group_id++;
- partition->group_rank = 0;
+ for (arg = 0; partition->proxy_args[arg]; arg++);
+ i = 0;
+ path_str[i++] = MPIU_Strdup(handle.base_path);
+ path_str[i++] = MPIU_Strdup("pmi_proxy");
+ path_str[i] = NULL;
+ status = HYDU_str_alloc_and_join(path_str, &partition->proxy_args[arg++]);
+ HYDU_ERR_POP(status, "unable to join strings\n");
+ HYDU_free_strlist(path_str);
- for (arg = 0; partition->args[arg]; arg++);
- i = 0;
- path_str[i++] = MPIU_Strdup(handle.base_path);
- path_str[i++] = MPIU_Strdup("pmi_proxy");
- path_str[i] = NULL;
- status = HYDU_str_alloc_and_join(path_str, &partition->args[arg++]);
- HYDU_ERR_POP(status, "unable to join strings\n");
+ status = HYDU_int_to_str(handle.proxy_port, &str);
+ HYDU_ERR_POP(status, "unable to convert in to string\n");
+ partition->proxy_args[arg++] = MPIU_Strdup("--proxy-port");
+ partition->proxy_args[arg++] = MPIU_Strdup(str);
+ HYDU_FREE(str);
- HYDU_free_strlist(path_str);
+ status = HYDU_int_to_str(handle.one_pass_count, &str);
+ HYDU_ERR_POP(status, "unable to convert in to string\n");
+ partition->proxy_args[arg++] = MPIU_Strdup("--one-pass-count");
+ partition->proxy_args[arg++] = MPIU_Strdup(str);
+ HYDU_FREE(str);
- status = HYDU_int_to_str(partition->proc_count, &str);
- HYDU_ERR_POP(status, "unable to convert int to string\n");
+ partition->proxy_args[arg++] = MPIU_Strdup("--wdir");
+ partition->proxy_args[arg++] = MPIU_Strdup(handle.wdir);
- partition->args[arg++] = MPIU_Strdup("--proc-count");
- partition->args[arg++] = MPIU_Strdup(str);
+ /* Pass the global environment separately, instead of for each
+ * executable, as an optimization */
+ partition->proxy_args[arg++] = MPIU_Strdup("--global-env");
+ for (i = 0, env = handle.system_env; env; env = env->next, i++);
+ for (env = handle.prop_env; env; env = env->next, i++);
+ status = HYDU_int_to_str(i, &str);
+ HYDU_ERR_POP(status, "unable to convert int to string\n");
+ partition->proxy_args[arg++] = MPIU_Strdup(str);
+ HYDU_FREE(str);
+ partition->proxy_args[arg++] = NULL;
+ HYDU_list_append_env_to_str(handle.system_env, partition->proxy_args);
+ HYDU_list_append_env_to_str(handle.prop_env, partition->proxy_args);
- partition->args[arg++] = MPIU_Strdup("--partition");
- partition->args[arg++] = MPIU_Strdup(partition->name);
- partition->args[arg++] = MPIU_Strdup(str);
- HYDU_FREE(str);
+ status = HYDU_int_to_str(process_id, &str);
+ HYDU_ERR_POP(status, "unable to convert int to string\n");
+ for (arg = 0; partition->proxy_args[arg]; arg++);
+ partition->proxy_args[arg++] = MPIU_Strdup("--pmi-id");
+ partition->proxy_args[arg++] = MPIU_Strdup(str);
+ HYDU_FREE(str);
+ partition->proxy_args[arg++] = NULL;
- status = HYDU_int_to_str(process_id, &str);
+ /* Now pass the local executable information */
+ for (exec = partition->exec_list; exec; exec = exec->next) {
+ for (arg = 0; partition->proxy_args[arg]; arg++);
+ partition->proxy_args[arg++] = MPIU_Strdup("--exec");
+
+ status = HYDU_int_to_str(exec->proc_count, &str);
HYDU_ERR_POP(status, "unable to convert int to string\n");
-
- partition->args[arg++] = MPIU_Strdup("--pmi-id");
- partition->args[arg++] = MPIU_Strdup(str);
+ partition->proxy_args[arg++] = MPIU_Strdup("--proc-count");
+ partition->proxy_args[arg++] = MPIU_Strdup(str);
HYDU_FREE(str);
+ partition->proxy_args[arg++] = NULL;
- status = HYDU_int_to_str(handle.proxy_port, &str);
- HYDU_ERR_POP(status, "unable to convert in to string\n");
-
- partition->args[arg++] = MPIU_Strdup("--proxy-port");
- partition->args[arg++] = MPIU_Strdup(str);
+ for (arg = 0; partition->proxy_args[arg]; arg++);
+ partition->proxy_args[arg++] = MPIU_Strdup("--local-env");
+ for (i = 0, env = exec->prop_env; env; env = env->next, i++);
+ status = HYDU_int_to_str(i, &str);
+ HYDU_ERR_POP(status, "unable to convert int to string\n");
+ partition->proxy_args[arg++] = MPIU_Strdup(str);
HYDU_FREE(str);
+ partition->proxy_args[arg++] = NULL;
+ HYDU_list_append_env_to_str(exec->prop_env, partition->proxy_args);
- partition->args[arg++] = MPIU_Strdup("--wdir");
- partition->args[arg++] = MPIU_Strdup(handle.wdir);
+ HYDU_list_append_strlist(exec->exec, partition->proxy_args);
- partition->args[arg++] = MPIU_Strdup("--environment");
- i = 0;
- for (env = handle.system_env; env; env = env->next)
- i++;
- for (env = handle.prop_env; env; env = env->next)
- i++;
- for (env = proc_params->prop_env; env; env = env->next)
- i++;
- status = HYDU_int_to_str(i, &str);
- HYDU_ERR_POP(status, "unable to convert in to string\n");
+ process_id += exec->proc_count;
+ }
- partition->args[arg++] = MPIU_Strdup(str);
- partition->args[arg++] = NULL;
-
- HYDU_list_append_env_to_str(handle.system_env, partition->args);
- HYDU_list_append_env_to_str(handle.prop_env, partition->args);
- HYDU_list_append_env_to_str(proc_params->prop_env, partition->args);
-
- for (arg = 0; partition->args[arg]; arg++);
- partition->args[arg] = NULL;
- HYDU_list_append_strlist(proc_params->exec, partition->args);
-
- if (handle.debug) {
- HYDU_Debug("Executable passed to the bootstrap: ");
- HYDU_print_strlist(partition->args);
- }
-
- process_id += partition->proc_count;
+ if (handle.debug) {
+ HYDU_Debug("Executable passed to the bootstrap: ");
+ HYDU_print_strlist(partition->proxy_args);
}
}
Modified: mpich2/trunk/src/pm/hydra/pm/utils/pmi.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/utils/pmi.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/pm/utils/pmi.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -3,5 +3,3 @@
* (C) 2008 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
-
-
Modified: mpich2/trunk/src/pm/hydra/utils/args/args.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/args/args.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/utils/args/args.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -8,9 +8,8 @@
HYD_Status HYDU_get_base_path(char *execname, char *wdir, char **path)
{
- char *loc, *pre, *post;
+ char *loc, *post;
char *path_str[HYDU_NUM_JOIN_STR];
- int i;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
@@ -21,11 +20,11 @@
if (!loc) { /* If there is no path */
*path = MPIU_Strdup("");
}
- else { /* There is a path */
+ else { /* There is a path */
*(++loc) = 0;
/* Check if its absolute or relative */
- if (post[0] != '/') { /* relative */
+ if (post[0] != '/') { /* relative */
path_str[0] = wdir;
path_str[1] = "/";
path_str[2] = post;
@@ -33,17 +32,17 @@
status = HYDU_str_alloc_and_join(path_str, path);
HYDU_ERR_POP(status, "unable to join strings\n");
}
- else { /* absolute */
+ else { /* absolute */
*path = MPIU_Strdup(post);
}
}
-fn_exit:
+ fn_exit:
if (post)
HYDU_FREE(post);
HYDU_FUNC_EXIT();
return status;
-fn_fail:
+ fn_fail:
goto fn_exit;
}
Modified: mpich2/trunk/src/pm/hydra/utils/env/env.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/env/env.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/utils/env/env.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -28,11 +28,11 @@
for (i = 0; tmp[i]; i++)
HYDU_FREE(tmp[i]);
-fn_exit:
+ fn_exit:
HYDU_FUNC_EXIT();
return status;
-fn_fail:
+ fn_fail:
goto fn_exit;
}
@@ -62,6 +62,32 @@
}
+HYD_Env_t *HYDU_str_to_env(char *str)
+{
+ HYD_Env_t *env;
+ char *env_name, *env_value;
+ HYD_Status status = HYD_SUCCESS;
+
+ HYDU_FUNC_ENTER();
+
+ HYDU_MALLOC(env, HYD_Env_t *, sizeof(HYD_Env_t), status);
+ env_name = strtok(str, "=");
+ env_value = strtok(NULL, "=");
+ env->env_name = MPIU_Strdup(env_name);
+ env->env_value = env_value ? MPIU_Strdup(env_value) : NULL;
+
+ fn_exit:
+ HYDU_FUNC_EXIT();
+ return env;
+
+ fn_fail:
+ if (env)
+ HYDU_FREE(env);
+ env = NULL;
+ goto fn_exit;
+}
+
+
HYD_Status HYDU_list_append_env_to_str(HYD_Env_t * env_list, char **str_list)
{
int i;
@@ -80,11 +106,11 @@
}
str_list[i++] = NULL;
-fn_exit:
+ fn_exit:
HYDU_FUNC_EXIT();
return status;
-fn_fail:
+ fn_fail:
goto fn_exit;
}
@@ -92,7 +118,7 @@
HYD_Status HYDU_list_global_env(HYD_Env_t ** env_list)
{
HYD_Env_t *env;
- char *env_name, *env_value, *env_str;
+ char *env_str;
int i;
HYD_Status status = HYD_SUCCESS;
@@ -101,19 +127,15 @@
*env_list = NULL;
i = 0;
while (environ[i]) {
- HYDU_MALLOC(env, HYD_Env_t *, sizeof(HYD_Env_t), status);
-
env_str = MPIU_Strdup(environ[i]);
- env_name = strtok(env_str, "=");
- env_value = strtok(NULL, "=");
- env->env_name = MPIU_Strdup(env_name);
- env->env_value = env_value ? MPIU_Strdup(env_value) : NULL;
- HYDU_FREE(env_str);
+ env = HYDU_str_to_env(env_str);
+
status = HYDU_append_env_to_list(*env, env_list);
HYDU_ERR_POP(status, "unable to add env to list\n");
HYDU_env_free(env);
+ HYDU_FREE(env_str);
i++;
}
@@ -298,7 +320,7 @@
}
-HYD_Status HYDU_comma_list_to_env_list(char *str, HYD_Env_t **env_list)
+HYD_Status HYDU_comma_list_to_env_list(char *str, HYD_Env_t ** env_list)
{
char *env_name;
HYD_Env_t *env;
Modified: mpich2/trunk/src/pm/hydra/utils/launch/allocate.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/launch/allocate.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/utils/launch/allocate.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -6,25 +6,24 @@
#include "hydra_utils.h"
-HYD_Status HYDU_alloc_partition(struct HYD_Partition_list **partition)
+HYD_Status HYDU_alloc_partition(struct HYD_Partition **partition)
{
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
- HYDU_MALLOC(*partition, struct HYD_Partition_list *,
- sizeof(struct HYD_Partition_list), status);
+ HYDU_MALLOC(*partition, struct HYD_Partition *, sizeof(struct HYD_Partition), status);
(*partition)->name = NULL;
- (*partition)->proc_count = 0;
- (*partition)->mapping = NULL;
- (*partition)->group_id = -1;
- (*partition)->group_rank = -1;
+ (*partition)->segment_list = NULL;
+ (*partition)->total_proc_count = 0;
+
(*partition)->pid = -1;
(*partition)->out = -1;
(*partition)->err = -1;
(*partition)->exit_status = -1;
- (*partition)->args[0] = NULL;
+ (*partition)->proxy_args[0] = NULL;
+ (*partition)->exec_list = NULL;
(*partition)->next = NULL;
fn_exit:
@@ -36,37 +35,241 @@
}
-void HYDU_free_partition_list(struct HYD_Partition_list *partition)
+HYD_Status HYDU_alloc_exec_info(struct HYD_Exec_info **exec_info)
{
- struct HYD_Partition_list *run, *p;
- int arg;
+ HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
- p = partition;
- run = p;
- while (run) {
- run = p->next;
+ HYDU_MALLOC(*exec_info, struct HYD_Exec_info *, sizeof(struct HYD_Exec_info), status);
+ (*exec_info)->exec_proc_count = 0;
+ (*exec_info)->exec[0] = NULL;
+ (*exec_info)->user_env = NULL;
+ (*exec_info)->prop = HYD_ENV_PROP_UNSET;
+ (*exec_info)->prop_env = NULL;
+ (*exec_info)->next = NULL;
- if (p->name) {
- HYDU_FREE(p->name);
- p->name = NULL;
+ fn_exit:
+ HYDU_FUNC_EXIT();
+ return status;
+
+ fn_fail:
+ goto fn_exit;
+}
+
+
+void HYDU_free_exec_info_list(struct HYD_Exec_info *exec_info_list)
+{
+ struct HYD_Exec_info *exec_info, *run;
+
+ HYDU_FUNC_ENTER();
+
+ exec_info = exec_info_list;
+ while (exec_info) {
+ run = exec_info->next;
+ HYDU_free_strlist(exec_info->exec);
+
+ HYDU_env_free_list(exec_info->user_env);
+ exec_info->user_env = NULL;
+
+ HYDU_env_free_list(exec_info->prop_env);
+ exec_info->prop_env = NULL;
+
+ HYDU_FREE(exec_info);
+ exec_info = run;
+ }
+
+ HYDU_FUNC_EXIT();
+}
+
+
+void HYDU_free_partition_list(struct HYD_Partition *partition_list)
+{
+ struct HYD_Partition *partition, *tpartition;
+ struct HYD_Partition_segment *segment, *tsegment;
+ struct HYD_Partition_exec *exec, *texec;
+
+ HYDU_FUNC_ENTER();
+
+ partition = partition_list;
+ while (partition) {
+ tpartition = partition->next;
+
+ HYDU_FREE(partition->name);
+
+ segment = partition->segment_list;
+ while (segment) {
+ tsegment = segment->next;
+ if (segment->mapping) {
+ HYDU_free_strlist(segment->mapping);
+ HYDU_FREE(segment->mapping);
+ }
+ HYDU_FREE(segment);
+ segment = tsegment;
}
- if (p->mapping) {
- for (arg = 0; p->mapping[arg]; arg++) {
- HYDU_FREE(p->mapping[arg]);
- p->mapping[arg] = NULL;
+ exec = partition->exec_list;
+ while (exec) {
+ texec = exec->next;
+ HYDU_free_strlist(exec->exec);
+ if (exec->prop_env)
+ HYDU_env_free(exec->prop_env);
+ HYDU_FREE(exec);
+ exec = texec;
+ }
+
+ HYDU_free_strlist(partition->proxy_args);
+
+ HYDU_FREE(partition);
+ partition = tpartition;
+ }
+
+ HYDU_FUNC_EXIT();
+}
+
+
+HYD_Status HYDU_alloc_partition_segment(struct HYD_Partition_segment **segment)
+{
+ HYD_Status status = HYD_SUCCESS;
+
+ HYDU_FUNC_ENTER();
+
+ HYDU_MALLOC(*segment, struct HYD_Partition_segment *,
+ sizeof(struct HYD_Partition_segment), status);
+ (*segment)->start_pid = -1;
+ (*segment)->proc_count = 0;
+ (*segment)->mapping = NULL;
+ (*segment)->next = NULL;
+
+ fn_exit:
+ HYDU_FUNC_EXIT();
+ return status;
+
+ fn_fail:
+ goto fn_exit;
+}
+
+
+HYD_Status HYDU_merge_partition_segment(char *name, struct HYD_Partition_segment *segment,
+ struct HYD_Partition **partition_list)
+{
+ struct HYD_Partition *partition;
+ struct HYD_Partition_segment *s;
+ HYD_Status status = HYD_SUCCESS;
+
+ HYDU_FUNC_ENTER();
+
+ if (partition_list == NULL) {
+ HYDU_alloc_partition(partition_list);
+ (*partition_list)->segment_list = segment;
+ }
+ else {
+ partition = *partition_list;
+ while (partition) {
+ if (strcmp(partition->name, name) == 0) {
+ if (partition->segment_list == NULL)
+ partition->segment_list = segment;
+ else {
+ s = partition->segment_list;
+ while (s->next)
+ s = s->next;
+ s->next = segment;
+ }
+ break;
}
- HYDU_FREE(p->mapping);
- p->mapping = NULL;
+ else if (partition->next == NULL) {
+ HYDU_alloc_partition(&partition->next);
+ partition->next->segment_list = segment;
+ break;
+ }
+ else {
+ partition = partition->next;
+ }
}
+ }
- HYDU_free_strlist(p->args);
- HYDU_FREE(p);
+ HYDU_FUNC_EXIT();
+ return status;
+}
- p = run;
+
+HYD_Status HYDU_alloc_partition_exec(struct HYD_Partition_exec ** exec)
+{
+ HYD_Status status = HYD_SUCCESS;
+
+ HYDU_FUNC_ENTER();
+
+ HYDU_MALLOC(*exec, struct HYD_Partition_exec *, sizeof(struct HYD_Partition_exec), status);
+ (*exec)->exec[0] = NULL;
+ (*exec)->proc_count = 0;
+ (*exec)->prop = HYD_ENV_PROP_UNSET;
+ (*exec)->prop_env = NULL;
+ (*exec)->next = NULL;
+
+ fn_exit:
+ HYDU_FUNC_EXIT();
+ return status;
+
+ fn_fail:
+ goto fn_exit;
+}
+
+
+HYD_Status HYDU_create_host_list(char *host_file, struct HYD_Partition **partition_list)
+{
+ FILE *fp = NULL;
+ char line[2 * MAX_HOSTNAME_LEN], *hostname, *procs;
+ int num_procs, total_count;
+ struct HYD_Partition_segment *segment;
+ HYD_Status status = HYD_SUCCESS;
+
+ HYDU_FUNC_ENTER();
+
+ if (!strcmp(host_file, "HYDRA_USE_LOCALHOST")) {
+ HYDU_alloc_partition(&(*partition_list));
+ (*partition_list)->name = MPIU_Strdup("localhost");
+ (*partition_list)->total_proc_count = 1;
+
+ HYDU_alloc_partition_segment(&((*partition_list)->segment_list));
+ (*partition_list)->segment_list->start_pid = 0;
+ (*partition_list)->segment_list->proc_count = 1;
}
+ else {
+ fp = fopen(host_file, "r");
+ if (!fp)
+ HYDU_ERR_SETANDJUMP1(status, HYD_INTERNAL_ERROR,
+ "unable to open host file: %s\n", host_file);
+ total_count = 0;
+ while (!feof(fp)) {
+ if ((fscanf(fp, "%s", line) < 0) && errno)
+ HYDU_ERR_SETANDJUMP1(status, HYD_INTERNAL_ERROR,
+ "unable to read input line (errno: %d)\n", errno);
+ if (feof(fp))
+ break;
+
+ hostname = strtok(line, ":");
+ procs = strtok(NULL, ":");
+ num_procs = procs ? atoi(procs) : 1;
+
+ /* Try to find an existing partition with this name and
+ * add this segment in. If there is no existing partition
+ * with this name, we create a new one. */
+ HYDU_alloc_partition_segment(&segment);
+ segment->start_pid = total_count;
+ segment->proc_count = num_procs;
+ HYDU_merge_partition_segment(hostname, segment, partition_list);
+
+ total_count += num_procs;
+ }
+
+ fclose(fp);
+ }
+
+ fn_exit:
HYDU_FUNC_EXIT();
+ return status;
+
+ fn_fail:
+ goto fn_exit;
}
Modified: mpich2/trunk/src/pm/hydra/utils/launch/launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/launch/launch.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/utils/launch/launch.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -9,7 +9,7 @@
HYD_Status HYDU_create_process(char **client_arg, int *in, int *out, int *err,
int *pid, int core)
{
- int inpipe[2], outpipe[2], errpipe[2], tpid, my_pid;
+ int inpipe[2], outpipe[2], errpipe[2], tpid;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
Modified: mpich2/trunk/src/pm/hydra/utils/sock/sock.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/sock/sock.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/utils/sock/sock.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -395,7 +395,7 @@
if (count < 0)
HYDU_ERR_SETANDJUMP2(status, HYD_SOCK_ERROR, "write error on %d (%s)\n",
fd, HYDU_strerror(errno))
- *buf_offset += count;
+ * buf_offset += count;
*buf_count -= count;
break;
}
Modified: mpich2/trunk/src/pm/hydra/utils/string/string.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/string/string.c 2009-03-21 04:05:44 UTC (rev 4155)
+++ mpich2/trunk/src/pm/hydra/utils/string/string.c 2009-03-22 05:18:11 UTC (rev 4156)
@@ -93,10 +93,10 @@
*str1 = MPIU_Strdup(str);
for (i = 0; (*str1)[i] && ((*str1)[i] != sep); i++);
- if ((*str1)[i] == 0) /* End of the string */
+ if ((*str1)[i] == 0) /* End of the string */
*str2 = NULL;
else {
- *str2 = &((*str1)[i+1]);
+ *str2 = &((*str1)[i + 1]);
(*str1)[i] = 0;
}
More information about the mpich2-commits mailing list