[mpich2-commits] r3972 - in mpich2/trunk/src/pm/hydra: bootstrap/ssh bootstrap/utils control/consys include launcher/mpiexec launcher/utils pm/central utils/launch
balaji at mcs.anl.gov
balaji at mcs.anl.gov
Sun Mar 8 18:35:27 CDT 2009
Author: balaji
Date: 2009-03-08 18:35:27 -0500 (Sun, 08 Mar 2009)
New Revision: 3972
Added:
mpich2/trunk/src/pm/hydra/utils/launch/allocate.c
Removed:
mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_finalize.c
mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_init.c
Modified:
mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_finalize.c
mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_launch.c
mpich2/trunk/src/pm/hydra/bootstrap/utils/Makefile.sm
mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu.h
mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_wait.c
mpich2/trunk/src/pm/hydra/control/consys/consys_close.c
mpich2/trunk/src/pm/hydra/control/consys/consys_launch.c
mpich2/trunk/src/pm/hydra/control/consys/consys_wait.c
mpich2/trunk/src/pm/hydra/include/hydra.h
mpich2/trunk/src/pm/hydra/include/hydra_launch.h
mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c
mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c
mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c
mpich2/trunk/src/pm/hydra/launcher/utils/lchu.h
mpich2/trunk/src/pm/hydra/pm/central/central_launch.c
mpich2/trunk/src/pm/hydra/utils/launch/Makefile.sm
Log:
Infrastructure changes to the Hydra framework to support the proxy.
Modified: mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_finalize.c
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_finalize.c 2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_finalize.c 2009-03-08 23:35:27 UTC (rev 3972)
@@ -16,18 +16,6 @@
HYDU_FUNC_ENTER();
- status = HYD_BSCU_Finalize_exit_status();
- if (status != HYD_SUCCESS) {
- HYDU_Error_printf("unable to finalize exit status\n");
- goto fn_fail;
- }
-
- status = HYD_BSCU_Finalize_io_fds();
- if (status != HYD_SUCCESS) {
- HYDU_Error_printf("unable to finalize I/O fds\n");
- goto fn_fail;
- }
-
fn_exit:
HYDU_FUNC_EXIT();
return status;
Modified: mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_launch.c 2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_launch.c 2009-03-08 23:35:27 UTC (rev 3972)
@@ -29,76 +29,54 @@
HYDU_FUNC_ENTER();
- status = HYD_BSCU_Init_exit_status();
- if (status != HYD_SUCCESS) {
- HYDU_Error_printf("bootstrap utils returned error when initializing exit status\n");
- goto fn_fail;
- }
-
status = HYD_BSCU_Set_common_signals(HYD_BSCU_Signal_handler);
if (status != HYD_SUCCESS) {
HYDU_Error_printf("signal utils returned error when trying to set signal\n");
goto fn_fail;
}
- status = HYD_BSCU_Init_io_fds();
- if (status != HYD_SUCCESS) {
- HYDU_Error_printf("bootstrap utils returned error when initializing io fds\n");
- goto fn_fail;
- }
-
process_id = 0;
for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
for (partition = proc_params->partition; partition; partition = partition->next) {
- for (i = 0; i < partition->proc_count; i++) {
- /* Setup the executable arguments */
- arg = 0;
- client_arg[arg++] = MPIU_Strdup("/usr/bin/ssh");
+ if (partition->group_rank) /* Only rank 0 is spawned */
+ continue;
- /* Allow X forwarding only if explicitly requested */
- if (handle.enablex == 1)
- client_arg[arg++] = MPIU_Strdup("-X");
- else if (handle.enablex == 0)
- client_arg[arg++] = MPIU_Strdup("-x");
- else /* default mode is disable X */
- client_arg[arg++] = MPIU_Strdup("-x");
+ /* Setup the executable arguments */
+ arg = 0;
+ client_arg[arg++] = MPIU_Strdup("/usr/bin/ssh");
- /* ssh does not support any partition names other than host names */
- client_arg[arg++] = MPIU_Strdup(partition->name);
+ /* Allow X forwarding only if explicitly requested */
+ if (handle.enablex == 1)
+ client_arg[arg++] = MPIU_Strdup("-X");
+ else if (handle.enablex == 0)
+ client_arg[arg++] = MPIU_Strdup("-x");
+ else /* default mode is disable X */
+ client_arg[arg++] = MPIU_Strdup("-x");
- client_arg[arg++] = MPIU_Strdup("sh");
- client_arg[arg++] = MPIU_Strdup("-c");
- client_arg[arg++] = MPIU_Strdup("\"");
- client_arg[arg++] = NULL;
+ /* ssh does not support any partition names other than host names */
+ client_arg[arg++] = MPIU_Strdup(partition->name);
- HYDU_Append_env(handle.system_env, client_arg, process_id);
- HYDU_Append_env(proc_params->prop_env, client_arg, process_id);
- HYDU_Append_wdir(client_arg);
- HYDU_Append_exec(proc_params->exec, client_arg);
+ for (i = 0; partition->args[i]; i++)
+ client_arg[arg++] = MPIU_Strdup(partition->args[i]);
+ client_arg[arg] = NULL;
- for (arg = 0; client_arg[arg]; arg++);
- client_arg[arg++] = MPIU_Strdup("\"");
- client_arg[arg++] = NULL;
+ /* The stdin pointer will be some value for process_id 0;
+ * for everyone else, it's NULL. */
+ status = HYDU_Create_process(client_arg, (process_id == 0 ? &handle.in : NULL),
+ &partition->out, &partition->err, &partition->pid);
+ if (status != HYD_SUCCESS) {
+ HYDU_Error_printf("bootstrap spawn process returned error\n");
+ goto fn_fail;
+ }
- /* The stdin pointer will be some value for process_id
- * 0; for everyone else, it's NULL. */
- status = HYDU_Create_process(client_arg, (process_id == 0 ? &handle.in : NULL),
- &proc_params->out[i], &proc_params->err[i],
- &proc_params->pid[i]);
- if (status != HYD_SUCCESS) {
- HYDU_Error_printf("bootstrap spawn process returned error\n");
- goto fn_fail;
- }
+ for (arg = 0; client_arg[arg]; arg++)
+ HYDU_FREE(client_arg[arg]);
- for (arg = 0; client_arg[arg]; arg++)
- HYDU_FREE(client_arg[arg]);
+ /* For the remaining processes, set the stdin fd to -1 */
+ if (process_id != 0)
+ handle.in = -1;
- /* For the remaining processes, set the stdin fd to -1 */
- if (process_id != 0)
- handle.in = -1;
-
- process_id++;
- }
+ process_id++;
}
}
Modified: mpich2/trunk/src/pm/hydra/bootstrap/utils/Makefile.sm
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/utils/Makefile.sm 2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/bootstrap/utils/Makefile.sm 2009-03-08 23:35:27 UTC (rev 3972)
@@ -7,7 +7,7 @@
HYDRA_LIB_PATH = ../../lib
libhydra_a_DIR = ${HYDRA_LIB_PATH}
-libhydra_a_SOURCES = bscu_init.c bscu_finalize.c bscu_wait.c bscu_signal.c
+libhydra_a_SOURCES = bscu_wait.c bscu_signal.c
INCLUDES = -I${abs_srcdir}/../../include \
-I${abs_srcdir}/../../../../include \
-I../../include \
Modified: mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu.h
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu.h 2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu.h 2009-03-08 23:35:27 UTC (rev 3972)
@@ -11,10 +11,6 @@
#include "hydra_sig.h"
#include "bsci.h"
-HYD_Status HYD_BSCU_Init_exit_status(void);
-HYD_Status HYD_BSCU_Finalize_exit_status(void);
-HYD_Status HYD_BSCU_Init_io_fds(void);
-HYD_Status HYD_BSCU_Finalize_io_fds(void);
HYD_Status HYD_BSCU_Wait_for_completion(void);
HYD_Status HYD_BSCU_Set_common_signals(void (*handler) (int));
void HYD_BSCU_Signal_handler(int signal);
Deleted: mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_finalize.c
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_finalize.c 2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_finalize.c 2009-03-08 23:35:27 UTC (rev 3972)
@@ -1,51 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- * (C) 2008 by Argonne National Laboratory.
- * See COPYRIGHT in top-level directory.
- */
-
-#include "hydra.h"
-#include "hydra_mem.h"
-#include "bsci.h"
-#include "bscu.h"
-
-HYD_Handle handle;
-
-HYD_Status HYD_BSCU_Finalize_exit_status(void)
-{
- struct HYD_Proc_params *proc_params;
- int i;
- HYD_Status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- proc_params = handle.proc_params;
- while (proc_params) {
- HYDU_FREE(proc_params->pid);
- HYDU_FREE(proc_params->exit_status);
- HYDU_FREE(proc_params->exit_status_valid);
- proc_params = proc_params->next;
- }
-
- HYDU_FUNC_EXIT();
- return status;
-}
-
-
-HYD_Status HYD_BSCU_Finalize_io_fds(void)
-{
- struct HYD_Proc_params *proc_params;
- HYD_Status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- proc_params = handle.proc_params;
- while (proc_params) {
- HYDU_FREE(proc_params->out);
- HYDU_FREE(proc_params->err);
- proc_params = proc_params->next;
- }
-
- HYDU_FUNC_EXIT();
- return status;
-}
Deleted: mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_init.c
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_init.c 2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_init.c 2009-03-08 23:35:27 UTC (rev 3972)
@@ -1,66 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- * (C) 2008 by Argonne National Laboratory.
- * See COPYRIGHT in top-level directory.
- */
-
-#include "hydra.h"
-#include "hydra_mem.h"
-#include "bsci.h"
-#include "bscu.h"
-
-HYD_Handle handle;
-
-HYD_Status HYD_BSCU_Init_exit_status(void)
-{
- struct HYD_Proc_params *proc_params;
- int i;
- HYD_Status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- /* Set the exit status of all processes to 1 (> 0 means that the
- * status is not set yet). Also count the number of processes in
- * the same loop. */
- proc_params = handle.proc_params;
- while (proc_params) {
- HYDU_MALLOC(proc_params->pid, int *, proc_params->exec_proc_count * sizeof(int), status);
- HYDU_MALLOC(proc_params->exit_status, int *, proc_params->exec_proc_count * sizeof(int),
- status);
- HYDU_MALLOC(proc_params->exit_status_valid, int *, proc_params->exec_proc_count * sizeof(int),
- status);
- for (i = 0; i < proc_params->exec_proc_count; i++)
- proc_params->exit_status_valid[i] = 0;
- proc_params = proc_params->next;
- }
-
- fn_exit:
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- goto fn_exit;
-}
-
-
-HYD_Status HYD_BSCU_Init_io_fds(void)
-{
- struct HYD_Proc_params *proc_params;
- HYD_Status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- proc_params = handle.proc_params;
- while (proc_params) {
- HYDU_MALLOC(proc_params->out, int *, proc_params->exec_proc_count * sizeof(int), status);
- HYDU_MALLOC(proc_params->err, int *, proc_params->exec_proc_count * sizeof(int), status);
- proc_params = proc_params->next;
- }
-
- fn_exit:
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- goto fn_exit;
-}
Modified: mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_wait.c
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_wait.c 2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_wait.c 2009-03-08 23:35:27 UTC (rev 3972)
@@ -21,18 +21,16 @@
{
int pid, ret_status, i, not_completed;
struct HYD_Proc_params *proc_params;
+ struct HYD_Partition_list *partition;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
not_completed = 0;
- proc_params = handle.proc_params;
- while (proc_params) {
- for (i = 0; i < proc_params->exec_proc_count; i++)
- if (proc_params->exit_status_valid[i] == 0)
+ for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next)
+ for (partition = proc_params->partition; partition; partition = partition->next)
+ if (partition->exit_status == -1)
not_completed++;
- proc_params = proc_params->next;
- }
/* We get here only after the I/O sockets have been closed. If the
* application did not manually close its stdout and stderr
@@ -43,16 +41,13 @@
pid = waitpid(-1, &ret_status, WNOHANG);
if (pid > 0) {
/* Find the pid and mark it as complete. */
- proc_params = handle.proc_params;
- while (proc_params) {
- for (i = 0; i < proc_params->exec_proc_count; i++) {
- if (proc_params->pid[i] == pid) {
- proc_params->exit_status[i] = WEXITSTATUS(ret_status);
- proc_params->exit_status_valid[i] = 1;
+ for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
+ for (partition = proc_params->partition; partition; partition = partition->next) {
+ if (partition->pid == pid) {
+ partition->exit_status = WEXITSTATUS(ret_status);
not_completed--;
}
}
- proc_params = proc_params->next;
}
}
if (HYD_CSU_Time_left() == 0)
Modified: mpich2/trunk/src/pm/hydra/control/consys/consys_close.c
===================================================================
--- mpich2/trunk/src/pm/hydra/control/consys/consys_close.c 2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/control/consys/consys_close.c 2009-03-08 23:35:27 UTC (rev 3972)
@@ -17,6 +17,7 @@
{
int i;
struct HYD_Proc_params *proc_params;
+ struct HYD_Partition_list *partition;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
@@ -31,19 +32,17 @@
close(fd);
/* Find the FD in the handle and remove it. */
- proc_params = handle.proc_params;
- while (proc_params) {
- for (i = 0; i < proc_params->exec_proc_count; i++) {
- if (proc_params->out[i] == fd) {
- proc_params->out[i] = -1;
+ for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
+ for (partition = proc_params->partition; partition; partition = partition->next) {
+ if (partition->out == fd) {
+ partition->out = -1;
goto fn_exit;
}
- if (proc_params->err[i] == fd) {
- proc_params->err[i] = -1;
+ if (partition->err == fd) {
+ partition->err = -1;
goto fn_exit;
}
}
- proc_params = proc_params->next;
}
fn_exit:
Modified: mpich2/trunk/src/pm/hydra/control/consys/consys_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/control/consys/consys_launch.c 2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/control/consys/consys_launch.c 2009-03-08 23:35:27 UTC (rev 3972)
@@ -15,6 +15,7 @@
HYD_Status HYD_CSI_Launch_procs(void)
{
struct HYD_Proc_params *proc_params;
+ struct HYD_Partition_list *partition;
int stdin_fd, flags, count;
HYD_Status status = HYD_SUCCESS;
@@ -26,23 +27,20 @@
goto fn_fail;
}
- proc_params = handle.proc_params;
- while (proc_params) {
- status = HYD_DMX_Register_fd(proc_params->exec_proc_count, proc_params->out,
- HYD_STDOUT, proc_params->stdout_cb);
- if (status != HYD_SUCCESS) {
- HYDU_Error_printf("demux engine returned error when registering fd\n");
- goto fn_fail;
- }
+ for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
+ for (partition = proc_params->partition; partition; partition = partition->next) {
+ status = HYD_DMX_Register_fd(1, &partition->out, HYD_STDOUT, proc_params->stdout_cb);
+ if (status != HYD_SUCCESS) {
+ HYDU_Error_printf("demux engine returned error when registering fd\n");
+ goto fn_fail;
+ }
- status = HYD_DMX_Register_fd(proc_params->exec_proc_count, proc_params->err,
- HYD_STDOUT, proc_params->stderr_cb);
- if (status != HYD_SUCCESS) {
- HYDU_Error_printf("demux engine returned error when registering fd\n");
- goto fn_fail;
+ status = HYD_DMX_Register_fd(1, &partition->err, HYD_STDOUT, proc_params->stderr_cb);
+ if (status != HYD_SUCCESS) {
+ HYDU_Error_printf("demux engine returned error when registering fd\n");
+ goto fn_fail;
+ }
}
-
- proc_params = proc_params->next;
}
if (handle.in != -1) { /* Only process_id 0 */
Modified: mpich2/trunk/src/pm/hydra/control/consys/consys_wait.c
===================================================================
--- mpich2/trunk/src/pm/hydra/control/consys/consys_wait.c 2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/control/consys/consys_wait.c 2009-03-08 23:35:27 UTC (rev 3972)
@@ -16,6 +16,7 @@
{
int sockets_open, i;
struct HYD_Proc_params *proc_params;
+ struct HYD_Partition_list *partition;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
@@ -30,19 +31,16 @@
/* Check to see if there's any open read socket left; if there
* are, we will just wait for more events. */
- proc_params = handle.proc_params;
sockets_open = 0;
- while (proc_params) {
- for (i = 0; i < proc_params->exec_proc_count; i++) {
- if (proc_params->out[i] != -1 || proc_params->err[i] != -1) {
+ for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
+ for (partition = proc_params->partition; partition; partition = partition->next) {
+ if (partition->out != -1 || partition->err != -1) {
sockets_open++;
break;
}
}
if (sockets_open)
break;
-
- proc_params = proc_params->next;
}
if (sockets_open && HYD_CSU_Time_left())
Modified: mpich2/trunk/src/pm/hydra/include/hydra.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra.h 2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/include/hydra.h 2009-03-08 23:35:27 UTC (rev 3972)
@@ -96,6 +96,7 @@
char *host_file;
+ /* Global environment */
HYD_Env_t *global_env;
HYD_Env_t *system_env;
HYD_Env_t *user_env;
@@ -115,37 +116,56 @@
* executable and environment. */
struct HYD_Proc_params {
int exec_proc_count;
+ char *exec[HYD_EXEC_ARGS];
+
struct HYD_Partition_list {
char * name;
int proc_count;
char ** mapping; /* Can be core IDs or something else */
+
+ /*
+ * The boot-strap server is expected to start a single
+ * executable on the first possible node and return a
+ * single PID. This executable could be a PM proxy that
+ * will launch the actual application on the rest of the
+ * partition list.
+ *
+ * Possible hacks:
+ *
+ * 1. If the process manager needs more proxies within
+ * this same list, it can use different group
+ * IDs. Each group ID will have its own proxy.
+ *
+ * 2. If no proxy is needed, the PM can split this list
+ * into one element per process. The boot-strap
+ * server itself does not distinguish a proxy from
+ * the application executable, so it will not require
+ * any changes.
+ *
+ * 3. One proxy per physical node means that each
+ * partition will have a different group ID.
+ */
+ int group_id; /* Assumed to be in ascending order */
+ int group_rank; /* Rank within the group */
+ int pid;
+ int out;
+ int err;
+ int exit_status;
+ char * args[HYD_EXEC_ARGS];
+
struct HYD_Partition_list *next;
} *partition;
- char *exec[HYD_EXEC_ARGS];
+ /* Local environment */
HYD_Env_t *user_env;
HYD_Env_prop_t prop;
HYD_Env_t *prop_env;
- /* These output FDs are filled in by the lower layers */
- int *out;
- int *err;
-
/* Callback functions for the stdout/stderr events. These can
* be the same. */
HYD_Status(*stdout_cb) (int fd, HYD_Event_t events);
HYD_Status(*stderr_cb) (int fd, HYD_Event_t events);
- /* Status > 0 means that it is not set yet. Successful
- * completion of a process will set the status to 0. An error
- * will set this to a negative value corresponding to the
- * error. Depending on the bootstrap server, these values
- * might correspond to per-process status, or can be a common
- * value for all processes. */
- int *pid;
- int *exit_status;
- int *exit_status_valid;
-
struct HYD_Proc_params *next;
} *proc_params;
Modified: mpich2/trunk/src/pm/hydra/include/hydra_launch.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra_launch.h 2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/include/hydra_launch.h 2009-03-08 23:35:27 UTC (rev 3972)
@@ -12,6 +12,7 @@
HYD_Status HYDU_Append_env(HYD_Env_t * env_list, char **client_arg, int id);
HYD_Status HYDU_Append_exec(char **exec, char **client_arg);
HYD_Status HYDU_Append_wdir(char **client_arg);
+HYD_Status HYDU_Allocate_Partition(struct HYD_Partition_list **partition);
HYD_Status HYDU_Create_process(char **client_arg, int *in, int *out, int *err, int *pid);
#endif /* HYDRA_LAUNCH_H_INCLUDED */
Modified: mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c 2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c 2009-03-08 23:35:27 UTC (rev 3972)
@@ -45,6 +45,7 @@
int main(int argc, char **argv)
{
struct HYD_Proc_params *proc_params;
+ struct HYD_Partition_list *partition;
int exit_status, i;
HYD_Status status = HYD_SUCCESS;
@@ -109,13 +110,10 @@
}
/* Check for the exit status for all the processes */
- proc_params = handle.proc_params;
exit_status = 0;
- while (proc_params) {
- for (i = 0; i < proc_params->exec_proc_count; i++)
- exit_status |= proc_params->exit_status[i];
- proc_params = proc_params->next;
- }
+ for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next)
+ for (partition = proc_params->partition; partition; partition = partition->next)
+ exit_status |= partition->exit_status;
/* Call finalize functions for lower layers to cleanup their resources */
status = HYD_CSI_Finalize();
Modified: mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c 2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c 2009-03-08 23:35:27 UTC (rev 3972)
@@ -49,11 +49,8 @@
proc_params->user_env = NULL;
proc_params->prop = HYD_ENV_PROP_UNSET;
proc_params->prop_env = NULL;
- proc_params->out = NULL;
- proc_params->err = NULL;
proc_params->stdout_cb = NULL;
proc_params->stderr_cb = NULL;
- proc_params->exit_status = NULL;
proc_params->next = NULL;
*params = proc_params;
Modified: mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c 2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c 2009-03-08 23:35:27 UTC (rev 3972)
@@ -32,13 +32,9 @@
proc_params = handle.proc_params;
while (proc_params) {
if (!strcmp(handle.host_file, "HYDRA_USE_LOCALHOST")) {
- HYDU_MALLOC(proc_params->partition, struct HYD_Partition_list *,
- sizeof(struct HYD_Partition_list), status);
-
+ HYDU_Allocate_Partition(&proc_params->partition);
proc_params->partition->name = MPIU_Strdup("localhost");
proc_params->partition->proc_count = proc_params->exec_proc_count;
- proc_params->partition->mapping = NULL;
- proc_params->partition->next = NULL;
total_procs = proc_params->exec_proc_count;
}
else {
@@ -59,20 +55,18 @@
if (num_procs > (proc_params->exec_proc_count - total_procs))
num_procs = (proc_params->exec_proc_count - total_procs);
- if (proc_params->partition) {
+ if (!proc_params->partition) {
+ HYDU_Allocate_Partition(&proc_params->partition);
+ partition = proc_params->partition;
+ }
+ else {
for (partition = proc_params->partition; partition->next;
partition = partition->next);
- HYDU_MALLOC(partition->next, struct HYD_Partition_list *,
- sizeof(struct HYD_Partition_list), status);
+ HYDU_Allocate_Partition(&partition->next);
partition = partition->next;
}
-
partition->name = MPIU_Strdup(hostname);
-
- /* FIXME: We don't support mappings yet */
- partition->mapping = NULL;
partition->proc_count = num_procs;
- partition->next = NULL;
total_procs += num_procs;
if (total_procs == proc_params->exec_proc_count)
@@ -198,15 +192,16 @@
HYD_Status HYD_LCHU_Free_io(void)
{
struct HYD_Proc_params *proc_params;
+ struct HYD_Partition_list *partition;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
- proc_params = handle.proc_params;
- while (proc_params) {
- HYDU_FREE(proc_params->out);
- HYDU_FREE(proc_params->err);
- proc_params = proc_params->next;
+ for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
+ for (partition = proc_params->partition; partition; partition = partition->next) {
+ HYDU_FREE(partition->out);
+ HYDU_FREE(partition->err);
+ }
}
HYDU_FUNC_EXIT();
@@ -214,24 +209,6 @@
}
-HYD_Status HYD_LCHU_Free_exits(void)
-{
- struct HYD_Proc_params *proc_params;
- HYD_Status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- proc_params = handle.proc_params;
- while (proc_params) {
- HYDU_FREE(proc_params->exit_status);
- proc_params = proc_params->next;
- }
-
- HYDU_FUNC_EXIT();
- return status;
-}
-
-
HYD_Status HYD_LCHU_Free_exec(void)
{
struct HYD_Proc_params *proc_params;
Modified: mpich2/trunk/src/pm/hydra/launcher/utils/lchu.h
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/utils/lchu.h 2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/launcher/utils/lchu.h 2009-03-08 23:35:27 UTC (rev 3972)
@@ -14,7 +14,6 @@
HYD_Status HYD_LCHU_Create_env_list(void);
HYD_Status HYD_LCHU_Free_env_list(void);
HYD_Status HYD_LCHU_Free_io(void);
-HYD_Status HYD_LCHU_Free_exits(void);
HYD_Status HYD_LCHU_Free_exec(void);
HYD_Status HYD_LCHU_Free_proc_params(void);
Modified: mpich2/trunk/src/pm/hydra/pm/central/central_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/central/central_launch.c 2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/pm/central/central_launch.c 2009-03-08 23:35:27 UTC (rev 3972)
@@ -32,18 +32,20 @@
* auto-incrementing variable; the bootstrap server will take care of
* adding the process ID to the start value.
*
- * 5. Ask the bootstrap server to launch the processes.
+ * 5. Create a process info setup and ask the bootstrap server to
+ * launch the processes.
*/
HYD_Status HYD_PMCI_Launch_procs(void)
{
char *port_range, *port_str, *sport;
uint16_t low_port, high_port, port;
- int one = 1, i;
- int num_procs;
+ int one = 1, i, arg;
+ int num_procs, process_id, group_id;
char hostname[MAX_HOSTNAME_LEN];
struct sockaddr_in sa;
HYD_Env_t *env;
struct HYD_Proc_params *proc_params;
+ struct HYD_Partition_list *partition, *run, *next_partition;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
@@ -141,6 +143,58 @@
goto fn_fail;
}
+ /* FIXME: Temporary hack for testing till the proxy is in shape to
+ * be used -- we just break the partition list to multiple
+ * segments, one for each process and call the application
+ * executable directly. */
+ for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
+ group_id = 0;
+ for (partition = proc_params->partition; partition;) {
+ next_partition = partition->next; /* Keep track of the next partition */
+
+ partition->group_id = group_id++;
+ partition->group_rank = 0;
+
+ run = partition;
+ for (process_id = 1; process_id < partition->proc_count; process_id++) {
+ HYDU_Allocate_Partition(&run->next);
+ run = run->next;
+
+ run->name = MPIU_Strdup(partition->name);
+ run->proc_count = 1;
+ run->group_id = group_id++;
+ run->group_rank = 0;
+ }
+
+ partition->proc_count = 1;
+ partition = next_partition;
+ }
+ }
+
+ /* Create the arguments list for each proxy */
+ process_id = 0;
+ for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
+ for (partition = proc_params->partition; partition; partition = partition->next) {
+ /* Setup the executable arguments */
+ arg = 0;
+ partition->args[arg++] = MPIU_Strdup("sh");
+ partition->args[arg++] = MPIU_Strdup("-c");
+ partition->args[arg++] = MPIU_Strdup("\"");
+ partition->args[arg++] = NULL;
+
+ HYDU_Append_env(handle.system_env, partition->args, process_id);
+ HYDU_Append_env(proc_params->prop_env, partition->args, process_id);
+ HYDU_Append_wdir(partition->args);
+ HYDU_Append_exec(proc_params->exec, partition->args);
+
+ for (arg = 0; partition->args[arg]; arg++);
+ partition->args[arg++] = MPIU_Strdup("\"");
+ partition->args[arg++] = NULL;
+
+ process_id++;
+ }
+ }
+
/* Initialize the bootstrap server and ask it to launch the
* processes. */
status = HYD_BSCI_Launch_procs();
Modified: mpich2/trunk/src/pm/hydra/utils/launch/Makefile.sm
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/launch/Makefile.sm 2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/utils/launch/Makefile.sm 2009-03-08 23:35:27 UTC (rev 3972)
@@ -7,7 +7,7 @@
HYDRA_LIB_PATH = ../../lib
libhydra_a_DIR = ${HYDRA_LIB_PATH}
-libhydra_a_SOURCES = args.c launch.c
+libhydra_a_SOURCES = args.c allocate.c launch.c
INCLUDES = -I${abs_srcdir}/../../include \
-I${abs_srcdir}/../../../../include \
-I../../include \
Added: mpich2/trunk/src/pm/hydra/utils/launch/allocate.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/launch/allocate.c (rev 0)
+++ mpich2/trunk/src/pm/hydra/utils/launch/allocate.c 2009-03-08 23:35:27 UTC (rev 3972)
@@ -0,0 +1,40 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ * (C) 2008 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include "hydra.h"
+#include "hydra_mem.h"
+#include "hydra_launch.h"
+
+HYD_Handle handle;
+/*
+ * HYDU_Allocate_Partition - allocate one partition list node and reset
+ * every field to its "not set yet" value.
+ *
+ * partition: out parameter; *partition receives the newly allocated node.
+ *
+ * Returns the local status, which starts out as HYD_SUCCESS.
+ * NOTE(review): HYDU_MALLOC is handed 'status' and a fn_fail label exists,
+ * so it presumably reports an allocation failure through them -- confirm
+ * against the macro definition.
+ *
+ * Callers are expected to fill in name, proc_count and the group fields
+ * after this returns.
+ */
+HYD_Status HYDU_Allocate_Partition(struct HYD_Partition_list ** partition)
+{
+ HYD_Status status = HYD_SUCCESS;
+
+ HYDU_FUNC_ENTER();
+
+ HYDU_MALLOC(*partition, struct HYD_Partition_list *, sizeof(struct HYD_Partition_list),
+ status);
+ (*partition)->name = NULL; /* caller supplies the host/partition name */
+ (*partition)->proc_count = 0;
+ (*partition)->mapping = NULL; /* no core/ID mapping attached */
+ (*partition)->group_id = -1; /* not assigned to a group yet */
+ (*partition)->group_rank = -1; /* ssh_launch.c only spawns rank 0, so this must be set before launch */
+ (*partition)->pid = -1; /* overwritten when the bootstrap server creates the process */
+ (*partition)->out = -1; /* -1 is the "no open fd" value checked in consys_wait.c */
+ (*partition)->err = -1; /* same convention as 'out' */
+ (*partition)->exit_status = -1; /* bscu_wait.c counts -1 as "not completed" */
+ (*partition)->args[0] = NULL; /* empty NULL-terminated argument list */
+
+ (*partition)->next = NULL; /* node starts out as the end of a list */
+
+ fn_exit:
+ HYDU_FUNC_EXIT();
+ return status;
+
+ fn_fail:
+ goto fn_exit;
+}
More information about the mpich2-commits
mailing list