[mpich2-commits] r3970 - in mpich2/trunk/src/pm/hydra: bootstrap/ssh bootstrap/utils control/consys include launcher/mpiexec launcher/utils pm/utils
balaji at mcs.anl.gov
balaji at mcs.anl.gov
Sat Mar 7 21:44:48 CST 2009
Author: balaji
Date: 2009-03-07 21:44:47 -0600 (Sat, 07 Mar 2009)
New Revision: 3970
Modified:
mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_launch.c
mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_init.c
mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_wait.c
mpich2/trunk/src/pm/hydra/control/consys/consys_close.c
mpich2/trunk/src/pm/hydra/control/consys/consys_launch.c
mpich2/trunk/src/pm/hydra/control/consys/consys_wait.c
mpich2/trunk/src/pm/hydra/include/hydra.h
mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c
mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c
mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c
mpich2/trunk/src/pm/hydra/pm/utils/pmi.c
Log:
1. Use partitions instead of hostnames. This will allow non-ssh bootstrap
servers to launch processes more naturally.
2. Get rid of wrap-around reading of hostfiles. It makes the
code unnecessarily complicated without much utility. If it is needed,
it can be added back later.
Modified: mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_launch.c 2009-03-07 23:43:00 UTC (rev 3969)
+++ mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_launch.c 2009-03-08 03:44:47 UTC (rev 3970)
@@ -22,8 +22,9 @@
HYD_Status HYD_BSCI_Launch_procs(void)
{
struct HYD_Proc_params *proc_params;
- char *client_arg[HYD_EXEC_ARGS], *hostname = NULL, **proc_list = NULL;
- int i, arg, process_id, host_id, host_id_max;
+ struct HYD_Partition_list *partition;
+ char *client_arg[HYD_EXEC_ARGS];
+ int i, arg, process_id;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
@@ -46,70 +47,59 @@
goto fn_fail;
}
- proc_params = handle.proc_params;
process_id = 0;
- while (proc_params) {
- if (proc_params->host_file != NULL) { /* We got a new host file */
- host_id = 0;
- host_id_max = proc_params->total_num_procs;
- proc_list = proc_params->total_proc_list;
- }
+ for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
+ for (partition = proc_params->partition; partition; partition = partition->next) {
+ for (i = 0; i < partition->proc_count; i++) {
+ /* Setup the executable arguments */
+ arg = 0;
+ client_arg[arg++] = MPIU_Strdup("/usr/bin/ssh");
- for (i = 0; i < proc_params->user_num_procs; i++) {
- /* Setup the executable arguments */
- arg = 0;
- client_arg[arg++] = MPIU_Strdup("/usr/bin/ssh");
+ /* Allow X forwarding only if explicitly requested */
+ if (handle.enablex == 1)
+ client_arg[arg++] = MPIU_Strdup("-X");
+ else if (handle.enablex == 0)
+ client_arg[arg++] = MPIU_Strdup("-x");
+ else /* default mode is disable X */
+ client_arg[arg++] = MPIU_Strdup("-x");
- /* Allow X forwarding only if explicitly requested */
- if (handle.enablex == 1)
- client_arg[arg++] = MPIU_Strdup("-X");
- else if (handle.enablex == 0)
- client_arg[arg++] = MPIU_Strdup("-x");
- else /* default mode is disable X */
- client_arg[arg++] = MPIU_Strdup("-x");
+ /* ssh does not support any partition names other than host names */
+ client_arg[arg++] = MPIU_Strdup(partition->name);
- if (host_id == host_id_max)
- host_id = 0;
- hostname = proc_list[host_id];
- host_id++;
+ client_arg[arg++] = MPIU_Strdup("sh");
+ client_arg[arg++] = MPIU_Strdup("-c");
+ client_arg[arg++] = MPIU_Strdup("\"");
+ client_arg[arg++] = NULL;
- client_arg[arg++] = MPIU_Strdup(hostname);
+ HYDU_Append_env(handle.system_env, client_arg, process_id);
+ HYDU_Append_env(proc_params->prop_env, client_arg, process_id);
+ HYDU_Append_wdir(client_arg);
+ HYDU_Append_exec(proc_params->exec, client_arg);
- client_arg[arg++] = MPIU_Strdup("sh");
- client_arg[arg++] = MPIU_Strdup("-c");
- client_arg[arg++] = MPIU_Strdup("\"");
- client_arg[arg++] = NULL;
+ for (arg = 0; client_arg[arg]; arg++);
+ client_arg[arg++] = MPIU_Strdup("\"");
+ client_arg[arg++] = NULL;
- HYDU_Append_env(handle.system_env, client_arg, process_id);
- HYDU_Append_env(proc_params->prop_env, client_arg, process_id);
- HYDU_Append_wdir(client_arg);
- HYDU_Append_exec(proc_params->exec, client_arg);
+ /* The stdin pointer will be some value for process_id
+ * 0; for everyone else, it's NULL. */
+ status = HYDU_Create_process(client_arg, (process_id == 0 ? &handle.in : NULL),
+ &proc_params->out[i], &proc_params->err[i],
+ &proc_params->pid[i]);
+ if (status != HYD_SUCCESS) {
+ HYDU_Error_printf("bootstrap spawn process returned error\n");
+ goto fn_fail;
+ }
- for (arg = 0; client_arg[arg]; arg++);
- client_arg[arg++] = MPIU_Strdup("\"");
- client_arg[arg++] = NULL;
+ for (arg = 0; client_arg[arg]; arg++)
+ HYDU_FREE(client_arg[arg]);
- /* The stdin pointer will be some value for process_id 0;
- * for everyone else, it's NULL. */
- status = HYDU_Create_process(client_arg, (process_id == 0 ? &handle.in : NULL),
- &proc_params->out[i], &proc_params->err[i],
- &proc_params->pid[i]);
- if (status != HYD_SUCCESS) {
- HYDU_Error_printf("bootstrap spawn process returned error\n");
- goto fn_fail;
- }
+ /* For the remaining processes, set the stdin fd to -1 */
+ if (process_id != 0)
+ handle.in = -1;
- for (arg = 0; client_arg[arg]; arg++)
- HYDU_FREE(client_arg[arg]);
-
- /* For the remaining processes, set the stdin fd to -1 */
- if (process_id != 0)
- handle.in = -1;
-
- process_id++;
+ process_id++;
+ }
}
-
- proc_params = proc_params->next;
}
fn_exit:
@@ -124,36 +114,23 @@
HYD_Status HYD_BSCI_Cleanup_procs(void)
{
struct HYD_Proc_params *proc_params;
+ struct HYD_Partition_list *partition;
char *client_arg[HYD_EXEC_ARGS], *hostname, **proc_list, *execname;
int i, arg, host_id, host_id_max;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
- proc_params = handle.proc_params;
- while (proc_params) {
- for (i = 0; i < proc_params->user_num_procs; i++) {
+ for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
+ for (partition = proc_params->partition; partition; partition = partition->next) {
/* Setup the executable arguments */
arg = 0;
client_arg[arg++] = MPIU_Strdup("/usr/bin/ssh");
client_arg[arg++] = MPIU_Strdup("-x");
- if (proc_params->host_file != NULL) { /* We got a new host file */
- host_id = 0;
- host_id_max = proc_params->total_num_procs;
- proc_list = proc_params->total_proc_list;
- }
- else if (host_id == host_id_max) {
- host_id = 0;
- }
- hostname = proc_list[host_id];
- host_id++;
+ /* ssh does not support any partition names other than host names */
+ client_arg[arg++] = MPIU_Strdup(partition->name);
- client_arg[arg++] = MPIU_Strdup(hostname);
- client_arg[arg++] = NULL;
-
- HYDU_Append_wdir(client_arg);
-
for (arg = 0; client_arg[arg]; arg++);
client_arg[arg++] = MPIU_Strdup("killall");
@@ -175,8 +152,6 @@
for (arg = 0; client_arg[arg]; arg++)
HYDU_FREE(client_arg[arg]);
}
-
- proc_params = proc_params->next;
}
fn_exit:
Modified: mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_init.c
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_init.c 2009-03-07 23:43:00 UTC (rev 3969)
+++ mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_init.c 2009-03-08 03:44:47 UTC (rev 3970)
@@ -24,11 +24,12 @@
* the same loop. */
proc_params = handle.proc_params;
while (proc_params) {
- HYDU_MALLOC(proc_params->pid, int *, proc_params->user_num_procs * sizeof(int), status);
- HYDU_MALLOC(proc_params->exit_status, int *, proc_params->user_num_procs * sizeof(int), status);
- HYDU_MALLOC(proc_params->exit_status_valid, int *, proc_params->user_num_procs * sizeof(int),
+ HYDU_MALLOC(proc_params->pid, int *, proc_params->exec_proc_count * sizeof(int), status);
+ HYDU_MALLOC(proc_params->exit_status, int *, proc_params->exec_proc_count * sizeof(int),
status);
- for (i = 0; i < proc_params->user_num_procs; i++)
+ HYDU_MALLOC(proc_params->exit_status_valid, int *, proc_params->exec_proc_count * sizeof(int),
+ status);
+ for (i = 0; i < proc_params->exec_proc_count; i++)
proc_params->exit_status_valid[i] = 0;
proc_params = proc_params->next;
}
@@ -51,8 +52,8 @@
proc_params = handle.proc_params;
while (proc_params) {
- HYDU_MALLOC(proc_params->out, int *, proc_params->user_num_procs * sizeof(int), status);
- HYDU_MALLOC(proc_params->err, int *, proc_params->user_num_procs * sizeof(int), status);
+ HYDU_MALLOC(proc_params->out, int *, proc_params->exec_proc_count * sizeof(int), status);
+ HYDU_MALLOC(proc_params->err, int *, proc_params->exec_proc_count * sizeof(int), status);
proc_params = proc_params->next;
}
Modified: mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_wait.c
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_wait.c 2009-03-07 23:43:00 UTC (rev 3969)
+++ mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_wait.c 2009-03-08 03:44:47 UTC (rev 3970)
@@ -28,7 +28,7 @@
not_completed = 0;
proc_params = handle.proc_params;
while (proc_params) {
- for (i = 0; i < proc_params->user_num_procs; i++)
+ for (i = 0; i < proc_params->exec_proc_count; i++)
if (proc_params->exit_status_valid[i] == 0)
not_completed++;
proc_params = proc_params->next;
@@ -45,7 +45,7 @@
/* Find the pid and mark it as complete. */
proc_params = handle.proc_params;
while (proc_params) {
- for (i = 0; i < proc_params->user_num_procs; i++) {
+ for (i = 0; i < proc_params->exec_proc_count; i++) {
if (proc_params->pid[i] == pid) {
proc_params->exit_status[i] = WEXITSTATUS(ret_status);
proc_params->exit_status_valid[i] = 1;
Modified: mpich2/trunk/src/pm/hydra/control/consys/consys_close.c
===================================================================
--- mpich2/trunk/src/pm/hydra/control/consys/consys_close.c 2009-03-07 23:43:00 UTC (rev 3969)
+++ mpich2/trunk/src/pm/hydra/control/consys/consys_close.c 2009-03-08 03:44:47 UTC (rev 3970)
@@ -33,7 +33,7 @@
/* Find the FD in the handle and remove it. */
proc_params = handle.proc_params;
while (proc_params) {
- for (i = 0; i < proc_params->user_num_procs; i++) {
+ for (i = 0; i < proc_params->exec_proc_count; i++) {
if (proc_params->out[i] == fd) {
proc_params->out[i] = -1;
goto fn_exit;
Modified: mpich2/trunk/src/pm/hydra/control/consys/consys_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/control/consys/consys_launch.c 2009-03-07 23:43:00 UTC (rev 3969)
+++ mpich2/trunk/src/pm/hydra/control/consys/consys_launch.c 2009-03-08 03:44:47 UTC (rev 3970)
@@ -28,14 +28,14 @@
proc_params = handle.proc_params;
while (proc_params) {
- status = HYD_DMX_Register_fd(proc_params->user_num_procs, proc_params->out,
+ status = HYD_DMX_Register_fd(proc_params->exec_proc_count, proc_params->out,
HYD_STDOUT, proc_params->stdout_cb);
if (status != HYD_SUCCESS) {
HYDU_Error_printf("demux engine returned error when registering fd\n");
goto fn_fail;
}
- status = HYD_DMX_Register_fd(proc_params->user_num_procs, proc_params->err,
+ status = HYD_DMX_Register_fd(proc_params->exec_proc_count, proc_params->err,
HYD_STDOUT, proc_params->stderr_cb);
if (status != HYD_SUCCESS) {
HYDU_Error_printf("demux engine returned error when registering fd\n");
Modified: mpich2/trunk/src/pm/hydra/control/consys/consys_wait.c
===================================================================
--- mpich2/trunk/src/pm/hydra/control/consys/consys_wait.c 2009-03-07 23:43:00 UTC (rev 3969)
+++ mpich2/trunk/src/pm/hydra/control/consys/consys_wait.c 2009-03-08 03:44:47 UTC (rev 3970)
@@ -33,7 +33,7 @@
proc_params = handle.proc_params;
sockets_open = 0;
while (proc_params) {
- for (i = 0; i < proc_params->user_num_procs; i++) {
+ for (i = 0; i < proc_params->exec_proc_count; i++) {
if (proc_params->out[i] != -1 || proc_params->err[i] != -1) {
sockets_open++;
break;
Modified: mpich2/trunk/src/pm/hydra/include/hydra.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra.h 2009-03-07 23:43:00 UTC (rev 3969)
+++ mpich2/trunk/src/pm/hydra/include/hydra.h 2009-03-08 03:44:47 UTC (rev 3970)
@@ -94,6 +94,8 @@
int enablex;
char *wdir;
+ char *host_file;
+
HYD_Env_t *global_env;
HYD_Env_t *system_env;
HYD_Env_t *user_env;
@@ -112,13 +114,14 @@
/* Each structure will contain all hosts/cores that use the same
* executable and environment. */
struct HYD_Proc_params {
- int user_num_procs;
- int total_num_procs;
- char **total_proc_list;
- int *total_core_list;
+ int exec_proc_count;
+ struct HYD_Partition_list {
+ char * name;
+ int proc_count;
+ char ** mapping; /* Can be core IDs or something else */
+ struct HYD_Partition_list *next;
+ } *partition;
- char *host_file;
-
char *exec[HYD_EXEC_ARGS];
HYD_Env_t *user_env;
HYD_Env_prop_t prop;
Modified: mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c 2009-03-07 23:43:00 UTC (rev 3969)
+++ mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c 2009-03-08 03:44:47 UTC (rev 3970)
@@ -112,7 +112,7 @@
proc_params = handle.proc_params;
exit_status = 0;
while (proc_params) {
- for (i = 0; i < proc_params->user_num_procs; i++)
+ for (i = 0; i < proc_params->exec_proc_count; i++)
exit_status |= proc_params->exit_status[i];
proc_params = proc_params->next;
}
Modified: mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c 2009-03-07 23:43:00 UTC (rev 3969)
+++ mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c 2009-03-08 03:44:47 UTC (rev 3970)
@@ -42,13 +42,9 @@
HYDU_MALLOC(proc_params, struct HYD_Proc_params *, sizeof(struct HYD_Proc_params), status);
- proc_params->user_num_procs = 0;
- proc_params->total_num_procs = 0;
- proc_params->total_proc_list = NULL;
- proc_params->total_core_list = NULL;
+ proc_params->exec_proc_count = 0;
+ proc_params->partition = NULL;
- proc_params->host_file = NULL;
-
proc_params->exec[0] = NULL;
proc_params->user_env = NULL;
proc_params->prop = HYD_ENV_PROP_UNSET;
@@ -103,7 +99,7 @@
HYD_Status HYD_LCHI_Get_parameters(int t_argc, char **t_argv)
{
- int argc = t_argc, i, got_hostfile;
+ int argc = t_argc, i;
char **argv = t_argv;
int local_params_started;
char *arg;
@@ -117,6 +113,7 @@
handle.debug = -1;
handle.enablex = -1;
handle.wdir = NULL;
+ handle.host_file = NULL;
status = HYDU_Env_global_list(&handle.global_env);
if (status != HYD_SUCCESS) {
@@ -304,6 +301,7 @@
CHECK_LOCAL_PARAM_START(local_params_started, status);
CHECK_NEXT_ARG_VALID(status);
handle.wdir = MPIU_Strdup(*argv);
+ continue;
}
if (!strcmp(*argv, "-n") || !strcmp(*argv, "-np")) {
@@ -317,37 +315,22 @@
}
/* Num_procs already set */
- if (proc_params->user_num_procs != 0) {
+ if (proc_params->exec_proc_count != 0) {
HYDU_Error_printf("Duplicate setting for number of processes; previously set to %d\n",
- proc_params->user_num_procs);
+ proc_params->exec_proc_count);
status = HYD_INTERNAL_ERROR;
goto fn_fail;
}
- proc_params->user_num_procs = atoi(*argv);
+ proc_params->exec_proc_count = atoi(*argv);
continue;
}
if (!strcmp(*argv, "-f")) {
- local_params_started = 1;
+ CHECK_LOCAL_PARAM_START(local_params_started, status);
CHECK_NEXT_ARG_VALID(status);
-
- status = get_current_proc_params(&proc_params);
- if (status != HYD_SUCCESS) {
- HYDU_Error_printf("get_current_proc_params returned error\n");
- goto fn_fail;
- }
-
- /* host_file already set */
- if (proc_params->host_file != NULL) {
- HYDU_Error_printf("Duplicate setting for host file; previously set to %s\n",
- proc_params->host_file);
- status = HYD_INTERNAL_ERROR;
- goto fn_fail;
- }
-
- proc_params->host_file = MPIU_Strdup(*argv);
+ handle.host_file = MPIU_Strdup(*argv);
continue;
}
@@ -405,8 +388,23 @@
}
}
+ /*
+ * We use the following priority order to specify the host file:
+ * 1. Specified to mpiexec using -f
+ * 2. Specified through the environment HYDRA_HOST_FILE
+ * 3. Specified through the environment HYDRA_USE_LOCALHOST
+ */
+ if (handle.host_file == NULL && getenv("HYDRA_HOST_FILE"))
+ handle.host_file = MPIU_Strdup(getenv("HYDRA_HOST_FILE"));
+ if (handle.host_file == NULL && getenv("HYDRA_USE_LOCALHOST"))
+ handle.host_file = MPIU_Strdup("HYDRA_USE_LOCALHOST");
+ if (handle.host_file == NULL) {
+ HYDU_Error_printf("Host file not specified\n");
+ status = HYD_INTERNAL_ERROR;
+ goto fn_fail;
+ }
+
proc_params = handle.proc_params;
- got_hostfile = 0;
while (proc_params) {
if (proc_params->exec[0] == NULL) {
HYDU_Error_printf("no executable specified\n");
@@ -414,31 +412,12 @@
goto fn_fail;
}
- if (proc_params->user_num_procs == 0)
- proc_params->user_num_procs = 1;
+ if (proc_params->exec_proc_count == 0)
+ proc_params->exec_proc_count = 1;
- /*
- * We use the following priority order to specify the host file:
- * 1. Specified to mpiexec using -f
- * 2. Specified through the environment HYDRA_HOST_FILE
- * 3. Specified through the environment HYDRA_USE_LOCALHOST
- */
- if (proc_params->host_file == NULL && got_hostfile == 0 && getenv("HYDRA_HOST_FILE"))
- proc_params->host_file = MPIU_Strdup(getenv("HYDRA_HOST_FILE"));
- if (proc_params->host_file == NULL && got_hostfile == 0 && getenv("HYDRA_USE_LOCALHOST"))
- proc_params->host_file = MPIU_Strdup("HYDRA_USE_LOCALHOST");
- if (proc_params->host_file != NULL)
- got_hostfile = 1;
-
proc_params = proc_params->next;
}
- if (got_hostfile == 0) {
- HYDU_Error_printf("Host file not specified\n");
- status = HYD_INTERNAL_ERROR;
- goto fn_fail;
- }
-
fn_exit:
HYDU_FUNC_EXIT();
return status;
Modified: mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c 2009-03-07 23:43:00 UTC (rev 3969)
+++ mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c 2009-03-08 03:44:47 UTC (rev 3970)
@@ -11,110 +11,89 @@
HYD_Status HYD_LCHU_Create_host_list(void)
{
- FILE *fp;
- char line[2 * MAX_HOSTNAME_LEN], *hostfile, *hostname, *procs;
+ FILE *fp = NULL;
+ char line[2 * MAX_HOSTNAME_LEN], *hostname, *procs;
struct HYD_Proc_params *proc_params;
- int i, j, num_procs;
+ struct HYD_Partition_list *partition;
+ int num_procs, total_procs;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
- /* FIXME: We need a better approach than this -- we make two
- * passes for the total host list, one to find the number of
- * hosts, and another to read the actual hosts. */
- proc_params = handle.proc_params;
- while (proc_params) {
- if (proc_params->host_file != NULL) {
- if (!strcmp(proc_params->host_file, "HYDRA_USE_LOCALHOST")) {
- proc_params->total_num_procs++;
- }
- else {
- fp = fopen(proc_params->host_file, "r");
- if (fp == NULL) {
- HYDU_Error_printf("unable to open host file %s\n", proc_params->host_file);
- status = HYD_INTERNAL_ERROR;
- goto fn_fail;
- }
-
- proc_params->total_num_procs = 0;
- while (!feof(fp)) {
- if ((fscanf(fp, "%s", line) < 0) && errno) {
- HYDU_Error_printf("unable to read input line (errno: %d)\n", errno);
- status = HYD_INTERNAL_ERROR;
- goto fn_fail;
- }
- if (feof(fp))
- break;
-
- hostname = strtok(line, ":");
- procs = strtok(NULL, ":");
- if (procs)
- num_procs = atoi(procs);
- else
- num_procs = 1;
-
- proc_params->total_num_procs += num_procs;
- }
-
- fclose(fp);
- }
+ if (strcmp(handle.host_file, "HYDRA_USE_LOCALHOST")) {
+ fp = fopen(handle.host_file, "r");
+ if (fp == NULL) {
+ HYDU_Error_printf("unable to open host file %s\n", handle.host_file);
+ status = HYD_INTERNAL_ERROR;
+ goto fn_fail;
}
- proc_params = proc_params->next;
}
proc_params = handle.proc_params;
while (proc_params) {
- if (proc_params->host_file != NULL) {
+ if (!strcmp(handle.host_file, "HYDRA_USE_LOCALHOST")) {
+ HYDU_MALLOC(proc_params->partition, struct HYD_Partition_list *,
+ sizeof(struct HYD_Partition_list), status);
- HYDU_MALLOC(proc_params->total_proc_list, char **,
- proc_params->total_num_procs * sizeof(char *), status);
- HYDU_MALLOC(proc_params->total_core_list, int *,
- proc_params->total_num_procs * sizeof(int), status);
-
- if (!strcmp(proc_params->host_file, "HYDRA_USE_LOCALHOST")) {
- proc_params->total_proc_list[0] = MPIU_Strdup("localhost");
- proc_params->total_core_list[0] = -1;
- }
- else {
- fp = fopen(proc_params->host_file, "r");
- if (fp == NULL) {
- HYDU_Error_printf("unable to open host file %s\n", proc_params->host_file);
+ proc_params->partition->name = MPIU_Strdup("localhost");
+ proc_params->partition->proc_count = proc_params->exec_proc_count;
+ proc_params->partition->mapping = NULL;
+ proc_params->partition->next = NULL;
+ total_procs = proc_params->exec_proc_count;
+ }
+ else {
+ total_procs = 0;
+ while (!feof(fp)) {
+ if ((fscanf(fp, "%s", line) < 0) && errno) {
+ HYDU_Error_printf("unable to read input line (errno: %d)\n", errno);
status = HYD_INTERNAL_ERROR;
goto fn_fail;
}
+ if (feof(fp))
+ break;
- i = 0;
- while (!feof(fp)) {
- if ((fscanf(fp, "%s", line) < 0) && errno) {
- HYDU_Error_printf("unable to read input line (errno: %d)\n", errno);
- status = HYD_INTERNAL_ERROR;
- goto fn_fail;
- }
- if (feof(fp))
- break;
+ hostname = strtok(line, ":");
+ procs = strtok(NULL, ":");
- hostname = strtok(line, ":");
- procs = strtok(NULL, ":");
+ num_procs = procs ? atoi(procs) : 1;
+ if (num_procs > (proc_params->exec_proc_count - total_procs))
+ num_procs = (proc_params->exec_proc_count - total_procs);
- if (procs)
- num_procs = atoi(procs);
- else
- num_procs = 1;
-
- for (j = 0; j < num_procs; j++) {
- proc_params->total_proc_list[i] = MPIU_Strdup(hostname);
- proc_params->total_core_list[i] = -1;
- i++;
- }
+ if (proc_params->partition) {
+ for (partition = proc_params->partition; partition->next;
+ partition = partition->next);
+ HYDU_MALLOC(partition->next, struct HYD_Partition_list *,
+ sizeof(struct HYD_Partition_list), status);
+ partition = partition->next;
}
- fclose(fp);
+ partition->name = MPIU_Strdup(hostname);
+
+ /* FIXME: We don't support mappings yet */
+ partition->mapping = NULL;
+ partition->proc_count = num_procs;
+ partition->next = NULL;
+
+ total_procs += num_procs;
+ if (total_procs == proc_params->exec_proc_count)
+ break;
}
}
+
+ if (total_procs != proc_params->exec_proc_count)
+ break;
proc_params = proc_params->next;
}
+ if (proc_params) {
+ HYDU_Error_printf("Not enough number of hosts in host file: %s\n", handle.host_file);
+ status = HYD_INTERNAL_ERROR;
+ goto fn_fail;
+ }
+
fn_exit:
+ if (fp)
+ fclose(fp);
HYDU_FUNC_EXIT();
return status;
@@ -126,20 +105,23 @@
HYD_Status HYD_LCHU_Free_host_list(void)
{
struct HYD_Proc_params *proc_params;
+ struct HYD_Partition_list *partition;
int i;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
- proc_params = handle.proc_params;
- while (proc_params) {
- for (i = 0; i < proc_params->total_num_procs; i++)
- HYDU_FREE(proc_params->total_proc_list[i]);
- HYDU_FREE(proc_params->total_proc_list);
- HYDU_FREE(proc_params->total_core_list);
- HYDU_FREE(proc_params->host_file);
- proc_params = proc_params->next;
+ for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
+ for (partition = proc_params->partition; partition; partition = partition->next) {
+ HYDU_FREE(partition->name);
+ if (partition->mapping) {
+ if (partition->mapping[i])
+ HYDU_FREE(partition->mapping[i]);
+ HYDU_FREE(partition->mapping);
+ }
+ }
}
+ HYDU_FREE(handle.host_file);
HYDU_FUNC_EXIT();
return status;
Modified: mpich2/trunk/src/pm/hydra/pm/utils/pmi.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/utils/pmi.c 2009-03-07 23:43:00 UTC (rev 3969)
+++ mpich2/trunk/src/pm/hydra/pm/utils/pmi.c 2009-03-08 03:44:47 UTC (rev 3970)
@@ -104,7 +104,7 @@
num_procs = 0;
proc_params = handle.proc_params;
while (proc_params) {
- num_procs += proc_params->user_num_procs;
+ num_procs += proc_params->exec_proc_count;
proc_params = proc_params->next;
}
@@ -140,7 +140,7 @@
size = 0;
proc_params = handle.proc_params;
while (proc_params) {
- size += proc_params->user_num_procs;
+ size += proc_params->exec_proc_count;
proc_params = proc_params->next;
}
debug = handle.debug;
More information about the mpich2-commits
mailing list