[mpich2-commits] r8001 - in mpich2/trunk/src/pm/hydra: include pm/pmiserv tools/bootstrap/external tools/bootstrap/include tools/bootstrap/persist tools/bootstrap/src tools/debugger ui/mpich ui/utils utils/alloc utils/others
balaji at mcs.anl.gov
Mon Feb 21 14:53:46 CST 2011
Author: balaji
Date: 2011-02-21 14:53:45 -0600 (Mon, 21 Feb 2011)
New Revision: 8001
Modified:
mpich2/trunk/src/pm/hydra/include/hydra.h
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip.c
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip.h
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_cb.c
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_pmi_v1.c
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_utils.c
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_cb.c
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmci.c
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v1.c
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v2.c
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_utils.c
mpich2/trunk/src/pm/hydra/tools/bootstrap/external/external.h
mpich2/trunk/src/pm/hydra/tools/bootstrap/external/external_launch.c
mpich2/trunk/src/pm/hydra/tools/bootstrap/external/ll.h
mpich2/trunk/src/pm/hydra/tools/bootstrap/external/ll_launch.c
mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm.h
mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm_launch.c
mpich2/trunk/src/pm/hydra/tools/bootstrap/include/bsci.h
mpich2/trunk/src/pm/hydra/tools/bootstrap/persist/persist_client.h
mpich2/trunk/src/pm/hydra/tools/bootstrap/persist/persist_launch.c
mpich2/trunk/src/pm/hydra/tools/bootstrap/src/bsci_launch.c
mpich2/trunk/src/pm/hydra/tools/debugger/debugger.c
mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.c
mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.h
mpich2/trunk/src/pm/hydra/ui/mpich/utils.c
mpich2/trunk/src/pm/hydra/ui/utils/uiu.c
mpich2/trunk/src/pm/hydra/utils/alloc/alloc.c
mpich2/trunk/src/pm/hydra/utils/others/others.c
Log:
Redo the PMI ID calculation code entirely. Now we use a static list of
nodes, and explicitly manage how many processes are running on each
node. When a new group is launched this is updated. This allows for
better load-balancing of dynamically spawned processes. Fixes ticket
#1434.
No reviewer.
Modified: mpich2/trunk/src/pm/hydra/include/hydra.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra.h 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/include/hydra.h 2011-02-21 20:53:45 UTC (rev 8001)
@@ -252,7 +252,10 @@
struct HYD_node {
char *hostname;
int core_count;
+ int active_processes;
+ int node_id;
+
/* Username */
char *user;
@@ -264,7 +267,7 @@
/* Proxy information */
struct HYD_proxy {
- struct HYD_node node;
+ struct HYD_node *node;
struct HYD_pg *pg; /* Back pointer to the PG */
@@ -272,9 +275,11 @@
int proxy_id;
- int start_pid;
int proxy_process_count;
+ /* Filler processes that we are adding on this proxy */
+ int filler_processes;
+
struct HYD_exec *exec_list;
int *pid;
@@ -419,17 +424,15 @@
void HYDU_init_global_env(struct HYD_env_global *global_env);
void HYDU_finalize_global_env(struct HYD_env_global *global_env);
HYD_status HYDU_alloc_node(struct HYD_node **node);
-void HYDU_dup_node(struct HYD_node src, struct HYD_node *dest);
void HYDU_free_node_list(struct HYD_node *node_list);
void HYDU_init_pg(struct HYD_pg *pg, int pgid);
HYD_status HYDU_alloc_pg(struct HYD_pg **pg, int pgid);
void HYDU_free_pg_list(struct HYD_pg *pg_list);
-HYD_status HYDU_alloc_proxy(struct HYD_proxy **proxy, struct HYD_pg *pg);
void HYDU_free_proxy_list(struct HYD_proxy *proxy_list);
HYD_status HYDU_alloc_exec(struct HYD_exec **exec);
void HYDU_free_exec_list(struct HYD_exec *exec_list);
HYD_status HYDU_create_proxy_list(struct HYD_exec *exec_list, struct HYD_node *node_list,
- struct HYD_pg *pg, int proc_offset);
+ struct HYD_pg *pg);
HYD_status HYDU_correct_wdir(char **wdir);
/* args */
@@ -473,8 +476,6 @@
struct HYDT_bind_cpuset_t cpuset);
/* others */
-int HYDU_local_to_global_id(int local_id, int start_pid, int core_count,
- int global_core_count);
HYD_status HYDU_add_to_node_list(const char *hostname, int num_procs,
struct HYD_node **node_list);
HYD_status HYDU_gethostname(char *hostname);
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip.c 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip.c 2011-02-21 20:53:45 UTC (rev 8001)
@@ -19,7 +19,13 @@
HYDU_init_user_global(&HYD_pmcd_pmip.user_global);
- HYD_pmcd_pmip.system_global.global_core_count = -1;
+ HYD_pmcd_pmip.system_global.global_core_map.left = -1;
+ HYD_pmcd_pmip.system_global.global_core_map.current = -1;
+ HYD_pmcd_pmip.system_global.global_core_map.right = -1;
+ HYD_pmcd_pmip.system_global.filler_process_map.left = -1;
+ HYD_pmcd_pmip.system_global.filler_process_map.current = -1;
+ HYD_pmcd_pmip.system_global.filler_process_map.right = -1;
+
HYD_pmcd_pmip.system_global.global_process_count = -1;
HYD_pmcd_pmip.system_global.jobid = NULL;
HYD_pmcd_pmip.system_global.pmi_port = NULL;
@@ -48,7 +54,6 @@
HYD_pmcd_pmip.local.proxy_core_count = -1;
HYD_pmcd_pmip.local.proxy_process_count = -1;
- HYD_pmcd_pmip.start_pid = -1;
HYD_pmcd_pmip.exec_list = NULL;
status = HYD_pmcd_pmi_allocate_kvs(&HYD_pmcd_pmip.local.kvs, -1);
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip.h
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip.h 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip.h 2011-02-21 20:53:45 UTC (rev 8001)
@@ -10,11 +10,20 @@
#include "hydra.h"
#include "common.h"
+struct HYD_pmcd_pmip_map {
+ int left;
+ int current;
+ int right;
+ int total;
+};
+
struct HYD_pmcd_pmip {
struct HYD_user_global user_global;
struct {
- int global_core_count;
+ struct HYD_pmcd_pmip_map global_core_map;
+ struct HYD_pmcd_pmip_map filler_process_map;
+
int global_process_count;
char *jobid;
@@ -63,7 +72,6 @@
} local;
/* Process segmentation information for this proxy */
- int start_pid;
struct HYD_exec *exec_list;
};
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_cb.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_cb.c 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_cb.c 2011-02-21 20:53:45 UTC (rev 8001)
@@ -442,6 +442,34 @@
goto fn_exit;
}
+static int local_to_global_id(int local_id)
+{
+ int rem1, layer, rem2;
+ int ret;
+
+ if (local_id < HYD_pmcd_pmip.system_global.filler_process_map.current)
+ ret = HYD_pmcd_pmip.system_global.filler_process_map.left + local_id;
+ else {
+ /* rem1 gives the number of processes remaining after
+ * filling the holes */
+ rem1 = local_id - HYD_pmcd_pmip.system_global.filler_process_map.current;
+
+ /* layer gives the layer of filling in which our process lies
+ * starting from layer 0; in each layer, we fill all proxies
+ * in the global list */
+ layer = rem1 / HYD_pmcd_pmip.system_global.global_core_map.current;
+
+ /* rem2 gives our relative index in the layer we belong to */
+ rem2 = rem1 % HYD_pmcd_pmip.system_global.global_core_map.current;
+
+ ret = (HYD_pmcd_pmip.system_global.filler_process_map.total +
+ (layer * HYD_pmcd_pmip.system_global.global_core_map.total) +
+ HYD_pmcd_pmip.system_global.global_core_map.left + rem2);
+ }
+
+ return ret;
+}
+
static HYD_status launch_procs(void)
{
int i, j, arg, process_id;
@@ -491,10 +519,7 @@
HYD_pmcd_pmip.downstream.pmi_fd_active[i] = 0;
if (HYD_pmcd_pmip.system_global.pmi_rank == -1)
- HYD_pmcd_pmip.downstream.pmi_rank[i] =
- HYDU_local_to_global_id(i, HYD_pmcd_pmip.start_pid,
- HYD_pmcd_pmip.local.proxy_core_count,
- HYD_pmcd_pmip.system_global.global_core_count);
+ HYD_pmcd_pmip.downstream.pmi_rank[i] = local_to_global_id(i);
else
HYD_pmcd_pmip.downstream.pmi_rank[i] = HYD_pmcd_pmip.system_global.pmi_rank;
}
@@ -769,15 +794,27 @@
} while (1);
/* verify the arguments we got */
- if (HYD_pmcd_pmip.system_global.global_core_count == -1)
- HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "global core count not available\n");
+ if (HYD_pmcd_pmip.system_global.global_core_map.left == -1 ||
+ HYD_pmcd_pmip.system_global.global_core_map.current == -1 ||
+ HYD_pmcd_pmip.system_global.global_core_map.right == -1)
+ HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
+ "cannot find global core map (%d,%d,%d)\n",
+ HYD_pmcd_pmip.system_global.global_core_map.left,
+ HYD_pmcd_pmip.system_global.global_core_map.current,
+ HYD_pmcd_pmip.system_global.global_core_map.right);
+ if (HYD_pmcd_pmip.system_global.filler_process_map.left == -1 ||
+ HYD_pmcd_pmip.system_global.filler_process_map.current == -1 ||
+ HYD_pmcd_pmip.system_global.filler_process_map.right == -1)
+ HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
+ "cannot find available cores (%d,%d,%d)\n",
+ HYD_pmcd_pmip.system_global.filler_process_map.left,
+ HYD_pmcd_pmip.system_global.filler_process_map.current,
+ HYD_pmcd_pmip.system_global.filler_process_map.right);
+
if (HYD_pmcd_pmip.local.proxy_core_count == -1)
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "proxy core count not available\n");
- if (HYD_pmcd_pmip.start_pid == -1)
- HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "start PID not available\n");
-
if (HYD_pmcd_pmip.exec_list == NULL && HYD_pmcd_pmip.user_global.ckpoint_prefix == NULL)
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
"no executable given and doesn't look like a restart either\n");
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_pmi_v1.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_pmi_v1.c 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_pmi_v1.c 2011-02-21 20:53:45 UTC (rev 8001)
@@ -297,7 +297,7 @@
i = 0;
tmp[i++] = HYDU_strdup("cmd=universe_size size=");
- tmp[i++] = HYDU_int_to_str(HYD_pmcd_pmip.system_global.global_core_count);
+ tmp[i++] = HYDU_int_to_str(HYD_pmcd_pmip.system_global.global_core_map.total);
tmp[i++] = HYDU_strdup("\n");
tmp[i++] = NULL;
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_utils.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_utils.c 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_utils.c 2011-02-21 20:53:45 UTC (rev 8001)
@@ -206,11 +206,89 @@
return HYDU_set_str_and_incr(arg, argv, &HYD_pmcd_pmip.user_global.global_env.prop);
}
-static HYD_status global_core_count_fn(char *arg, char ***argv)
+static HYD_status split_map(char *map, int *left, int *current, int *right)
{
- return HYDU_set_int_and_incr(arg, argv, &HYD_pmcd_pmip.system_global.global_core_count);
+ char *tmp;
+ HYD_status status = HYD_SUCCESS;
+
+ tmp = strtok(map, ",");
+ HYDU_ASSERT(tmp, status);
+ *left = atoi(tmp);
+
+ tmp = strtok(NULL, ",");
+ HYDU_ASSERT(tmp, status);
+ *current = atoi(tmp);
+
+ tmp = strtok(NULL, ",");
+ HYDU_ASSERT(tmp, status);
+ *right = atoi(tmp);
+
+ fn_exit:
+ HYDU_FUNC_EXIT();
+ return status;
+
+ fn_fail:
+ goto fn_exit;
}
+static HYD_status global_core_map_fn(char *arg, char ***argv)
+{
+ char *map;
+ HYD_status status = HYD_SUCCESS;
+
+ /* Split the core map into three different segments */
+ map = HYDU_strdup(**argv);
+ HYDU_ASSERT(map, status);
+
+ status = split_map(map, &HYD_pmcd_pmip.system_global.global_core_map.left,
+ &HYD_pmcd_pmip.system_global.global_core_map.current,
+ &HYD_pmcd_pmip.system_global.global_core_map.right);
+ HYDU_ERR_POP(status, "unable to split the provided mapping\n");
+
+ HYD_pmcd_pmip.system_global.global_core_map.total =
+ HYD_pmcd_pmip.system_global.global_core_map.left +
+ HYD_pmcd_pmip.system_global.global_core_map.current +
+ HYD_pmcd_pmip.system_global.global_core_map.right;
+
+ (*argv)++;
+
+ fn_exit:
+ HYDU_FUNC_EXIT();
+ return status;
+
+ fn_fail:
+ goto fn_exit;
+}
+
+static HYD_status filler_process_map_fn(char *arg, char ***argv)
+{
+ char *map;
+ HYD_status status = HYD_SUCCESS;
+
+ /* Split the filler process map into three different segments */
+ map = HYDU_strdup(**argv);
+ HYDU_ASSERT(map, status);
+
+ status = split_map(map, &HYD_pmcd_pmip.system_global.filler_process_map.left,
+ &HYD_pmcd_pmip.system_global.filler_process_map.current,
+ &HYD_pmcd_pmip.system_global.filler_process_map.right);
+ HYDU_ERR_POP(status, "unable to split the provided mapping\n");
+
+ HYD_pmcd_pmip.system_global.filler_process_map.total =
+ HYD_pmcd_pmip.system_global.filler_process_map.left +
+ HYD_pmcd_pmip.system_global.filler_process_map.current +
+ HYD_pmcd_pmip.system_global.filler_process_map.right;
+
+ (*argv)++;
+
+ fn_exit:
+ HYDU_FUNC_EXIT();
+ return status;
+
+ fn_fail:
+ goto fn_exit;
+}
+
static HYD_status global_process_count_fn(char *arg, char ***argv)
{
return HYDU_set_int_and_incr(arg, argv, &HYD_pmcd_pmip.system_global.global_process_count);
@@ -254,11 +332,6 @@
return HYDU_set_int_and_incr(arg, argv, &HYD_pmcd_pmip.local.proxy_core_count);
}
-static HYD_status start_pid_fn(char *arg, char ***argv)
-{
- return HYDU_set_int_and_incr(arg, argv, &HYD_pmcd_pmip.start_pid);
-}
-
static HYD_status exec_fn(char *arg, char ***argv)
{
struct HYD_exec *exec = NULL;
@@ -406,14 +479,14 @@
{"global-system-env", global_env_fn, NULL},
{"global-user-env", global_env_fn, NULL},
{"genv-prop", genv_prop_fn, NULL},
- {"global-core-count", global_core_count_fn, NULL},
+ {"global-core-map", global_core_map_fn, NULL},
+ {"filler-process-map", filler_process_map_fn, NULL},
{"global-process-count", global_process_count_fn, NULL},
{"version", version_fn, NULL},
{"interface-env-name", interface_env_name_fn, NULL},
{"hostname", hostname_fn, NULL},
{"local-binding", local_binding_fn, NULL},
{"proxy-core-count", proxy_core_count_fn, NULL},
- {"start-pid", start_pid_fn, NULL},
{"exec", exec_fn, NULL},
{"exec-appnum", exec_appnum_fn, NULL},
{"exec-proc-count", exec_proc_count_fn, NULL},
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_cb.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_cb.c 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_cb.c 2011-02-21 20:53:45 UTC (rev 8001)
@@ -117,6 +117,10 @@
if (pg->pgid == 0)
HYDT_dbg_free_procdesc();
+ /* Reset the node allocations for this PG */
+ for (tproxy = pg->proxy_list; tproxy; tproxy = tproxy->next)
+ tproxy->node->active_processes -= tproxy->proxy_process_count;
+
fn_exit:
HYDU_FUNC_EXIT();
return status;
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmci.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmci.c 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmci.c 2011-02-21 20:53:45 UTC (rev 8001)
@@ -99,7 +99,6 @@
HYD_status HYD_pmci_launch_procs(void)
{
struct HYD_proxy *proxy;
- struct HYD_node *node_list = NULL, *node, *tnode;
char *proxy_args[HYD_NUM_TMP_STRINGS] = { NULL }, *control_port = NULL;
int node_count, i, *control_fd;
HYD_status status = HYD_SUCCESS;
@@ -113,25 +112,6 @@
status = HYD_pmcd_pmi_alloc_pg_scratch(&HYD_server_info.pg_list);
HYDU_ERR_POP(status, "error allocating pg scratch space\n");
- /* Copy the host list to pass to the launcher */
- node_list = NULL;
- node_count = 0;
- for (proxy = HYD_server_info.pg_list.proxy_list; proxy; proxy = proxy->next) {
- HYDU_alloc_node(&node);
- HYDU_dup_node(proxy->node, node);
- node->next = NULL;
-
- if (node_list == NULL) {
- node_list = node;
- }
- else {
- for (tnode = node_list; tnode->next; tnode = tnode->next);
- tnode->next = node;
- }
-
- node_count++;
- }
-
status = HYDU_sock_create_and_listen_portstr(HYD_server_info.user_global.iface,
HYD_server_info.local_hostname,
HYD_server_info.port_range, &control_port,
@@ -147,6 +127,10 @@
status = HYD_pmcd_pmi_fill_in_exec_launch_info(&HYD_server_info.pg_list);
HYDU_ERR_POP(status, "unable to fill in executable arguments\n");
+ node_count = 0;
+ for (proxy = HYD_server_info.pg_list.proxy_list; proxy; proxy = proxy->next)
+ node_count++;
+
HYDU_MALLOC(control_fd, int *, node_count * sizeof(int), status);
for (i = 0; i < node_count; i++)
control_fd[i] = HYD_FD_UNSET;
@@ -156,7 +140,8 @@
HYD_server_info.user_global.bindlib);
HYDU_ERR_POP(status, "unable to initializing binding library");
- status = HYDT_bsci_launch_procs(proxy_args, node_list, control_fd);
+ status =
+ HYDT_bsci_launch_procs(proxy_args, HYD_server_info.pg_list.proxy_list, control_fd);
HYDU_ERR_POP(status, "launcher cannot launch processes\n");
for (i = 0, proxy = HYD_server_info.pg_list.proxy_list; proxy; proxy = proxy->next, i++)
@@ -174,7 +159,6 @@
if (control_port)
HYDU_FREE(control_port);
HYDU_free_strlist(proxy_args);
- HYDU_free_node_list(node_list);
HYDU_FUNC_EXIT();
return status;
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v1.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v1.c 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v1.c 2011-02-21 20:53:45 UTC (rev 8001)
@@ -262,7 +262,7 @@
{
struct HYD_pg *pg;
struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
- struct HYD_node *node_list = NULL, *node, *tnode, *user_node_list = NULL;
+ struct HYD_node *user_node_list = NULL;
struct HYD_proxy *proxy;
struct HYD_pmcd_token *tokens;
struct HYD_exec *exec_list = NULL, *exec;
@@ -506,11 +506,11 @@
offset += pg->pg_process_count;
if (user_node_list) {
- status = HYDU_create_proxy_list(exec_list, user_node_list, pg, 0);
+ status = HYDU_create_proxy_list(exec_list, user_node_list, pg);
HYDU_ERR_POP(status, "error creating proxy list\n");
}
else {
- status = HYDU_create_proxy_list(exec_list, HYD_server_info.node_list, pg, offset);
+ status = HYDU_create_proxy_list(exec_list, HYD_server_info.node_list, pg);
HYDU_ERR_POP(status, "error creating proxy list\n");
}
HYDU_free_exec_list(exec_list);
@@ -527,23 +527,6 @@
/* Go to the last PG */
for (pg = &HYD_server_info.pg_list; pg->next; pg = pg->next);
- /* Copy the host list to pass to the launcher */
- node_list = NULL;
- for (proxy = pg->proxy_list; proxy; proxy = proxy->next) {
- HYDU_alloc_node(&node);
- node->hostname = HYDU_strdup(proxy->node.hostname);
- node->core_count = proxy->node.core_count;
- node->next = NULL;
-
- if (node_list == NULL) {
- node_list = node;
- }
- else {
- for (tnode = node_list; tnode->next; tnode = tnode->next);
- tnode->next = node;
- }
- }
-
status = HYD_pmcd_pmi_fill_in_proxy_args(proxy_args, control_port, new_pgid);
HYDU_ERR_POP(status, "unable to fill in proxy arguments\n");
HYDU_FREE(control_port);
@@ -551,9 +534,8 @@
status = HYD_pmcd_pmi_fill_in_exec_launch_info(pg);
HYDU_ERR_POP(status, "unable to fill in executable arguments\n");
- status = HYDT_bsci_launch_procs(proxy_args, node_list, NULL);
+ status = HYDT_bsci_launch_procs(proxy_args, pg->proxy_list, NULL);
HYDU_ERR_POP(status, "launcher cannot launch processes\n");
- HYDU_free_node_list(node_list);
{
char *cmd_str[HYD_NUM_TMP_STRINGS], *cmd;
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v2.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v2.c 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v2.c 2011-02-21 20:53:45 UTC (rev 8001)
@@ -455,7 +455,7 @@
{
struct HYD_pg *pg;
struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
- struct HYD_node *node_list = NULL, *node, *tnode, *user_node_list = NULL;
+ struct HYD_node *user_node_list = NULL;
struct HYD_proxy *proxy;
struct HYD_pmcd_token *tokens;
struct HYD_exec *exec_list = NULL, *exec;
@@ -687,11 +687,11 @@
offset += pg->pg_process_count;
if (user_node_list) {
- status = HYDU_create_proxy_list(exec_list, user_node_list, pg, 0);
+ status = HYDU_create_proxy_list(exec_list, user_node_list, pg);
HYDU_ERR_POP(status, "error creating proxy list\n");
}
else {
- status = HYDU_create_proxy_list(exec_list, HYD_server_info.node_list, pg, offset);
+ status = HYDU_create_proxy_list(exec_list, HYD_server_info.node_list, pg);
HYDU_ERR_POP(status, "error creating proxy list\n");
}
HYDU_free_exec_list(exec_list);
@@ -708,23 +708,6 @@
/* Go to the last PG */
for (pg = &HYD_server_info.pg_list; pg->next; pg = pg->next);
- /* Copy the host list to pass to the launcher */
- node_list = NULL;
- for (proxy = pg->proxy_list; proxy; proxy = proxy->next) {
- HYDU_alloc_node(&node);
- node->hostname = HYDU_strdup(proxy->node.hostname);
- node->core_count = proxy->node.core_count;
- node->next = NULL;
-
- if (node_list == NULL) {
- node_list = node;
- }
- else {
- for (tnode = node_list; tnode->next; tnode = tnode->next);
- tnode->next = node;
- }
- }
-
status = HYD_pmcd_pmi_fill_in_proxy_args(proxy_args, control_port, new_pgid);
HYDU_ERR_POP(status, "unable to fill in proxy arguments\n");
HYDU_FREE(control_port);
@@ -732,9 +715,8 @@
status = HYD_pmcd_pmi_fill_in_exec_launch_info(pg);
HYDU_ERR_POP(status, "unable to fill in executable arguments\n");
- status = HYDT_bsci_launch_procs(proxy_args, node_list, NULL);
+ status = HYDT_bsci_launch_procs(proxy_args, pg->proxy_list, NULL);
HYDU_ERR_POP(status, "launcher cannot launch processes\n");
- HYDU_free_node_list(node_list);
{
char *cmd_str[HYD_NUM_TMP_STRINGS], *cmd;
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_utils.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_utils.c 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_utils.c 2011-02-21 20:53:45 UTC (rev 8001)
@@ -136,25 +136,25 @@
HYDU_MALLOC(block, struct block *, sizeof(struct block), status);
block->start_idx = proxy->proxy_id;
block->num_blocks = 1;
- block->block_size = proxy->node.core_count;
+ block->block_size = proxy->node->core_count;
block->next = NULL;
blocklist_tail = blocklist_head = block;
}
else if (blocklist_tail->start_idx + blocklist_tail->num_blocks == proxy->proxy_id &&
- blocklist_tail->block_size == proxy->node.core_count) {
+ blocklist_tail->block_size == proxy->node->core_count) {
blocklist_tail->num_blocks++;
}
else if (blocklist_tail->start_idx == proxy->proxy_id &&
blocklist_tail->num_blocks == 1) {
- blocklist_tail->block_size += proxy->node.core_count;
+ blocklist_tail->block_size += proxy->node->core_count;
}
else {
HYDU_MALLOC(blocklist_tail->next, struct block *, sizeof(struct block), status);
blocklist_tail = blocklist_tail->next;
blocklist_tail->start_idx = proxy->proxy_id;
blocklist_tail->num_blocks = 1;
- blocklist_tail->block_size = proxy->node.core_count;
+ blocklist_tail->block_size = proxy->node->core_count;
blocklist_tail->next = NULL;
}
}
@@ -225,8 +225,10 @@
struct HYD_exec *exec;
struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
char *mapping = NULL;
- char *pmi_fd = NULL, *pmi_port = NULL;
- int pmi_rank, ret;
+ char *pmi_fd = NULL, *pmi_port = NULL, *map = NULL;
+ int pmi_rank, ret, left_global_cores, right_global_cores;
+ int left_filler_processes, right_filler_processes;
+ char *tmp[HYD_NUM_TMP_STRINGS];
HYD_status status = HYD_SUCCESS;
status = pmi_process_mapping(pg, &mapping);
@@ -241,6 +243,14 @@
/* Create the arguments list for each proxy */
process_id = 0;
+ right_global_cores = HYD_server_info.global_core_count;
+ left_global_cores = 0;
+
+ right_filler_processes = 0;
+ for (proxy = pg->proxy_list; proxy; proxy = proxy->next)
+ right_filler_processes += proxy->filler_processes;
+ left_filler_processes = 0;
+
for (proxy = pg->proxy_list; proxy; proxy = proxy->next) {
for (inherited_env_count = 0, env = HYD_server_info.user_global.global_env.inherited;
env; env = env->next, inherited_env_count++);
@@ -277,11 +287,53 @@
}
proxy->exec_launch_info[arg++] = HYDU_strdup("--hostname");
- proxy->exec_launch_info[arg++] = HYDU_strdup(proxy->node.hostname);
+ proxy->exec_launch_info[arg++] = HYDU_strdup(proxy->node->hostname);
- proxy->exec_launch_info[arg++] = HYDU_strdup("--global-core-count");
- proxy->exec_launch_info[arg++] = HYDU_int_to_str(HYD_server_info.global_core_count);
+ /* A map has three fields -- the entire system is considered
+ * to have three nodes; the nodes on the left of the current
+ * node are all grouped into one node, and the nodes to the
+ * right are grouped into another. */
+ /* Global core map */
+ right_global_cores -= proxy->node->core_count;
+
+ proxy->exec_launch_info[arg++] = HYDU_strdup("--global-core-map");
+ tmp[0] = HYDU_int_to_str(left_global_cores);
+ tmp[1] = HYDU_strdup(",");
+ tmp[2] = HYDU_int_to_str(proxy->node->core_count);
+ tmp[3] = HYDU_strdup(",");
+ tmp[4] = HYDU_int_to_str(right_global_cores);
+ tmp[5] = NULL;
+ status = HYDU_str_alloc_and_join(tmp, &map);
+ HYDU_ERR_POP(status, "unable to join strings\n");
+
+ proxy->exec_launch_info[arg++] = map;
+ HYDU_free_strlist(tmp);
+
+ left_global_cores += proxy->node->core_count;
+
+ /* Filler process map */
+ right_filler_processes -= proxy->filler_processes;
+
+ proxy->exec_launch_info[arg++] = HYDU_strdup("--filler-process-map");
+ tmp[0] = HYDU_int_to_str(left_filler_processes);
+ tmp[1] = HYDU_strdup(",");
+ tmp[2] = HYDU_int_to_str(proxy->filler_processes);
+ tmp[3] = HYDU_strdup(",");
+ tmp[4] = HYDU_int_to_str(right_filler_processes);
+ tmp[5] = NULL;
+ status = HYDU_str_alloc_and_join(tmp, &map);
+ HYDU_ERR_POP(status, "unable to join strings\n");
+
+ HYDU_ASSERT(left_filler_processes >= 0, status);
+ HYDU_ASSERT(proxy->filler_processes >= 0, status);
+ HYDU_ASSERT(right_filler_processes >= 0, status);
+
+ proxy->exec_launch_info[arg++] = map;
+ HYDU_free_strlist(tmp);
+
+ left_filler_processes += proxy->filler_processes;
+
proxy->exec_launch_info[arg++] = HYDU_strdup("--global-process-count");
proxy->exec_launch_info[arg++] = HYDU_int_to_str(pg->pg_process_count);
@@ -355,9 +407,9 @@
proxy->exec_launch_info[arg++] = HYDU_strdup("--pmi-process-mapping");
proxy->exec_launch_info[arg++] = HYDU_strdup(mapping);
- if (proxy->node.local_binding) {
+ if (proxy->node->local_binding) {
proxy->exec_launch_info[arg++] = HYDU_strdup("--local-binding");
- proxy->exec_launch_info[arg++] = HYDU_strdup(proxy->node.local_binding);
+ proxy->exec_launch_info[arg++] = HYDU_strdup(proxy->node->local_binding);
}
if (HYD_server_info.user_global.binding) {
@@ -430,11 +482,8 @@
HYDU_strdup(HYD_server_info.user_global.global_env.prop);
}
- proxy->exec_launch_info[arg++] = HYDU_strdup("--start-pid");
- proxy->exec_launch_info[arg++] = HYDU_int_to_str(proxy->start_pid);
-
proxy->exec_launch_info[arg++] = HYDU_strdup("--proxy-core-count");
- proxy->exec_launch_info[arg++] = HYDU_int_to_str(proxy->node.core_count);
+ proxy->exec_launch_info[arg++] = HYDU_int_to_str(proxy->node->core_count);
proxy->exec_launch_info[arg++] = NULL;
/* Now pass the local executable information */
Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/external/external.h
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/external/external.h 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/external/external.h 2011-02-21 20:53:45 UTC (rev 8001)
@@ -14,7 +14,7 @@
#include "sge.h"
#include "pbs.h"
-HYD_status HYDT_bscd_external_launch_procs(char **args, struct HYD_node *node_list,
+HYD_status HYDT_bscd_external_launch_procs(char **args, struct HYD_proxy *proxy_list,
int *control_fd);
HYD_status HYDT_bscd_external_launcher_finalize(void);
HYD_status HYDT_bscd_external_query_env_inherit(const char *env_name, int *ret);
Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/external/external_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/external/external_launch.c 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/external/external_launch.c 2011-02-21 20:53:45 UTC (rev 8001)
@@ -97,13 +97,13 @@
goto fn_exit;
}
-HYD_status HYDT_bscd_external_launch_procs(char **args, struct HYD_node *node_list,
+HYD_status HYDT_bscd_external_launch_procs(char **args, struct HYD_proxy *proxy_list,
int *control_fd)
{
int num_hosts, idx, i, host_idx, fd, exec_idx, offset, lh, len;
int *pid, *fd_list, *dummy;
int sockpair[2];
- struct HYD_node *node;
+ struct HYD_proxy *proxy;
char *targs[HYD_NUM_TMP_STRINGS], *path = NULL, *extra_arg_list = NULL, *extra_arg;
char quoted_exec_string[HYD_TMP_STRLEN], *original_exec_string;
struct HYD_env *env = NULL;
@@ -181,7 +181,7 @@
/* pid_list might already have some PIDs */
num_hosts = 0;
- for (node = node_list; node; node = node->next)
+ for (proxy = proxy_list; proxy; proxy = proxy->next)
num_hosts++;
/* Increase pid list to accommodate these new pids */
@@ -201,18 +201,19 @@
targs[idx] = NULL;
HYDT_bind_cpuset_zero(&cpuset);
- for (i = 0, node = node_list; node; node = node->next, i++) {
+ for (i = 0, proxy = proxy_list; proxy; proxy = proxy->next, i++) {
if (targs[host_idx])
HYDU_FREE(targs[host_idx]);
- if (node->user == NULL) {
- targs[host_idx] = HYDU_strdup(node->hostname);
+ if (proxy->node->user == NULL) {
+ targs[host_idx] = HYDU_strdup(proxy->node->hostname);
}
else {
- len = strlen(node->user) + strlen("@") + strlen(node->hostname) + 1;
+ len = strlen(proxy->node->user) + strlen("@") + strlen(proxy->node->hostname) + 1;
HYDU_MALLOC(targs[host_idx], char *, len, status);
- MPL_snprintf(targs[host_idx], len, "%s@%s", node->user, node->hostname);
+ MPL_snprintf(targs[host_idx], len, "%s@%s", proxy->node->user,
+ proxy->node->hostname);
}
/* append proxy ID */
@@ -227,11 +228,11 @@
* connections causing the job to fail. This is basically a
* hack to slow down ssh connections to the same node. */
if (!strcmp(HYDT_bsci_info.launcher, "ssh")) {
- status = HYDT_bscd_ssh_store_launch_time(node->hostname);
+ status = HYDT_bscd_ssh_store_launch_time(proxy->node->hostname);
HYDU_ERR_POP(status, "error storing launch time\n");
}
- status = HYDU_sock_is_local(node->hostname, &lh);
+ status = HYDU_sock_is_local(proxy->node->hostname, &lh);
HYDU_ERR_POP(status, "error checking if node is localhost\n");
/* If launcher is 'fork', or this is the localhost, use fork
Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/external/ll.h
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/external/ll.h 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/external/ll.h 2011-02-21 20:53:45 UTC (rev 8001)
@@ -9,7 +9,8 @@
#include "hydra.h"
-HYD_status HYDT_bscd_ll_launch_procs(char **args, struct HYD_node *node_list, int *control_fd);
+HYD_status HYDT_bscd_ll_launch_procs(char **args, struct HYD_proxy *proxy_list,
+ int *control_fd);
HYD_status HYDT_bscd_ll_query_proxy_id(int *proxy_id);
HYD_status HYDT_bscd_ll_query_node_list(struct HYD_node **node_list);
HYD_status HYDTI_bscd_ll_query_node_count(int *count);
Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/external/ll_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/external/ll_launch.c 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/external/ll_launch.c 2011-02-21 20:53:45 UTC (rev 8001)
@@ -12,13 +12,14 @@
static int fd_stdout, fd_stderr;
-HYD_status HYDT_bscd_ll_launch_procs(char **args, struct HYD_node *node_list, int *control_fd)
+HYD_status HYDT_bscd_ll_launch_procs(char **args, struct HYD_proxy *proxy_list,
+ int *control_fd)
{
int idx, i, total_procs, node_count;
int *pid, *fd_list, exec_idx;
char *targs[HYD_NUM_TMP_STRINGS], *node_list_str = NULL;
char *path = NULL, *extra_arg_list = NULL, *extra_arg, quoted_exec_string[HYD_TMP_STRLEN];
- struct HYD_node *node;
+ struct HYD_proxy *proxy;
struct HYDT_bind_cpuset_t cpuset;
HYD_status status = HYD_SUCCESS;
@@ -47,7 +48,7 @@
HYDU_ERR_POP(status, "unable to query for the node count\n");
node_count = 0;
- for (node = node_list; node; node = node->next)
+ for (proxy = proxy_list; proxy; proxy = proxy->next)
node_count++;
if (total_procs != node_count)
Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm.h
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm.h 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm.h 2011-02-21 20:53:45 UTC (rev 8001)
@@ -9,7 +9,7 @@
#include "hydra.h"
-HYD_status HYDT_bscd_slurm_launch_procs(char **args, struct HYD_node *node_list,
+HYD_status HYDT_bscd_slurm_launch_procs(char **args, struct HYD_proxy *proxy_list,
int *control_fd);
HYD_status HYDT_bscd_slurm_query_proxy_id(int *proxy_id);
HYD_status HYDT_bscd_slurm_query_node_list(struct HYD_node **node_list);
Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm_launch.c 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm_launch.c 2011-02-21 20:53:45 UTC (rev 8001)
@@ -12,20 +12,20 @@
static int fd_stdout, fd_stderr;
-static HYD_status node_list_to_str(struct HYD_node *node_list, char **node_list_str)
+static HYD_status proxy_list_to_node_str(struct HYD_proxy *proxy_list, char **node_list_str)
{
int i;
char *tmp[HYD_NUM_TMP_STRINGS], *foo = NULL;
- struct HYD_node *node;
+ struct HYD_proxy *proxy;
HYD_status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
i = 0;
- for (node = node_list; node; node = node->next) {
- tmp[i++] = HYDU_strdup(node->hostname);
+ for (proxy = proxy_list; proxy; proxy = proxy->next) {
+ tmp[i++] = HYDU_strdup(proxy->node->hostname);
- if (node->next)
+ if (proxy->node->next)
tmp[i++] = HYDU_strdup(",");
/* If we used up more than half of the array elements, merge
@@ -59,7 +59,7 @@
goto fn_exit;
}
-HYD_status HYDT_bscd_slurm_launch_procs(char **args, struct HYD_node *node_list,
+HYD_status HYDT_bscd_slurm_launch_procs(char **args, struct HYD_proxy *proxy_list,
int *control_fd)
{
int num_hosts, idx, i, exec_idx;
@@ -67,7 +67,7 @@
char *targs[HYD_NUM_TMP_STRINGS], *node_list_str = NULL,
quoted_exec_string[HYD_TMP_STRLEN];
char *path = NULL, *extra_arg_list = NULL, *extra_arg;
- struct HYD_node *node;
+ struct HYD_proxy *proxy;
struct HYDT_bind_cpuset_t cpuset;
HYD_status status = HYD_SUCCESS;
@@ -89,14 +89,14 @@
if (!strcmp(HYDT_bsci_info.rmk, "slurm")) {
targs[idx++] = HYDU_strdup("--nodelist");
- status = node_list_to_str(node_list, &node_list_str);
+ status = proxy_list_to_node_str(proxy_list, &node_list_str);
HYDU_ERR_POP(status, "unable to build a node list string\n");
targs[idx++] = HYDU_strdup(node_list_str);
}
num_hosts = 0;
- for (node = node_list; node; node = node->next)
+ for (proxy = proxy_list; proxy; proxy = proxy->next)
num_hosts++;
targs[idx++] = HYDU_strdup("-N");
Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/include/bsci.h
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/include/bsci.h 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/include/bsci.h 2011-02-21 20:53:45 UTC (rev 8001)
@@ -54,7 +54,7 @@
/* Launcher functions */
/** \brief Launch processes */
- HYD_status(*launch_procs) (char **args, struct HYD_node * node_list, int *control_fd);
+ HYD_status(*launch_procs) (char **args, struct HYD_proxy * proxy_list, int *control_fd);
/** \brief Finalize the bootstrap control device */
HYD_status(*launcher_finalize) (void);
@@ -96,7 +96,7 @@
* \brief HYDT_bsci_launch_procs - Launch processes
*
* \param[in] args Arguments to be used for the launched processes
- * \param[in] node_list List of nodes to launch processes on
+ * \param[in] proxy_list List of proxies to launch
* \param[out] control_fd Control socket to communicate with the launched process
* \param[in] stdout_cb Stdout callback function
* \param[in] stderr_cb Stderr callback function
@@ -112,7 +112,7 @@
* but allow proxies to query their ID information on each node using
* the HYDT_bsci_query_proxy_id function.
*/
-HYD_status HYDT_bsci_launch_procs(char **args, struct HYD_node *node_list, int *control_fd);
+HYD_status HYDT_bsci_launch_procs(char **args, struct HYD_proxy *proxy_list, int *control_fd);
/**
Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/persist/persist_client.h
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/persist/persist_client.h 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/persist/persist_client.h 2011-02-21 20:53:45 UTC (rev 8001)
@@ -11,7 +11,7 @@
#include "bscu.h"
#include "persist.h"
-HYD_status HYDT_bscd_persist_launch_procs(char **args, struct HYD_node *node_list,
+HYD_status HYDT_bscd_persist_launch_procs(char **args, struct HYD_proxy *proxy_list,
int *control_fd);
HYD_status HYDT_bscd_persist_wait_for_completion(int timeout);
Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/persist/persist_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/persist/persist_launch.c 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/persist/persist_launch.c 2011-02-21 20:53:45 UTC (rev 8001)
@@ -58,17 +58,17 @@
goto fn_exit;
}
-HYD_status HYDT_bscd_persist_launch_procs(char **args, struct HYD_node *node_list,
+HYD_status HYDT_bscd_persist_launch_procs(char **args, struct HYD_proxy *proxy_list,
int *control_fd)
{
- struct HYD_node *node;
+ struct HYD_proxy *proxy;
int idx, i;
HYD_status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
HYDT_bscd_persist_node_count = 0;
- for (node = node_list; node; node = node->next)
+ for (proxy = proxy_list; proxy; proxy = proxy->next)
HYDT_bscd_persist_node_count++;
for (idx = 0; args[idx]; idx++);
@@ -77,11 +77,11 @@
HYDU_MALLOC(HYDT_bscd_persist_control_fd, int *,
HYDT_bscd_persist_node_count * sizeof(int), status);
- for (node = node_list, i = 0; node; node = node->next, i++) {
+ for (proxy = proxy_list, i = 0; proxy; proxy = proxy->next, i++) {
args[idx] = HYDU_int_to_str(i);
/* connect to hydserv on each node */
- status = HYDU_sock_connect(node->hostname, PERSIST_DEFAULT_PORT,
+ status = HYDU_sock_connect(proxy->node->hostname, PERSIST_DEFAULT_PORT,
&HYDT_bscd_persist_control_fd[i]);
HYDU_ERR_POP(status, "unable to connect to the main server\n");
Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/src/bsci_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/src/bsci_launch.c 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/src/bsci_launch.c 2011-02-21 20:53:45 UTC (rev 8001)
@@ -7,13 +7,13 @@
#include "hydra.h"
#include "bsci.h"
-HYD_status HYDT_bsci_launch_procs(char **args, struct HYD_node *node_list, int *control_fd)
+HYD_status HYDT_bsci_launch_procs(char **args, struct HYD_proxy *proxy_list, int *control_fd)
{
HYD_status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
- status = HYDT_bsci_fns.launch_procs(args, node_list, control_fd);
+ status = HYDT_bsci_fns.launch_procs(args, proxy_list, control_fd);
HYDU_ERR_POP(status, "launcher returned error while launching processes\n");
fn_exit:
Modified: mpich2/trunk/src/pm/hydra/tools/debugger/debugger.c
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/debugger/debugger.c 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/debugger/debugger.c 2011-02-21 20:53:45 UTC (rev 8001)
@@ -40,7 +40,7 @@
j = 0;
for (exec = proxy->exec_list; exec; exec = exec->next) {
for (np = 0; np < exec->proc_count; np++) {
- MPIR_proctable[i].host_name = HYDU_strdup(proxy->node.hostname);
+ MPIR_proctable[i].host_name = HYDU_strdup(proxy->node->hostname);
MPIR_proctable[i].pid = proxy->pid[j++];
if (exec->exec[0])
MPIR_proctable[i].executable_name = HYDU_strdup(exec->exec[0]);
Modified: mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.c
===================================================================
--- mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.c 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.c 2011-02-21 20:53:45 UTC (rev 8001)
@@ -103,7 +103,6 @@
printf(" Other Hydra options:\n");
printf(" -verbose verbose mode\n");
printf(" -info build information\n");
- printf(" -print-rank-map print rank mapping\n");
printf(" -print-all-exitcodes print exit codes of all processes\n");
printf(" -iface network interface to use\n");
printf(" -ppn processes per node\n");
@@ -218,7 +217,7 @@
struct HYD_proxy *proxy;
struct HYD_exec *exec;
struct HYD_node *node;
- int exit_status = 0, i, process_id, proc_count, timeout, reset_rmk;
+ int exit_status = 0, i, timeout, reset_rmk;
HYD_status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
@@ -310,8 +309,10 @@
}
HYD_server_info.global_core_count = 0;
- for (node = HYD_server_info.node_list; node; node = node->next)
+ for (node = HYD_server_info.node_list, i = 0; node; node = node->next, i++) {
HYD_server_info.global_core_count += node->core_count;
+ node->node_id = i;
+ }
/* If the number of processes is not given, we allocate all the
* available nodes to each executable */
@@ -332,20 +333,20 @@
HYDU_ERR_POP(status, "unable to get the inherited env list\n");
status = HYDU_create_proxy_list(HYD_uii_mpx_exec_list, HYD_server_info.node_list,
- &HYD_server_info.pg_list, 0);
+ &HYD_server_info.pg_list);
HYDU_ERR_POP(status, "unable to create proxy list\n");
/* See if the node list contains a remotely accessible localhost */
for (proxy = HYD_server_info.pg_list.proxy_list; proxy; proxy = proxy->next) {
int is_local, remote_access;
- status = HYDU_sock_is_local(proxy->node.hostname, &is_local);
- HYDU_ERR_POP(status, "unable to check if %s is local\n", proxy->node.hostname);
+ status = HYDU_sock_is_local(proxy->node->hostname, &is_local);
+ HYDU_ERR_POP(status, "unable to check if %s is local\n", proxy->node->hostname);
if (is_local) {
- status = HYDU_sock_remote_access(proxy->node.hostname, &remote_access);
+ status = HYDU_sock_remote_access(proxy->node->hostname, &remote_access);
HYDU_ERR_POP(status, "unable to check if %s is remotely accessible\n",
- proxy->node.hostname);
+ proxy->node->hostname);
if (remote_access)
break;
@@ -353,7 +354,7 @@
}
if (proxy)
- HYD_server_info.local_hostname = HYDU_strdup(proxy->node.hostname);
+ HYD_server_info.local_hostname = HYDU_strdup(proxy->node->hostname);
if (HYD_server_info.user_global.debug)
HYD_uiu_print_params();
@@ -371,26 +372,6 @@
MPL_env2str("MPICH_PORT_RANGE", (const char **) &HYD_server_info.port_range))
HYD_server_info.port_range = HYDU_strdup(HYD_server_info.port_range);
- if (HYD_ui_mpich_info.print_rank_map) {
- for (proxy = HYD_server_info.pg_list.proxy_list; proxy; proxy = proxy->next) {
- HYDU_dump_noprefix(stdout, "(%s:", proxy->node.hostname);
-
- process_id = 0;
- for (exec = proxy->exec_list; exec; exec = exec->next) {
- for (i = 0; i < exec->proc_count; i++) {
- HYDU_dump_noprefix(stdout, "%d",
- HYDU_local_to_global_id(process_id++,
- proxy->start_pid,
- proxy->node.core_count,
- HYD_server_info.global_core_count));
- if (i < exec->proc_count - 1 || exec->next)
- HYDU_dump_noprefix(stdout, ",");
- }
- }
- HYDU_dump_noprefix(stdout, ")\n");
- }
- }
-
/* Add the stdout/stderr callback handlers */
HYD_server_info.stdout_cb = HYD_uiu_stdout_cb;
HYD_server_info.stderr_cb = HYD_uiu_stderr_cb;
@@ -414,24 +395,22 @@
continue;
}
- proc_count = 0;
- for (exec = proxy->exec_list; exec; exec = exec->next)
- proc_count += exec->proc_count;
- for (i = 0; i < proc_count; i++) {
+ if (HYD_ui_mpich_info.print_all_exitcodes)
+ HYDU_dump_noprefix(stdout, "[%s] ", proxy->node->hostname);
+
+ for (i = 0; i < proxy->proxy_process_count; i++) {
if (HYD_ui_mpich_info.print_all_exitcodes) {
- HYDU_dump_noprefix(stdout, "[%d]",
- HYDU_local_to_global_id(i, proxy->start_pid,
- proxy->node.core_count,
- HYD_server_info.global_core_count));
HYDU_dump_noprefix(stdout, "%d", WEXITSTATUS(proxy->exit_status[i]));
- if (i < proc_count - 1)
+ if (i < proxy->proxy_process_count - 1)
HYDU_dump_noprefix(stdout, ",");
}
+
exit_status |= proxy->exit_status[i];
}
+
+ if (HYD_ui_mpich_info.print_all_exitcodes)
+ HYDU_dump_noprefix(stdout, "\n");
}
- if (HYD_ui_mpich_info.print_all_exitcodes)
- HYDU_dump_noprefix(stdout, "\n");
/* Call finalize functions for lower layers to cleanup their resources */
status = HYD_pmci_finalize();
Modified: mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.h
===================================================================
--- mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.h 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.h 2011-02-21 20:53:45 UTC (rev 8001)
@@ -12,7 +12,6 @@
struct HYD_ui_mpich_info {
int ppn;
int ckpoint_int;
- int print_rank_map;
int print_all_exitcodes;
int ranks_per_proc;
Modified: mpich2/trunk/src/pm/hydra/ui/mpich/utils.c
===================================================================
--- mpich2/trunk/src/pm/hydra/ui/mpich/utils.c 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/ui/mpich/utils.c 2011-02-21 20:53:45 UTC (rev 8001)
@@ -16,7 +16,6 @@
{
HYD_ui_mpich_info.ppn = -1;
HYD_ui_mpich_info.ckpoint_int = -1;
- HYD_ui_mpich_info.print_rank_map = -1;
HYD_ui_mpich_info.print_all_exitcodes = -1;
HYD_ui_mpich_info.ranks_per_proc = -1;
HYD_ui_mpich_info.sort_order = NONE;
@@ -875,17 +874,6 @@
goto fn_exit;
}
-static void print_rank_map_help_fn(void)
-{
- printf("\n");
- printf("-print-rank-map: Print what ranks are allocated to what nodes\n\n");
-}
-
-static HYD_status print_rank_map_fn(char *arg, char ***argv)
-{
- return HYDU_set_int(arg, argv, &HYD_ui_mpich_info.print_rank_map, 1);
-}
-
static void print_all_exitcodes_help_fn(void)
{
printf("\n");
@@ -1048,7 +1036,6 @@
{"debug", verbose_fn, verbose_help_fn},
{"info", info_fn, info_help_fn},
{"version", info_fn, info_help_fn},
- {"print-rank-map", print_rank_map_fn, print_rank_map_help_fn},
{"print-all-exitcodes", print_all_exitcodes_fn, print_all_exitcodes_help_fn},
{"iface", iface_fn, iface_help_fn},
{"nameserver", nameserver_fn, nameserver_help_fn},
@@ -1076,9 +1063,6 @@
HYDU_ERR_POP(status, "unable to correct wdir\n");
}
- if (HYD_ui_mpich_info.print_rank_map == -1)
- HYD_ui_mpich_info.print_rank_map = 0;
-
if (HYD_ui_mpich_info.print_all_exitcodes == -1)
HYD_ui_mpich_info.print_all_exitcodes = 0;
Modified: mpich2/trunk/src/pm/hydra/ui/utils/uiu.c
===================================================================
--- mpich2/trunk/src/pm/hydra/ui/utils/uiu.c 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/ui/utils/uiu.c 2011-02-21 20:53:45 UTC (rev 8001)
@@ -150,9 +150,8 @@
for (proxy = HYD_server_info.pg_list.proxy_list; proxy; proxy = proxy->next) {
HYDU_dump_noprefix(stdout, " Proxy ID: %2d\n", i++);
HYDU_dump_noprefix(stdout, " -----------------\n");
- HYDU_dump_noprefix(stdout, " Proxy name: %s\n", proxy->node.hostname);
- HYDU_dump_noprefix(stdout, " Process count: %d\n", proxy->node.core_count);
- HYDU_dump_noprefix(stdout, " Start PID: %d\n", proxy->start_pid);
+ HYDU_dump_noprefix(stdout, " Proxy name: %s\n", proxy->node->hostname);
+ HYDU_dump_noprefix(stdout, " Process count: %d\n", proxy->node->core_count);
HYDU_dump_noprefix(stdout, "\n");
HYDU_dump_noprefix(stdout, " Proxy exec list:\n");
HYDU_dump_noprefix(stdout, " ....................\n");
@@ -231,7 +230,7 @@
if (proxy->proxy_id == proxy_id)
break;
HYDU_ASSERT(proxy, status);
- MPL_snprintf(tmp[i], HYD_TMP_STRLEN, "%s", proxy->node.hostname);
+ MPL_snprintf(tmp[i], HYD_TMP_STRLEN, "%s", proxy->node->hostname);
break;
case '\0':
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
Modified: mpich2/trunk/src/pm/hydra/utils/alloc/alloc.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/alloc/alloc.c 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/utils/alloc/alloc.c 2011-02-21 20:53:45 UTC (rev 8001)
@@ -85,15 +85,6 @@
HYDU_FREE(global_env->prop);
}
-static void init_node(struct HYD_node *node)
-{
- node->hostname = NULL;
- node->core_count = 0;
- node->user = NULL;
- node->local_binding = NULL;
- node->next = NULL;
-}
-
HYD_status HYDU_alloc_node(struct HYD_node **node)
{
HYD_status status = HYD_SUCCESS;
@@ -101,7 +92,13 @@
HYDU_FUNC_ENTER();
HYDU_MALLOC(*node, struct HYD_node *, sizeof(struct HYD_node), status);
- init_node(*node);
+ (*node)->hostname = NULL;
+ (*node)->core_count = 0;
+ (*node)->active_processes = 0;
+ (*node)->node_id = -1;
+ (*node)->user = NULL;
+ (*node)->local_binding = NULL;
+ (*node)->next = NULL;
fn_exit:
HYDU_FUNC_EXIT();
@@ -111,19 +108,6 @@
goto fn_exit;
}
-void HYDU_dup_node(struct HYD_node src, struct HYD_node *dest)
-{
- HYDU_FUNC_ENTER();
-
- dest->hostname = src.hostname ? HYDU_strdup(src.hostname) : NULL;
- dest->core_count = src.core_count;
- dest->user = src.user ? HYDU_strdup(src.user) : NULL;
- dest->local_binding = src.local_binding ? HYDU_strdup(src.local_binding) : NULL;
-
- HYDU_FUNC_EXIT();
- return;
-}
-
void HYDU_free_node_list(struct HYD_node *node_list)
{
struct HYD_node *node, *tnode;
@@ -192,7 +176,8 @@
}
}
-HYD_status HYDU_alloc_proxy(struct HYD_proxy **proxy, struct HYD_pg *pg)
+static HYD_status alloc_proxy(struct HYD_proxy **proxy, struct HYD_pg *pg,
+ struct HYD_node *node)
{
HYD_status status = HYD_SUCCESS;
@@ -200,15 +185,14 @@
HYDU_MALLOC(*proxy, struct HYD_proxy *, sizeof(struct HYD_proxy), status);
- init_node(&(*proxy)->node);
-
+ (*proxy)->node = node;
(*proxy)->pg = pg;
(*proxy)->proxy_id = -1;
(*proxy)->exec_launch_info = NULL;
- (*proxy)->start_pid = -1;
(*proxy)->proxy_process_count = 0;
+ (*proxy)->filler_processes = 0;
(*proxy)->pid = NULL;
(*proxy)->exit_status = NULL;
@@ -236,12 +220,8 @@
while (proxy) {
tproxy = proxy->next;
- if (proxy->node.hostname)
- HYDU_FREE(proxy->node.hostname);
+ proxy->node = NULL;
- if (proxy->node.local_binding)
- HYDU_FREE(proxy->node.local_binding);
-
if (proxy->exec_launch_info) {
HYDU_free_strlist(proxy->exec_launch_info);
HYDU_FREE(proxy->exec_launch_info);
@@ -351,6 +331,7 @@
texec->appnum = exec->appnum;
}
proxy->proxy_process_count += num_procs;
+ proxy->node->active_processes += num_procs;
fn_exit:
return status;
@@ -359,118 +340,179 @@
goto fn_exit;
}
+static int dceil(int x, int y) /* Integer ceiling of x / y; exact ceiling only for x >= 0 and y > 0. */
+{
+ int z; /* truncated quotient */
+
+ z = x / y; /* NOTE(review): y == 0 divides by zero; called with node->core_count, which HYDU_alloc_node initialises to 0 -- confirm it is always set before use */
+
+ if (z * y == x) /* y divides x evenly, so the quotient is already the ceiling */
+ return z;
+ else
+ return z + 1; /* a remainder exists: round up (not a true ceiling when x is negative) */
+}
+
HYD_status HYDU_create_proxy_list(struct HYD_exec *exec_list, struct HYD_node *node_list,
- struct HYD_pg *pg, int proc_offset)
+ struct HYD_pg *pg)
{
- struct HYD_proxy *proxy = NULL;
+ struct HYD_proxy *proxy = NULL, *tproxy, *last_proxy;
struct HYD_exec *exec;
- struct HYD_node *node, *start_node;
- int proxy_rem_procs, exec_rem_procs, core_count, procs_left;
- int total_exec_procs, num_nodes, i, start_pid, offset;
+ struct HYD_node *node;
+ int pg_process_count, process_core_ratio, c, global_core_count, filler_process_count;
+ int num_procs, proxy_rem_cores, exec_rem_procs, global_active_processes, included_cores;
+ int proxy_id, global_node_count, pcr, i;
HYD_status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
- total_exec_procs = 0;
+ pg_process_count = 0;
for (exec = exec_list; exec; exec = exec->next)
- total_exec_procs += exec->proc_count;
+ pg_process_count += exec->proc_count;
+ HYDU_ASSERT(pg_process_count, status);
- num_nodes = 0;
- core_count = 0;
+ /*
+ * Find the process/core ratio that we can go to. The minimum is
+ * one (meaning there are as many processes as cores in the
+ * system). But if one of the nodes is already oversubscribed, we
+ * take that as a hint to mean that the other nodes can also be
+ * oversubscribed to the same extent.
+ */
+ process_core_ratio = 1;
+ global_node_count = 0;
+ global_core_count = 0;
+ global_active_processes = 0;
for (node = node_list; node; node = node->next) {
- num_nodes++;
- core_count += node->core_count;
+ pcr = dceil(node->active_processes, node->core_count);
+ if (pcr > process_core_ratio)
+ process_core_ratio = pcr;
+ global_node_count++;
+ global_core_count += node->core_count;
+ global_active_processes += node->active_processes;
}
- /* First create the list of proxies we need */
- offset = proc_offset % core_count;
- for (node = node_list; node; node = node->next) {
- offset -= node->core_count;
- if (offset < 0)
- break;
- }
- start_node = node;
+ /* Find the number of filler processes before we need to increase
+ * the process/core ratio */
+ filler_process_count = global_core_count * process_core_ratio - global_active_processes;
- if (offset + start_node->core_count) {
- /* we are starting on some offset within the node; the maximum
- * number of proxies can be larger than the total number of
- * nodes, since we might wrap around. */
- num_nodes++;
- }
-
- start_pid = 0;
- procs_left = total_exec_procs;
- for (i = 0, node = start_node; i < num_nodes; i++) {
- if (pg->proxy_list == NULL) {
- status = HYDU_alloc_proxy(&pg->proxy_list, pg);
- HYDU_ERR_POP(status, "unable to allocate proxy\n");
- proxy = pg->proxy_list;
+ /* Create the list of proxies required to accommodate all the
+ * processes. The proxy list follows these rules:
+ *
+ * 1. It will start at the first proxy that has a non-zero number
+ * of available cores.
+ *
+ * 2. The maximum number of proxies cannot exceed the number of
+ * nodes.
+ *
+ * 3. A proxy can never have zero processes assigned to it. The
+ * below loop does not follow this rule; we make a second pass on
+ * the list to enforce this rule.
+ */
+ pg->proxy_list = NULL;
+ last_proxy = NULL;
+ included_cores = 0;
+ pcr = process_core_ratio;
+ for (node = node_list, i = 0; i < global_node_count; node = node->next) {
+ if (node == NULL) {
+ node = node_list;
+ pcr++;
}
- else {
- status = HYDU_alloc_proxy(&proxy->next, pg);
- HYDU_ERR_POP(status, "unable to allocate proxy\n");
- proxy = proxy->next;
- }
- proxy->proxy_id = i;
- proxy->start_pid = start_pid;
- HYDU_dup_node(*node, &proxy->node);
- proxy->node.next = NULL;
+ c = (node->core_count * pcr - node->active_processes);
- /* For the first node, use only the remaining cores. For the
- * last node, we need to make sure its not oversubscribed
- * since the first proxy we started on might repeat. */
- if (i == 0)
- proxy->node.core_count = -(offset); /* offset is negative */
- else if (i == (num_nodes - 1) && (offset + start_node->core_count))
- proxy->node.core_count = node->core_count + offset;
+ if (c == 0 && included_cores == 0)
+ continue;
+
+ included_cores += c;
+
+ /* create a proxy associated with this node */
+ status = alloc_proxy(&proxy, pg, node);
+ HYDU_ERR_POP(status, "error allocating proxy\n");
+
+ proxy->filler_processes = c;
+
+ if (pg->proxy_list == NULL)
+ pg->proxy_list = proxy;
else
- proxy->node.core_count = node->core_count;
+ last_proxy->next = proxy;
+ last_proxy = proxy;
- /* If we found enough proxies, break out */
- start_pid += proxy->node.core_count;
- procs_left -= proxy->node.core_count;
- if (procs_left <= 0)
+ if (included_cores >= pg_process_count)
break;
- node = node->next;
- /* Handle the wrap around case for the nodes */
- if (node == NULL)
- node = node_list;
+ i++;
}
- /* Now fill the proxies with the appropriate executable
- * information */
- proxy = pg->proxy_list;
- exec = exec_list;
- proxy_rem_procs = proxy->node.core_count;
- exec_rem_procs = exec ? exec->proc_count : 0;
- while (exec) {
- if (exec_rem_procs <= proxy_rem_procs) {
- status = add_exec_to_proxy(exec, proxy, exec_rem_procs);
+ /* Proxy list is created; add the executables to the proxy list */
+ if (pg->proxy_list->next == NULL) {
+ /* Special case: there is only one proxy, so all executables
+ * directly get appended to this proxy */
+ for (exec = exec_list; exec; exec = exec->next) {
+ status = add_exec_to_proxy(exec, pg->proxy_list, exec->proc_count);
HYDU_ERR_POP(status, "unable to add executable to proxy\n");
+ }
+ }
+ else {
+ exec = exec_list;
+ proxy = pg->proxy_list;
- proxy_rem_procs -= exec_rem_procs;
- if (proxy_rem_procs == 0) {
+ pcr = process_core_ratio;
+
+ exec_rem_procs = exec_list->proc_count;
+ proxy_rem_cores = proxy->node->core_count * pcr - proxy->node->active_processes;
+
+ while (exec) {
+ num_procs = (exec_rem_procs > proxy_rem_cores) ? proxy_rem_cores : exec_rem_procs;
+
+ exec_rem_procs -= num_procs;
+ proxy_rem_cores -= num_procs;
+
+ if (num_procs) {
+ status = add_exec_to_proxy(exec, proxy, num_procs);
+ HYDU_ERR_POP(status, "unable to add executable to proxy\n");
+ }
+
+ if (exec_rem_procs == 0) {
+ exec = exec->next;
+ if (exec)
+ exec_rem_procs = exec->proc_count;
+ else
+ break;
+ }
+
+ if (proxy_rem_cores == 0) {
proxy = proxy->next;
+
if (proxy == NULL)
proxy = pg->proxy_list;
- proxy_rem_procs = proxy->node.core_count;
+
+ if (proxy->node->node_id == 0)
+ pcr++;
+
+ proxy_rem_cores = proxy->node->core_count * pcr - proxy->node->active_processes;
}
+ }
+ }
- exec = exec->next;
- exec_rem_procs = exec ? exec->proc_count : 0;
+ /* Get rid of the proxies that do not have any executables
+ * attached to them */
+ while (pg->proxy_list->exec_list == NULL) {
+ tproxy = pg->proxy_list;
+ pg->proxy_list = tproxy->next;
+ tproxy->next = NULL;
+ HYDU_free_proxy_list(tproxy);
+ }
+
+ pg->proxy_list->proxy_id = proxy_id = 0;
+ for (proxy = pg->proxy_list; proxy->next;) {
+ if (proxy->next->exec_list == NULL) {
+ tproxy = proxy->next;
+ proxy->next = tproxy->next;
+ tproxy->next = NULL;
+ HYDU_free_proxy_list(tproxy);
}
else {
- status = add_exec_to_proxy(exec, proxy, proxy_rem_procs);
- HYDU_ERR_POP(status, "unable to add executable to proxy\n");
-
- exec_rem_procs -= proxy_rem_procs;
-
proxy = proxy->next;
- if (proxy == NULL)
- proxy = pg->proxy_list;
- proxy_rem_procs = proxy->node.core_count;
+ proxy->proxy_id = ++proxy_id;
}
}
Modified: mpich2/trunk/src/pm/hydra/utils/others/others.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/others/others.c 2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/utils/others/others.c 2011-02-21 20:53:45 UTC (rev 8001)
@@ -6,13 +6,8 @@
#include "hydra.h"
-int HYDU_local_to_global_id(int local_id, int start_pid, int core_count, int global_core_count)
-{
- return ((local_id / core_count) * global_core_count) + (local_id % core_count) + start_pid;
-}
-
HYD_status HYDU_add_to_node_list(const char *hostname, int num_procs,
- struct HYD_node ** node_list)
+ struct HYD_node **node_list)
{
struct HYD_node *node;
HYD_status status = HYD_SUCCESS;
More information about the mpich2-commits
mailing list