[mpich2-commits] r8001 - in mpich2/trunk/src/pm/hydra: include pm/pmiserv tools/bootstrap/external tools/bootstrap/include tools/bootstrap/persist tools/bootstrap/src tools/debugger ui/mpich ui/utils utils/alloc utils/others

balaji at mcs.anl.gov balaji at mcs.anl.gov
Mon Feb 21 14:53:46 CST 2011


Author: balaji
Date: 2011-02-21 14:53:45 -0600 (Mon, 21 Feb 2011)
New Revision: 8001

Modified:
   mpich2/trunk/src/pm/hydra/include/hydra.h
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip.h
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_cb.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_pmi_v1.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_utils.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_cb.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmci.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v1.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v2.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_utils.c
   mpich2/trunk/src/pm/hydra/tools/bootstrap/external/external.h
   mpich2/trunk/src/pm/hydra/tools/bootstrap/external/external_launch.c
   mpich2/trunk/src/pm/hydra/tools/bootstrap/external/ll.h
   mpich2/trunk/src/pm/hydra/tools/bootstrap/external/ll_launch.c
   mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm.h
   mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm_launch.c
   mpich2/trunk/src/pm/hydra/tools/bootstrap/include/bsci.h
   mpich2/trunk/src/pm/hydra/tools/bootstrap/persist/persist_client.h
   mpich2/trunk/src/pm/hydra/tools/bootstrap/persist/persist_launch.c
   mpich2/trunk/src/pm/hydra/tools/bootstrap/src/bsci_launch.c
   mpich2/trunk/src/pm/hydra/tools/debugger/debugger.c
   mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.c
   mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.h
   mpich2/trunk/src/pm/hydra/ui/mpich/utils.c
   mpich2/trunk/src/pm/hydra/ui/utils/uiu.c
   mpich2/trunk/src/pm/hydra/utils/alloc/alloc.c
   mpich2/trunk/src/pm/hydra/utils/others/others.c
Log:
Redo the PMI ID calculation code entirely. Now we use a static list of
nodes, and explicitly manage how many processes are running on each
node. When a new group is launched this is updated. This allows for
better load-balancing of dynamically spawned processes. Fixes ticket #1434.

No reviewer.

Modified: mpich2/trunk/src/pm/hydra/include/hydra.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra.h	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/include/hydra.h	2011-02-21 20:53:45 UTC (rev 8001)
@@ -252,7 +252,10 @@
 struct HYD_node {
     char *hostname;
     int core_count;
+    int active_processes;
 
+    int node_id;
+
     /* Username */
     char *user;
 
@@ -264,7 +267,7 @@
 
 /* Proxy information */
 struct HYD_proxy {
-    struct HYD_node node;
+    struct HYD_node *node;
 
     struct HYD_pg *pg;          /* Back pointer to the PG */
 
@@ -272,9 +275,11 @@
 
     int proxy_id;
 
-    int start_pid;
     int proxy_process_count;
 
+    /* Filler processes that we are adding on this proxy */
+    int filler_processes;
+
     struct HYD_exec *exec_list;
 
     int *pid;
@@ -419,17 +424,15 @@
 void HYDU_init_global_env(struct HYD_env_global *global_env);
 void HYDU_finalize_global_env(struct HYD_env_global *global_env);
 HYD_status HYDU_alloc_node(struct HYD_node **node);
-void HYDU_dup_node(struct HYD_node src, struct HYD_node *dest);
 void HYDU_free_node_list(struct HYD_node *node_list);
 void HYDU_init_pg(struct HYD_pg *pg, int pgid);
 HYD_status HYDU_alloc_pg(struct HYD_pg **pg, int pgid);
 void HYDU_free_pg_list(struct HYD_pg *pg_list);
-HYD_status HYDU_alloc_proxy(struct HYD_proxy **proxy, struct HYD_pg *pg);
 void HYDU_free_proxy_list(struct HYD_proxy *proxy_list);
 HYD_status HYDU_alloc_exec(struct HYD_exec **exec);
 void HYDU_free_exec_list(struct HYD_exec *exec_list);
 HYD_status HYDU_create_proxy_list(struct HYD_exec *exec_list, struct HYD_node *node_list,
-                                  struct HYD_pg *pg, int proc_offset);
+                                  struct HYD_pg *pg);
 HYD_status HYDU_correct_wdir(char **wdir);
 
 /* args */
@@ -473,8 +476,6 @@
                                struct HYDT_bind_cpuset_t cpuset);
 
 /* others */
-int HYDU_local_to_global_id(int local_id, int start_pid, int core_count,
-                            int global_core_count);
 HYD_status HYDU_add_to_node_list(const char *hostname, int num_procs,
                                  struct HYD_node **node_list);
 HYD_status HYDU_gethostname(char *hostname);

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip.c	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip.c	2011-02-21 20:53:45 UTC (rev 8001)
@@ -19,7 +19,13 @@
 
     HYDU_init_user_global(&HYD_pmcd_pmip.user_global);
 
-    HYD_pmcd_pmip.system_global.global_core_count = -1;
+    HYD_pmcd_pmip.system_global.global_core_map.left = -1;
+    HYD_pmcd_pmip.system_global.global_core_map.current = -1;
+    HYD_pmcd_pmip.system_global.global_core_map.right = -1;
+    HYD_pmcd_pmip.system_global.filler_process_map.left = -1;
+    HYD_pmcd_pmip.system_global.filler_process_map.current = -1;
+    HYD_pmcd_pmip.system_global.filler_process_map.right = -1;
+
     HYD_pmcd_pmip.system_global.global_process_count = -1;
     HYD_pmcd_pmip.system_global.jobid = NULL;
     HYD_pmcd_pmip.system_global.pmi_port = NULL;
@@ -48,7 +54,6 @@
     HYD_pmcd_pmip.local.proxy_core_count = -1;
     HYD_pmcd_pmip.local.proxy_process_count = -1;
 
-    HYD_pmcd_pmip.start_pid = -1;
     HYD_pmcd_pmip.exec_list = NULL;
 
     status = HYD_pmcd_pmi_allocate_kvs(&HYD_pmcd_pmip.local.kvs, -1);

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip.h
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip.h	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip.h	2011-02-21 20:53:45 UTC (rev 8001)
@@ -10,11 +10,20 @@
 #include "hydra.h"
 #include "common.h"
 
+struct HYD_pmcd_pmip_map {
+    int left;
+    int current;
+    int right;
+    int total;
+};
+
 struct HYD_pmcd_pmip {
     struct HYD_user_global user_global;
 
     struct {
-        int global_core_count;
+        struct HYD_pmcd_pmip_map global_core_map;
+        struct HYD_pmcd_pmip_map filler_process_map;
+
         int global_process_count;
         char *jobid;
 
@@ -63,7 +72,6 @@
     } local;
 
     /* Process segmentation information for this proxy */
-    int start_pid;
     struct HYD_exec *exec_list;
 };
 

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_cb.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_cb.c	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_cb.c	2011-02-21 20:53:45 UTC (rev 8001)
@@ -442,6 +442,34 @@
     goto fn_exit;
 }
 
+static int local_to_global_id(int local_id)
+{
+    int rem1, layer, rem2;
+    int ret;
+
+    if (local_id < HYD_pmcd_pmip.system_global.filler_process_map.current)
+        ret = HYD_pmcd_pmip.system_global.filler_process_map.left + local_id;
+    else {
+        /* rem1 gives the number of processes remaining after the
+         * filling the holes */
+        rem1 = local_id - HYD_pmcd_pmip.system_global.filler_process_map.current;
+
+        /* layer gives the layer of filling in which our process lies
+         * starting from layer 0; in each layer, we fill all proxies
+         * in the global list */
+        layer = rem1 / HYD_pmcd_pmip.system_global.global_core_map.current;
+
+        /* rem2 gives our relative index in the layer we belong to */
+        rem2 = rem1 % HYD_pmcd_pmip.system_global.global_core_map.current;
+
+        ret = (HYD_pmcd_pmip.system_global.filler_process_map.total +
+               (layer * HYD_pmcd_pmip.system_global.global_core_map.total) +
+               HYD_pmcd_pmip.system_global.global_core_map.left + rem2);
+    }
+
+    return ret;
+}
+
 static HYD_status launch_procs(void)
 {
     int i, j, arg, process_id;
@@ -491,10 +519,7 @@
         HYD_pmcd_pmip.downstream.pmi_fd_active[i] = 0;
 
         if (HYD_pmcd_pmip.system_global.pmi_rank == -1)
-            HYD_pmcd_pmip.downstream.pmi_rank[i] =
-                HYDU_local_to_global_id(i, HYD_pmcd_pmip.start_pid,
-                                        HYD_pmcd_pmip.local.proxy_core_count,
-                                        HYD_pmcd_pmip.system_global.global_core_count);
+            HYD_pmcd_pmip.downstream.pmi_rank[i] = local_to_global_id(i);
         else
             HYD_pmcd_pmip.downstream.pmi_rank[i] = HYD_pmcd_pmip.system_global.pmi_rank;
     }
@@ -769,15 +794,27 @@
     } while (1);
 
     /* verify the arguments we got */
-    if (HYD_pmcd_pmip.system_global.global_core_count == -1)
-        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "global core count not available\n");
+    if (HYD_pmcd_pmip.system_global.global_core_map.left == -1 ||
+        HYD_pmcd_pmip.system_global.global_core_map.current == -1 ||
+        HYD_pmcd_pmip.system_global.global_core_map.right == -1)
+        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
+                            "cannot find global core map (%d,%d,%d)\n",
+                            HYD_pmcd_pmip.system_global.global_core_map.left,
+                            HYD_pmcd_pmip.system_global.global_core_map.current,
+                            HYD_pmcd_pmip.system_global.global_core_map.right);
 
+    if (HYD_pmcd_pmip.system_global.filler_process_map.left == -1 ||
+        HYD_pmcd_pmip.system_global.filler_process_map.current == -1 ||
+        HYD_pmcd_pmip.system_global.filler_process_map.right == -1)
+        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
+                            "cannot find available cores (%d,%d,%d)\n",
+                            HYD_pmcd_pmip.system_global.filler_process_map.left,
+                            HYD_pmcd_pmip.system_global.filler_process_map.current,
+                            HYD_pmcd_pmip.system_global.filler_process_map.right);
+
     if (HYD_pmcd_pmip.local.proxy_core_count == -1)
         HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "proxy core count not available\n");
 
-    if (HYD_pmcd_pmip.start_pid == -1)
-        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "start PID not available\n");
-
     if (HYD_pmcd_pmip.exec_list == NULL && HYD_pmcd_pmip.user_global.ckpoint_prefix == NULL)
         HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                             "no executable given and doesn't look like a restart either\n");

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_pmi_v1.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_pmi_v1.c	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_pmi_v1.c	2011-02-21 20:53:45 UTC (rev 8001)
@@ -297,7 +297,7 @@
 
     i = 0;
     tmp[i++] = HYDU_strdup("cmd=universe_size size=");
-    tmp[i++] = HYDU_int_to_str(HYD_pmcd_pmip.system_global.global_core_count);
+    tmp[i++] = HYDU_int_to_str(HYD_pmcd_pmip.system_global.global_core_map.total);
     tmp[i++] = HYDU_strdup("\n");
     tmp[i++] = NULL;
 

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_utils.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_utils.c	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_utils.c	2011-02-21 20:53:45 UTC (rev 8001)
@@ -206,11 +206,89 @@
     return HYDU_set_str_and_incr(arg, argv, &HYD_pmcd_pmip.user_global.global_env.prop);
 }
 
-static HYD_status global_core_count_fn(char *arg, char ***argv)
+static HYD_status split_map(char *map, int *left, int *current, int *right)
 {
-    return HYDU_set_int_and_incr(arg, argv, &HYD_pmcd_pmip.system_global.global_core_count);
+    char *tmp;
+    HYD_status status = HYD_SUCCESS;
+
+    tmp = strtok(map, ",");
+    HYDU_ASSERT(tmp, status);
+    *left = atoi(tmp);
+
+    tmp = strtok(NULL, ",");
+    HYDU_ASSERT(tmp, status);
+    *current = atoi(tmp);
+
+    tmp = strtok(NULL, ",");
+    HYDU_ASSERT(tmp, status);
+    *right = atoi(tmp);
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    goto fn_exit;
 }
 
+static HYD_status global_core_map_fn(char *arg, char ***argv)
+{
+    char *map;
+    HYD_status status = HYD_SUCCESS;
+
+    /* Split the core map into three different segments */
+    map = HYDU_strdup(**argv);
+    HYDU_ASSERT(map, status);
+
+    status = split_map(map, &HYD_pmcd_pmip.system_global.global_core_map.left,
+                       &HYD_pmcd_pmip.system_global.global_core_map.current,
+                       &HYD_pmcd_pmip.system_global.global_core_map.right);
+    HYDU_ERR_POP(status, "unable to split the provided mapping\n");
+
+    HYD_pmcd_pmip.system_global.global_core_map.total =
+        HYD_pmcd_pmip.system_global.global_core_map.left +
+        HYD_pmcd_pmip.system_global.global_core_map.current +
+        HYD_pmcd_pmip.system_global.global_core_map.right;
+
+    (*argv)++;
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    goto fn_exit;
+}
+
+static HYD_status filler_process_map_fn(char *arg, char ***argv)
+{
+    char *map;
+    HYD_status status = HYD_SUCCESS;
+
+    /* Split the core map into three different segments */
+    map = HYDU_strdup(**argv);
+    HYDU_ASSERT(map, status);
+
+    status = split_map(map, &HYD_pmcd_pmip.system_global.filler_process_map.left,
+                       &HYD_pmcd_pmip.system_global.filler_process_map.current,
+                       &HYD_pmcd_pmip.system_global.filler_process_map.right);
+    HYDU_ERR_POP(status, "unable to split the provided mapping\n");
+
+    HYD_pmcd_pmip.system_global.filler_process_map.total =
+        HYD_pmcd_pmip.system_global.filler_process_map.left +
+        HYD_pmcd_pmip.system_global.filler_process_map.current +
+        HYD_pmcd_pmip.system_global.filler_process_map.right;
+
+    (*argv)++;
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    goto fn_exit;
+}
+
 static HYD_status global_process_count_fn(char *arg, char ***argv)
 {
     return HYDU_set_int_and_incr(arg, argv, &HYD_pmcd_pmip.system_global.global_process_count);
@@ -254,11 +332,6 @@
     return HYDU_set_int_and_incr(arg, argv, &HYD_pmcd_pmip.local.proxy_core_count);
 }
 
-static HYD_status start_pid_fn(char *arg, char ***argv)
-{
-    return HYDU_set_int_and_incr(arg, argv, &HYD_pmcd_pmip.start_pid);
-}
-
 static HYD_status exec_fn(char *arg, char ***argv)
 {
     struct HYD_exec *exec = NULL;
@@ -406,14 +479,14 @@
     {"global-system-env", global_env_fn, NULL},
     {"global-user-env", global_env_fn, NULL},
     {"genv-prop", genv_prop_fn, NULL},
-    {"global-core-count", global_core_count_fn, NULL},
+    {"global-core-map", global_core_map_fn, NULL},
+    {"filler-process-map", filler_process_map_fn, NULL},
     {"global-process-count", global_process_count_fn, NULL},
     {"version", version_fn, NULL},
     {"interface-env-name", interface_env_name_fn, NULL},
     {"hostname", hostname_fn, NULL},
     {"local-binding", local_binding_fn, NULL},
     {"proxy-core-count", proxy_core_count_fn, NULL},
-    {"start-pid", start_pid_fn, NULL},
     {"exec", exec_fn, NULL},
     {"exec-appnum", exec_appnum_fn, NULL},
     {"exec-proc-count", exec_proc_count_fn, NULL},

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_cb.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_cb.c	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_cb.c	2011-02-21 20:53:45 UTC (rev 8001)
@@ -117,6 +117,10 @@
     if (pg->pgid == 0)
         HYDT_dbg_free_procdesc();
 
+    /* Reset the node allocations for this PG */
+    for (tproxy = pg->proxy_list; tproxy; tproxy = tproxy->next)
+        tproxy->node->active_processes -= tproxy->proxy_process_count;
+
   fn_exit:
     HYDU_FUNC_EXIT();
     return status;

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmci.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmci.c	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmci.c	2011-02-21 20:53:45 UTC (rev 8001)
@@ -99,7 +99,6 @@
 HYD_status HYD_pmci_launch_procs(void)
 {
     struct HYD_proxy *proxy;
-    struct HYD_node *node_list = NULL, *node, *tnode;
     char *proxy_args[HYD_NUM_TMP_STRINGS] = { NULL }, *control_port = NULL;
     int node_count, i, *control_fd;
     HYD_status status = HYD_SUCCESS;
@@ -113,25 +112,6 @@
     status = HYD_pmcd_pmi_alloc_pg_scratch(&HYD_server_info.pg_list);
     HYDU_ERR_POP(status, "error allocating pg scratch space\n");
 
-    /* Copy the host list to pass to the launcher */
-    node_list = NULL;
-    node_count = 0;
-    for (proxy = HYD_server_info.pg_list.proxy_list; proxy; proxy = proxy->next) {
-        HYDU_alloc_node(&node);
-        HYDU_dup_node(proxy->node, node);
-        node->next = NULL;
-
-        if (node_list == NULL) {
-            node_list = node;
-        }
-        else {
-            for (tnode = node_list; tnode->next; tnode = tnode->next);
-            tnode->next = node;
-        }
-
-        node_count++;
-    }
-
     status = HYDU_sock_create_and_listen_portstr(HYD_server_info.user_global.iface,
                                                  HYD_server_info.local_hostname,
                                                  HYD_server_info.port_range, &control_port,
@@ -147,6 +127,10 @@
     status = HYD_pmcd_pmi_fill_in_exec_launch_info(&HYD_server_info.pg_list);
     HYDU_ERR_POP(status, "unable to fill in executable arguments\n");
 
+    node_count = 0;
+    for (proxy = HYD_server_info.pg_list.proxy_list; proxy; proxy = proxy->next)
+        node_count++;
+
     HYDU_MALLOC(control_fd, int *, node_count * sizeof(int), status);
     for (i = 0; i < node_count; i++)
         control_fd[i] = HYD_FD_UNSET;
@@ -156,7 +140,8 @@
                        HYD_server_info.user_global.bindlib);
     HYDU_ERR_POP(status, "unable to initializing binding library");
 
-    status = HYDT_bsci_launch_procs(proxy_args, node_list, control_fd);
+    status =
+        HYDT_bsci_launch_procs(proxy_args, HYD_server_info.pg_list.proxy_list, control_fd);
     HYDU_ERR_POP(status, "launcher cannot launch processes\n");
 
     for (i = 0, proxy = HYD_server_info.pg_list.proxy_list; proxy; proxy = proxy->next, i++)
@@ -174,7 +159,6 @@
     if (control_port)
         HYDU_FREE(control_port);
     HYDU_free_strlist(proxy_args);
-    HYDU_free_node_list(node_list);
     HYDU_FUNC_EXIT();
     return status;
 

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v1.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v1.c	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v1.c	2011-02-21 20:53:45 UTC (rev 8001)
@@ -262,7 +262,7 @@
 {
     struct HYD_pg *pg;
     struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
-    struct HYD_node *node_list = NULL, *node, *tnode, *user_node_list = NULL;
+    struct HYD_node *user_node_list = NULL;
     struct HYD_proxy *proxy;
     struct HYD_pmcd_token *tokens;
     struct HYD_exec *exec_list = NULL, *exec;
@@ -506,11 +506,11 @@
         offset += pg->pg_process_count;
 
     if (user_node_list) {
-        status = HYDU_create_proxy_list(exec_list, user_node_list, pg, 0);
+        status = HYDU_create_proxy_list(exec_list, user_node_list, pg);
         HYDU_ERR_POP(status, "error creating proxy list\n");
     }
     else {
-        status = HYDU_create_proxy_list(exec_list, HYD_server_info.node_list, pg, offset);
+        status = HYDU_create_proxy_list(exec_list, HYD_server_info.node_list, pg);
         HYDU_ERR_POP(status, "error creating proxy list\n");
     }
     HYDU_free_exec_list(exec_list);
@@ -527,23 +527,6 @@
     /* Go to the last PG */
     for (pg = &HYD_server_info.pg_list; pg->next; pg = pg->next);
 
-    /* Copy the host list to pass to the launcher */
-    node_list = NULL;
-    for (proxy = pg->proxy_list; proxy; proxy = proxy->next) {
-        HYDU_alloc_node(&node);
-        node->hostname = HYDU_strdup(proxy->node.hostname);
-        node->core_count = proxy->node.core_count;
-        node->next = NULL;
-
-        if (node_list == NULL) {
-            node_list = node;
-        }
-        else {
-            for (tnode = node_list; tnode->next; tnode = tnode->next);
-            tnode->next = node;
-        }
-    }
-
     status = HYD_pmcd_pmi_fill_in_proxy_args(proxy_args, control_port, new_pgid);
     HYDU_ERR_POP(status, "unable to fill in proxy arguments\n");
     HYDU_FREE(control_port);
@@ -551,9 +534,8 @@
     status = HYD_pmcd_pmi_fill_in_exec_launch_info(pg);
     HYDU_ERR_POP(status, "unable to fill in executable arguments\n");
 
-    status = HYDT_bsci_launch_procs(proxy_args, node_list, NULL);
+    status = HYDT_bsci_launch_procs(proxy_args, pg->proxy_list, NULL);
     HYDU_ERR_POP(status, "launcher cannot launch processes\n");
-    HYDU_free_node_list(node_list);
 
     {
         char *cmd_str[HYD_NUM_TMP_STRINGS], *cmd;

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v2.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v2.c	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v2.c	2011-02-21 20:53:45 UTC (rev 8001)
@@ -455,7 +455,7 @@
 {
     struct HYD_pg *pg;
     struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
-    struct HYD_node *node_list = NULL, *node, *tnode, *user_node_list = NULL;
+    struct HYD_node *user_node_list = NULL;
     struct HYD_proxy *proxy;
     struct HYD_pmcd_token *tokens;
     struct HYD_exec *exec_list = NULL, *exec;
@@ -687,11 +687,11 @@
         offset += pg->pg_process_count;
 
     if (user_node_list) {
-        status = HYDU_create_proxy_list(exec_list, user_node_list, pg, 0);
+        status = HYDU_create_proxy_list(exec_list, user_node_list, pg);
         HYDU_ERR_POP(status, "error creating proxy list\n");
     }
     else {
-        status = HYDU_create_proxy_list(exec_list, HYD_server_info.node_list, pg, offset);
+        status = HYDU_create_proxy_list(exec_list, HYD_server_info.node_list, pg);
         HYDU_ERR_POP(status, "error creating proxy list\n");
     }
     HYDU_free_exec_list(exec_list);
@@ -708,23 +708,6 @@
     /* Go to the last PG */
     for (pg = &HYD_server_info.pg_list; pg->next; pg = pg->next);
 
-    /* Copy the host list to pass to the launcher */
-    node_list = NULL;
-    for (proxy = pg->proxy_list; proxy; proxy = proxy->next) {
-        HYDU_alloc_node(&node);
-        node->hostname = HYDU_strdup(proxy->node.hostname);
-        node->core_count = proxy->node.core_count;
-        node->next = NULL;
-
-        if (node_list == NULL) {
-            node_list = node;
-        }
-        else {
-            for (tnode = node_list; tnode->next; tnode = tnode->next);
-            tnode->next = node;
-        }
-    }
-
     status = HYD_pmcd_pmi_fill_in_proxy_args(proxy_args, control_port, new_pgid);
     HYDU_ERR_POP(status, "unable to fill in proxy arguments\n");
     HYDU_FREE(control_port);
@@ -732,9 +715,8 @@
     status = HYD_pmcd_pmi_fill_in_exec_launch_info(pg);
     HYDU_ERR_POP(status, "unable to fill in executable arguments\n");
 
-    status = HYDT_bsci_launch_procs(proxy_args, node_list, NULL);
+    status = HYDT_bsci_launch_procs(proxy_args, pg->proxy_list, NULL);
     HYDU_ERR_POP(status, "launcher cannot launch processes\n");
-    HYDU_free_node_list(node_list);
 
     {
         char *cmd_str[HYD_NUM_TMP_STRINGS], *cmd;

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_utils.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_utils.c	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_utils.c	2011-02-21 20:53:45 UTC (rev 8001)
@@ -136,25 +136,25 @@
             HYDU_MALLOC(block, struct block *, sizeof(struct block), status);
             block->start_idx = proxy->proxy_id;
             block->num_blocks = 1;
-            block->block_size = proxy->node.core_count;
+            block->block_size = proxy->node->core_count;
             block->next = NULL;
 
             blocklist_tail = blocklist_head = block;
         }
         else if (blocklist_tail->start_idx + blocklist_tail->num_blocks == proxy->proxy_id &&
-                 blocklist_tail->block_size == proxy->node.core_count) {
+                 blocklist_tail->block_size == proxy->node->core_count) {
             blocklist_tail->num_blocks++;
         }
         else if (blocklist_tail->start_idx == proxy->proxy_id &&
                  blocklist_tail->num_blocks == 1) {
-            blocklist_tail->block_size += proxy->node.core_count;
+            blocklist_tail->block_size += proxy->node->core_count;
         }
         else {
             HYDU_MALLOC(blocklist_tail->next, struct block *, sizeof(struct block), status);
             blocklist_tail = blocklist_tail->next;
             blocklist_tail->start_idx = proxy->proxy_id;
             blocklist_tail->num_blocks = 1;
-            blocklist_tail->block_size = proxy->node.core_count;
+            blocklist_tail->block_size = proxy->node->core_count;
             blocklist_tail->next = NULL;
         }
     }
@@ -225,8 +225,10 @@
     struct HYD_exec *exec;
     struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
     char *mapping = NULL;
-    char *pmi_fd = NULL, *pmi_port = NULL;
-    int pmi_rank, ret;
+    char *pmi_fd = NULL, *pmi_port = NULL, *map = NULL;
+    int pmi_rank, ret, left_global_cores, right_global_cores;
+    int left_filler_processes, right_filler_processes;
+    char *tmp[HYD_NUM_TMP_STRINGS];
     HYD_status status = HYD_SUCCESS;
 
     status = pmi_process_mapping(pg, &mapping);
@@ -241,6 +243,14 @@
 
     /* Create the arguments list for each proxy */
     process_id = 0;
+    right_global_cores = HYD_server_info.global_core_count;
+    left_global_cores = 0;
+
+    right_filler_processes = 0;
+    for (proxy = pg->proxy_list; proxy; proxy = proxy->next)
+        right_filler_processes += proxy->filler_processes;
+    left_filler_processes = 0;
+
     for (proxy = pg->proxy_list; proxy; proxy = proxy->next) {
         for (inherited_env_count = 0, env = HYD_server_info.user_global.global_env.inherited;
              env; env = env->next, inherited_env_count++);
@@ -277,11 +287,53 @@
         }
 
         proxy->exec_launch_info[arg++] = HYDU_strdup("--hostname");
-        proxy->exec_launch_info[arg++] = HYDU_strdup(proxy->node.hostname);
+        proxy->exec_launch_info[arg++] = HYDU_strdup(proxy->node->hostname);
 
-        proxy->exec_launch_info[arg++] = HYDU_strdup("--global-core-count");
-        proxy->exec_launch_info[arg++] = HYDU_int_to_str(HYD_server_info.global_core_count);
+        /* A map has three fields -- the entire system is considered
+         * to have three nodes; the nodes on the left of the current
+         * node are all grouped into one node, and the nodes to the
+         * right are grouped into another. */
 
+        /* Global core map */
+        right_global_cores -= proxy->node->core_count;
+
+        proxy->exec_launch_info[arg++] = HYDU_strdup("--global-core-map");
+        tmp[0] = HYDU_int_to_str(left_global_cores);
+        tmp[1] = HYDU_strdup(",");
+        tmp[2] = HYDU_int_to_str(proxy->node->core_count);
+        tmp[3] = HYDU_strdup(",");
+        tmp[4] = HYDU_int_to_str(right_global_cores);
+        tmp[5] = NULL;
+        status = HYDU_str_alloc_and_join(tmp, &map);
+        HYDU_ERR_POP(status, "unable to join strings\n");
+
+        proxy->exec_launch_info[arg++] = map;
+        HYDU_free_strlist(tmp);
+
+        left_global_cores += proxy->node->core_count;
+
+        /* Filler process map */
+        right_filler_processes -= proxy->filler_processes;
+
+        proxy->exec_launch_info[arg++] = HYDU_strdup("--filler-process-map");
+        tmp[0] = HYDU_int_to_str(left_filler_processes);
+        tmp[1] = HYDU_strdup(",");
+        tmp[2] = HYDU_int_to_str(proxy->filler_processes);
+        tmp[3] = HYDU_strdup(",");
+        tmp[4] = HYDU_int_to_str(right_filler_processes);
+        tmp[5] = NULL;
+        status = HYDU_str_alloc_and_join(tmp, &map);
+        HYDU_ERR_POP(status, "unable to join strings\n");
+
+        HYDU_ASSERT(left_filler_processes >= 0, status);
+        HYDU_ASSERT(proxy->filler_processes >= 0, status);
+        HYDU_ASSERT(right_filler_processes >= 0, status);
+
+        proxy->exec_launch_info[arg++] = map;
+        HYDU_free_strlist(tmp);
+
+        left_filler_processes += proxy->filler_processes;
+
         proxy->exec_launch_info[arg++] = HYDU_strdup("--global-process-count");
         proxy->exec_launch_info[arg++] = HYDU_int_to_str(pg->pg_process_count);
 
@@ -355,9 +407,9 @@
         proxy->exec_launch_info[arg++] = HYDU_strdup("--pmi-process-mapping");
         proxy->exec_launch_info[arg++] = HYDU_strdup(mapping);
 
-        if (proxy->node.local_binding) {
+        if (proxy->node->local_binding) {
             proxy->exec_launch_info[arg++] = HYDU_strdup("--local-binding");
-            proxy->exec_launch_info[arg++] = HYDU_strdup(proxy->node.local_binding);
+            proxy->exec_launch_info[arg++] = HYDU_strdup(proxy->node->local_binding);
         }
 
         if (HYD_server_info.user_global.binding) {
@@ -430,11 +482,8 @@
                 HYDU_strdup(HYD_server_info.user_global.global_env.prop);
         }
 
-        proxy->exec_launch_info[arg++] = HYDU_strdup("--start-pid");
-        proxy->exec_launch_info[arg++] = HYDU_int_to_str(proxy->start_pid);
-
         proxy->exec_launch_info[arg++] = HYDU_strdup("--proxy-core-count");
-        proxy->exec_launch_info[arg++] = HYDU_int_to_str(proxy->node.core_count);
+        proxy->exec_launch_info[arg++] = HYDU_int_to_str(proxy->node->core_count);
         proxy->exec_launch_info[arg++] = NULL;
 
         /* Now pass the local executable information */

Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/external/external.h
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/external/external.h	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/external/external.h	2011-02-21 20:53:45 UTC (rev 8001)
@@ -14,7 +14,7 @@
 #include "sge.h"
 #include "pbs.h"
 
-HYD_status HYDT_bscd_external_launch_procs(char **args, struct HYD_node *node_list,
+HYD_status HYDT_bscd_external_launch_procs(char **args, struct HYD_proxy *proxy_list,
                                            int *control_fd);
 HYD_status HYDT_bscd_external_launcher_finalize(void);
 HYD_status HYDT_bscd_external_query_env_inherit(const char *env_name, int *ret);

Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/external/external_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/external/external_launch.c	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/external/external_launch.c	2011-02-21 20:53:45 UTC (rev 8001)
@@ -97,13 +97,13 @@
     goto fn_exit;
 }
 
-HYD_status HYDT_bscd_external_launch_procs(char **args, struct HYD_node *node_list,
+HYD_status HYDT_bscd_external_launch_procs(char **args, struct HYD_proxy *proxy_list,
                                            int *control_fd)
 {
     int num_hosts, idx, i, host_idx, fd, exec_idx, offset, lh, len;
     int *pid, *fd_list, *dummy;
     int sockpair[2];
-    struct HYD_node *node;
+    struct HYD_proxy *proxy;
     char *targs[HYD_NUM_TMP_STRINGS], *path = NULL, *extra_arg_list = NULL, *extra_arg;
     char quoted_exec_string[HYD_TMP_STRLEN], *original_exec_string;
     struct HYD_env *env = NULL;
@@ -181,7 +181,7 @@
 
     /* pid_list might already have some PIDs */
     num_hosts = 0;
-    for (node = node_list; node; node = node->next)
+    for (proxy = proxy_list; proxy; proxy = proxy->next)
         num_hosts++;
 
     /* Increase pid list to accommodate these new pids */
@@ -201,18 +201,19 @@
 
     targs[idx] = NULL;
     HYDT_bind_cpuset_zero(&cpuset);
-    for (i = 0, node = node_list; node; node = node->next, i++) {
+    for (i = 0, proxy = proxy_list; proxy; proxy = proxy->next, i++) {
 
         if (targs[host_idx])
             HYDU_FREE(targs[host_idx]);
-        if (node->user == NULL) {
-            targs[host_idx] = HYDU_strdup(node->hostname);
+        if (proxy->node->user == NULL) {
+            targs[host_idx] = HYDU_strdup(proxy->node->hostname);
         }
         else {
-            len = strlen(node->user) + strlen("@") + strlen(node->hostname) + 1;
+            len = strlen(proxy->node->user) + strlen("@") + strlen(proxy->node->hostname) + 1;
 
             HYDU_MALLOC(targs[host_idx], char *, len, status);
-            MPL_snprintf(targs[host_idx], len, "%s@%s", node->user, node->hostname);
+            MPL_snprintf(targs[host_idx], len, "%s@%s", proxy->node->user,
+                         proxy->node->hostname);
         }
 
         /* append proxy ID */
@@ -227,11 +228,11 @@
          * connections causing the job to fail. This is basically a
          * hack to slow down ssh connections to the same node. */
         if (!strcmp(HYDT_bsci_info.launcher, "ssh")) {
-            status = HYDT_bscd_ssh_store_launch_time(node->hostname);
+            status = HYDT_bscd_ssh_store_launch_time(proxy->node->hostname);
             HYDU_ERR_POP(status, "error storing launch time\n");
         }
 
-        status = HYDU_sock_is_local(node->hostname, &lh);
+        status = HYDU_sock_is_local(proxy->node->hostname, &lh);
         HYDU_ERR_POP(status, "error checking if node is localhost\n");
 
         /* If launcher is 'fork', or this is the localhost, use fork

Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/external/ll.h
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/external/ll.h	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/external/ll.h	2011-02-21 20:53:45 UTC (rev 8001)
@@ -9,7 +9,8 @@
 
 #include "hydra.h"
 
-HYD_status HYDT_bscd_ll_launch_procs(char **args, struct HYD_node *node_list, int *control_fd);
+HYD_status HYDT_bscd_ll_launch_procs(char **args, struct HYD_proxy *proxy_list,
+                                     int *control_fd);
 HYD_status HYDT_bscd_ll_query_proxy_id(int *proxy_id);
 HYD_status HYDT_bscd_ll_query_node_list(struct HYD_node **node_list);
 HYD_status HYDTI_bscd_ll_query_node_count(int *count);

Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/external/ll_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/external/ll_launch.c	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/external/ll_launch.c	2011-02-21 20:53:45 UTC (rev 8001)
@@ -12,13 +12,14 @@
 
 static int fd_stdout, fd_stderr;
 
-HYD_status HYDT_bscd_ll_launch_procs(char **args, struct HYD_node *node_list, int *control_fd)
+HYD_status HYDT_bscd_ll_launch_procs(char **args, struct HYD_proxy *proxy_list,
+                                     int *control_fd)
 {
     int idx, i, total_procs, node_count;
     int *pid, *fd_list, exec_idx;
     char *targs[HYD_NUM_TMP_STRINGS], *node_list_str = NULL;
     char *path = NULL, *extra_arg_list = NULL, *extra_arg, quoted_exec_string[HYD_TMP_STRLEN];
-    struct HYD_node *node;
+    struct HYD_proxy *proxy;
     struct HYDT_bind_cpuset_t cpuset;
     HYD_status status = HYD_SUCCESS;
 
@@ -47,7 +48,7 @@
     HYDU_ERR_POP(status, "unable to query for the node count\n");
 
     node_count = 0;
-    for (node = node_list; node; node = node->next)
+    for (proxy = proxy_list; proxy; proxy = proxy->next)
         node_count++;
 
     if (total_procs != node_count)

Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm.h
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm.h	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm.h	2011-02-21 20:53:45 UTC (rev 8001)
@@ -9,7 +9,7 @@
 
 #include "hydra.h"
 
-HYD_status HYDT_bscd_slurm_launch_procs(char **args, struct HYD_node *node_list,
+HYD_status HYDT_bscd_slurm_launch_procs(char **args, struct HYD_proxy *proxy_list,
                                         int *control_fd);
 HYD_status HYDT_bscd_slurm_query_proxy_id(int *proxy_id);
 HYD_status HYDT_bscd_slurm_query_node_list(struct HYD_node **node_list);

Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm_launch.c	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm_launch.c	2011-02-21 20:53:45 UTC (rev 8001)
@@ -12,20 +12,20 @@
 
 static int fd_stdout, fd_stderr;
 
-static HYD_status node_list_to_str(struct HYD_node *node_list, char **node_list_str)
+static HYD_status proxy_list_to_node_str(struct HYD_proxy *proxy_list, char **node_list_str)
 {
     int i;
     char *tmp[HYD_NUM_TMP_STRINGS], *foo = NULL;
-    struct HYD_node *node;
+    struct HYD_proxy *proxy;
     HYD_status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
 
     i = 0;
-    for (node = node_list; node; node = node->next) {
-        tmp[i++] = HYDU_strdup(node->hostname);
+    for (proxy = proxy_list; proxy; proxy = proxy->next) {
+        tmp[i++] = HYDU_strdup(proxy->node->hostname);
 
-        if (node->next)
+        if (proxy->node->next)
             tmp[i++] = HYDU_strdup(",");
 
         /* If we used up more than half of the array elements, merge
@@ -59,7 +59,7 @@
     goto fn_exit;
 }
 
-HYD_status HYDT_bscd_slurm_launch_procs(char **args, struct HYD_node *node_list,
+HYD_status HYDT_bscd_slurm_launch_procs(char **args, struct HYD_proxy *proxy_list,
                                         int *control_fd)
 {
     int num_hosts, idx, i, exec_idx;
@@ -67,7 +67,7 @@
     char *targs[HYD_NUM_TMP_STRINGS], *node_list_str = NULL,
         quoted_exec_string[HYD_TMP_STRLEN];
     char *path = NULL, *extra_arg_list = NULL, *extra_arg;
-    struct HYD_node *node;
+    struct HYD_proxy *proxy;
     struct HYDT_bind_cpuset_t cpuset;
     HYD_status status = HYD_SUCCESS;
 
@@ -89,14 +89,14 @@
     if (!strcmp(HYDT_bsci_info.rmk, "slurm")) {
         targs[idx++] = HYDU_strdup("--nodelist");
 
-        status = node_list_to_str(node_list, &node_list_str);
+        status = proxy_list_to_node_str(proxy_list, &node_list_str);
         HYDU_ERR_POP(status, "unable to build a node list string\n");
 
         targs[idx++] = HYDU_strdup(node_list_str);
     }
 
     num_hosts = 0;
-    for (node = node_list; node; node = node->next)
+    for (proxy = proxy_list; proxy; proxy = proxy->next)
         num_hosts++;
 
     targs[idx++] = HYDU_strdup("-N");

Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/include/bsci.h
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/include/bsci.h	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/include/bsci.h	2011-02-21 20:53:45 UTC (rev 8001)
@@ -54,7 +54,7 @@
 
     /* Launcher functions */
     /** \brief Launch processes */
-    HYD_status(*launch_procs) (char **args, struct HYD_node * node_list, int *control_fd);
+    HYD_status(*launch_procs) (char **args, struct HYD_proxy * proxy_list, int *control_fd);
 
     /** \brief Finalize the bootstrap control device */
     HYD_status(*launcher_finalize) (void);
@@ -96,7 +96,7 @@
  * \brief HYDT_bsci_launch_procs - Launch processes
  *
  * \param[in]   args            Arguments to be used for the launched processes
- * \param[in]   node_list       List of nodes to launch processes on
+ * \param[in]   proxy_list      List of proxies to launch
  * \param[out]  control_fd      Control socket to communicate with the launched process
  * \param[in]   stdout_cb       Stdout callback function
  * \param[in]   stderr_cb       Stderr callback function
@@ -112,7 +112,7 @@
  * but allow proxies to query their ID information on each node using
  * the HYDT_bsci_query_proxy_id function.
  */
-HYD_status HYDT_bsci_launch_procs(char **args, struct HYD_node *node_list, int *control_fd);
+HYD_status HYDT_bsci_launch_procs(char **args, struct HYD_proxy *proxy_list, int *control_fd);
 
 
 /**

Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/persist/persist_client.h
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/persist/persist_client.h	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/persist/persist_client.h	2011-02-21 20:53:45 UTC (rev 8001)
@@ -11,7 +11,7 @@
 #include "bscu.h"
 #include "persist.h"
 
-HYD_status HYDT_bscd_persist_launch_procs(char **args, struct HYD_node *node_list,
+HYD_status HYDT_bscd_persist_launch_procs(char **args, struct HYD_proxy *proxy_list,
                                           int *control_fd);
 HYD_status HYDT_bscd_persist_wait_for_completion(int timeout);
 

Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/persist/persist_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/persist/persist_launch.c	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/persist/persist_launch.c	2011-02-21 20:53:45 UTC (rev 8001)
@@ -58,17 +58,17 @@
     goto fn_exit;
 }
 
-HYD_status HYDT_bscd_persist_launch_procs(char **args, struct HYD_node *node_list,
+HYD_status HYDT_bscd_persist_launch_procs(char **args, struct HYD_proxy *proxy_list,
                                           int *control_fd)
 {
-    struct HYD_node *node;
+    struct HYD_proxy *proxy;
     int idx, i;
     HYD_status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
 
     HYDT_bscd_persist_node_count = 0;
-    for (node = node_list; node; node = node->next)
+    for (proxy = proxy_list; proxy; proxy = proxy->next)
         HYDT_bscd_persist_node_count++;
 
     for (idx = 0; args[idx]; idx++);
@@ -77,11 +77,11 @@
     HYDU_MALLOC(HYDT_bscd_persist_control_fd, int *,
                 HYDT_bscd_persist_node_count * sizeof(int), status);
 
-    for (node = node_list, i = 0; node; node = node->next, i++) {
+    for (proxy = proxy_list, i = 0; proxy; proxy = proxy->next, i++) {
         args[idx] = HYDU_int_to_str(i);
 
         /* connect to hydserv on each node */
-        status = HYDU_sock_connect(node->hostname, PERSIST_DEFAULT_PORT,
+        status = HYDU_sock_connect(proxy->node->hostname, PERSIST_DEFAULT_PORT,
                                    &HYDT_bscd_persist_control_fd[i]);
         HYDU_ERR_POP(status, "unable to connect to the main server\n");
 

Modified: mpich2/trunk/src/pm/hydra/tools/bootstrap/src/bsci_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/bootstrap/src/bsci_launch.c	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/bootstrap/src/bsci_launch.c	2011-02-21 20:53:45 UTC (rev 8001)
@@ -7,13 +7,13 @@
 #include "hydra.h"
 #include "bsci.h"
 
-HYD_status HYDT_bsci_launch_procs(char **args, struct HYD_node *node_list, int *control_fd)
+HYD_status HYDT_bsci_launch_procs(char **args, struct HYD_proxy *proxy_list, int *control_fd)
 {
     HYD_status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
 
-    status = HYDT_bsci_fns.launch_procs(args, node_list, control_fd);
+    status = HYDT_bsci_fns.launch_procs(args, proxy_list, control_fd);
     HYDU_ERR_POP(status, "launcher returned error while launching processes\n");
 
   fn_exit:

Modified: mpich2/trunk/src/pm/hydra/tools/debugger/debugger.c
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/debugger/debugger.c	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/tools/debugger/debugger.c	2011-02-21 20:53:45 UTC (rev 8001)
@@ -40,7 +40,7 @@
         j = 0;
         for (exec = proxy->exec_list; exec; exec = exec->next) {
             for (np = 0; np < exec->proc_count; np++) {
-                MPIR_proctable[i].host_name = HYDU_strdup(proxy->node.hostname);
+                MPIR_proctable[i].host_name = HYDU_strdup(proxy->node->hostname);
                 MPIR_proctable[i].pid = proxy->pid[j++];
                 if (exec->exec[0])
                     MPIR_proctable[i].executable_name = HYDU_strdup(exec->exec[0]);

Modified: mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.c
===================================================================
--- mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.c	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.c	2011-02-21 20:53:45 UTC (rev 8001)
@@ -103,7 +103,6 @@
     printf("  Other Hydra options:\n");
     printf("    -verbose                         verbose mode\n");
     printf("    -info                            build information\n");
-    printf("    -print-rank-map                  print rank mapping\n");
     printf("    -print-all-exitcodes             print exit codes of all processes\n");
     printf("    -iface                           network interface to use\n");
     printf("    -ppn                             processes per node\n");
@@ -218,7 +217,7 @@
     struct HYD_proxy *proxy;
     struct HYD_exec *exec;
     struct HYD_node *node;
-    int exit_status = 0, i, process_id, proc_count, timeout, reset_rmk;
+    int exit_status = 0, i, timeout, reset_rmk;
     HYD_status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
@@ -310,8 +309,10 @@
     }
 
     HYD_server_info.global_core_count = 0;
-    for (node = HYD_server_info.node_list; node; node = node->next)
+    for (node = HYD_server_info.node_list, i = 0; node; node = node->next, i++) {
         HYD_server_info.global_core_count += node->core_count;
+        node->node_id = i;
+    }
 
     /* If the number of processes is not given, we allocate all the
      * available nodes to each executable */
@@ -332,20 +333,20 @@
     HYDU_ERR_POP(status, "unable to get the inherited env list\n");
 
     status = HYDU_create_proxy_list(HYD_uii_mpx_exec_list, HYD_server_info.node_list,
-                                    &HYD_server_info.pg_list, 0);
+                                    &HYD_server_info.pg_list);
     HYDU_ERR_POP(status, "unable to create proxy list\n");
 
     /* See if the node list contains a remotely accessible localhost */
     for (proxy = HYD_server_info.pg_list.proxy_list; proxy; proxy = proxy->next) {
         int is_local, remote_access;
 
-        status = HYDU_sock_is_local(proxy->node.hostname, &is_local);
-        HYDU_ERR_POP(status, "unable to check if %s is local\n", proxy->node.hostname);
+        status = HYDU_sock_is_local(proxy->node->hostname, &is_local);
+        HYDU_ERR_POP(status, "unable to check if %s is local\n", proxy->node->hostname);
 
         if (is_local) {
-            status = HYDU_sock_remote_access(proxy->node.hostname, &remote_access);
+            status = HYDU_sock_remote_access(proxy->node->hostname, &remote_access);
             HYDU_ERR_POP(status, "unable to check if %s is remotely accessible\n",
-                         proxy->node.hostname);
+                         proxy->node->hostname);
 
             if (remote_access)
                 break;
@@ -353,7 +354,7 @@
     }
 
     if (proxy)
-        HYD_server_info.local_hostname = HYDU_strdup(proxy->node.hostname);
+        HYD_server_info.local_hostname = HYDU_strdup(proxy->node->hostname);
 
     if (HYD_server_info.user_global.debug)
         HYD_uiu_print_params();
@@ -371,26 +372,6 @@
         MPL_env2str("MPICH_PORT_RANGE", (const char **) &HYD_server_info.port_range))
         HYD_server_info.port_range = HYDU_strdup(HYD_server_info.port_range);
 
-    if (HYD_ui_mpich_info.print_rank_map) {
-        for (proxy = HYD_server_info.pg_list.proxy_list; proxy; proxy = proxy->next) {
-            HYDU_dump_noprefix(stdout, "(%s:", proxy->node.hostname);
-
-            process_id = 0;
-            for (exec = proxy->exec_list; exec; exec = exec->next) {
-                for (i = 0; i < exec->proc_count; i++) {
-                    HYDU_dump_noprefix(stdout, "%d",
-                                       HYDU_local_to_global_id(process_id++,
-                                                               proxy->start_pid,
-                                                               proxy->node.core_count,
-                                                               HYD_server_info.global_core_count));
-                    if (i < exec->proc_count - 1 || exec->next)
-                        HYDU_dump_noprefix(stdout, ",");
-                }
-            }
-            HYDU_dump_noprefix(stdout, ")\n");
-        }
-    }
-
     /* Add the stdout/stderr callback handlers */
     HYD_server_info.stdout_cb = HYD_uiu_stdout_cb;
     HYD_server_info.stderr_cb = HYD_uiu_stderr_cb;
@@ -414,24 +395,22 @@
             continue;
         }
 
-        proc_count = 0;
-        for (exec = proxy->exec_list; exec; exec = exec->next)
-            proc_count += exec->proc_count;
-        for (i = 0; i < proc_count; i++) {
+        if (HYD_ui_mpich_info.print_all_exitcodes)
+            HYDU_dump_noprefix(stdout, "[%s] ", proxy->node->hostname);
+
+        for (i = 0; i < proxy->proxy_process_count; i++) {
             if (HYD_ui_mpich_info.print_all_exitcodes) {
-                HYDU_dump_noprefix(stdout, "[%d]",
-                                   HYDU_local_to_global_id(i, proxy->start_pid,
-                                                           proxy->node.core_count,
-                                                           HYD_server_info.global_core_count));
                 HYDU_dump_noprefix(stdout, "%d", WEXITSTATUS(proxy->exit_status[i]));
-                if (i < proc_count - 1)
+                if (i < proxy->proxy_process_count - 1)
                     HYDU_dump_noprefix(stdout, ",");
             }
+
             exit_status |= proxy->exit_status[i];
         }
+
+        if (HYD_ui_mpich_info.print_all_exitcodes)
+            HYDU_dump_noprefix(stdout, "\n");
     }
-    if (HYD_ui_mpich_info.print_all_exitcodes)
-        HYDU_dump_noprefix(stdout, "\n");
 
     /* Call finalize functions for lower layers to cleanup their resources */
     status = HYD_pmci_finalize();

Modified: mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.h
===================================================================
--- mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.h	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/ui/mpich/mpiexec.h	2011-02-21 20:53:45 UTC (rev 8001)
@@ -12,7 +12,6 @@
 struct HYD_ui_mpich_info {
     int ppn;
     int ckpoint_int;
-    int print_rank_map;
     int print_all_exitcodes;
     int ranks_per_proc;
 

Modified: mpich2/trunk/src/pm/hydra/ui/mpich/utils.c
===================================================================
--- mpich2/trunk/src/pm/hydra/ui/mpich/utils.c	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/ui/mpich/utils.c	2011-02-21 20:53:45 UTC (rev 8001)
@@ -16,7 +16,6 @@
 {
     HYD_ui_mpich_info.ppn = -1;
     HYD_ui_mpich_info.ckpoint_int = -1;
-    HYD_ui_mpich_info.print_rank_map = -1;
     HYD_ui_mpich_info.print_all_exitcodes = -1;
     HYD_ui_mpich_info.ranks_per_proc = -1;
     HYD_ui_mpich_info.sort_order = NONE;
@@ -875,17 +874,6 @@
     goto fn_exit;
 }
 
-static void print_rank_map_help_fn(void)
-{
-    printf("\n");
-    printf("-print-rank-map: Print what ranks are allocated to what nodes\n\n");
-}
-
-static HYD_status print_rank_map_fn(char *arg, char ***argv)
-{
-    return HYDU_set_int(arg, argv, &HYD_ui_mpich_info.print_rank_map, 1);
-}
-
 static void print_all_exitcodes_help_fn(void)
 {
     printf("\n");
@@ -1048,7 +1036,6 @@
     {"debug", verbose_fn, verbose_help_fn},
     {"info", info_fn, info_help_fn},
     {"version", info_fn, info_help_fn},
-    {"print-rank-map", print_rank_map_fn, print_rank_map_help_fn},
     {"print-all-exitcodes", print_all_exitcodes_fn, print_all_exitcodes_help_fn},
     {"iface", iface_fn, iface_help_fn},
     {"nameserver", nameserver_fn, nameserver_help_fn},
@@ -1076,9 +1063,6 @@
         HYDU_ERR_POP(status, "unable to correct wdir\n");
     }
 
-    if (HYD_ui_mpich_info.print_rank_map == -1)
-        HYD_ui_mpich_info.print_rank_map = 0;
-
     if (HYD_ui_mpich_info.print_all_exitcodes == -1)
         HYD_ui_mpich_info.print_all_exitcodes = 0;
 

Modified: mpich2/trunk/src/pm/hydra/ui/utils/uiu.c
===================================================================
--- mpich2/trunk/src/pm/hydra/ui/utils/uiu.c	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/ui/utils/uiu.c	2011-02-21 20:53:45 UTC (rev 8001)
@@ -150,9 +150,8 @@
     for (proxy = HYD_server_info.pg_list.proxy_list; proxy; proxy = proxy->next) {
         HYDU_dump_noprefix(stdout, "      Proxy ID: %2d\n", i++);
         HYDU_dump_noprefix(stdout, "      -----------------\n");
-        HYDU_dump_noprefix(stdout, "        Proxy name: %s\n", proxy->node.hostname);
-        HYDU_dump_noprefix(stdout, "        Process count: %d\n", proxy->node.core_count);
-        HYDU_dump_noprefix(stdout, "        Start PID: %d\n", proxy->start_pid);
+        HYDU_dump_noprefix(stdout, "        Proxy name: %s\n", proxy->node->hostname);
+        HYDU_dump_noprefix(stdout, "        Process count: %d\n", proxy->node->core_count);
         HYDU_dump_noprefix(stdout, "\n");
         HYDU_dump_noprefix(stdout, "        Proxy exec list:\n");
         HYDU_dump_noprefix(stdout, "        ....................\n");
@@ -231,7 +230,7 @@
                     if (proxy->proxy_id == proxy_id)
                         break;
                 HYDU_ASSERT(proxy, status);
-                MPL_snprintf(tmp[i], HYD_TMP_STRLEN, "%s", proxy->node.hostname);
+                MPL_snprintf(tmp[i], HYD_TMP_STRLEN, "%s", proxy->node->hostname);
                 break;
             case '\0':
                 HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,

Modified: mpich2/trunk/src/pm/hydra/utils/alloc/alloc.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/alloc/alloc.c	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/utils/alloc/alloc.c	2011-02-21 20:53:45 UTC (rev 8001)
@@ -85,15 +85,6 @@
         HYDU_FREE(global_env->prop);
 }
 
-static void init_node(struct HYD_node *node)
-{
-    node->hostname = NULL;
-    node->core_count = 0;
-    node->user = NULL;
-    node->local_binding = NULL;
-    node->next = NULL;
-}
-
 HYD_status HYDU_alloc_node(struct HYD_node **node)
 {
     HYD_status status = HYD_SUCCESS;
@@ -101,7 +92,13 @@
     HYDU_FUNC_ENTER();
 
     HYDU_MALLOC(*node, struct HYD_node *, sizeof(struct HYD_node), status);
-    init_node(*node);
+    (*node)->hostname = NULL;
+    (*node)->core_count = 0;
+    (*node)->active_processes = 0;
+    (*node)->node_id = -1;
+    (*node)->user = NULL;
+    (*node)->local_binding = NULL;
+    (*node)->next = NULL;
 
   fn_exit:
     HYDU_FUNC_EXIT();
@@ -111,19 +108,6 @@
     goto fn_exit;
 }
 
-void HYDU_dup_node(struct HYD_node src, struct HYD_node *dest)
-{
-    HYDU_FUNC_ENTER();
-
-    dest->hostname = src.hostname ? HYDU_strdup(src.hostname) : NULL;
-    dest->core_count = src.core_count;
-    dest->user = src.user ? HYDU_strdup(src.user) : NULL;
-    dest->local_binding = src.local_binding ? HYDU_strdup(src.local_binding) : NULL;
-
-    HYDU_FUNC_EXIT();
-    return;
-}
-
 void HYDU_free_node_list(struct HYD_node *node_list)
 {
     struct HYD_node *node, *tnode;
@@ -192,7 +176,8 @@
     }
 }
 
-HYD_status HYDU_alloc_proxy(struct HYD_proxy **proxy, struct HYD_pg *pg)
+static HYD_status alloc_proxy(struct HYD_proxy **proxy, struct HYD_pg *pg,
+                              struct HYD_node *node)
 {
     HYD_status status = HYD_SUCCESS;
 
@@ -200,15 +185,14 @@
 
     HYDU_MALLOC(*proxy, struct HYD_proxy *, sizeof(struct HYD_proxy), status);
 
-    init_node(&(*proxy)->node);
-
+    (*proxy)->node = node;
     (*proxy)->pg = pg;
 
     (*proxy)->proxy_id = -1;
     (*proxy)->exec_launch_info = NULL;
 
-    (*proxy)->start_pid = -1;
     (*proxy)->proxy_process_count = 0;
+    (*proxy)->filler_processes = 0;
 
     (*proxy)->pid = NULL;
     (*proxy)->exit_status = NULL;
@@ -236,12 +220,8 @@
     while (proxy) {
         tproxy = proxy->next;
 
-        if (proxy->node.hostname)
-            HYDU_FREE(proxy->node.hostname);
+        proxy->node = NULL;
 
-        if (proxy->node.local_binding)
-            HYDU_FREE(proxy->node.local_binding);
-
         if (proxy->exec_launch_info) {
             HYDU_free_strlist(proxy->exec_launch_info);
             HYDU_FREE(proxy->exec_launch_info);
@@ -351,6 +331,7 @@
         texec->appnum = exec->appnum;
     }
     proxy->proxy_process_count += num_procs;
+    proxy->node->active_processes += num_procs;
 
   fn_exit:
     return status;
@@ -359,118 +340,179 @@
     goto fn_exit;
 }
 
+static int dceil(int x, int y)
+{
+    int z;
+
+    z = x / y;
+
+    if (z * y == x)
+        return z;
+    else
+        return z + 1;
+}
+
 HYD_status HYDU_create_proxy_list(struct HYD_exec *exec_list, struct HYD_node *node_list,
-                                  struct HYD_pg *pg, int proc_offset)
+                                  struct HYD_pg *pg)
 {
-    struct HYD_proxy *proxy = NULL;
+    struct HYD_proxy *proxy = NULL, *tproxy, *last_proxy;
     struct HYD_exec *exec;
-    struct HYD_node *node, *start_node;
-    int proxy_rem_procs, exec_rem_procs, core_count, procs_left;
-    int total_exec_procs, num_nodes, i, start_pid, offset;
+    struct HYD_node *node;
+    int pg_process_count, process_core_ratio, c, global_core_count, filler_process_count;
+    int num_procs, proxy_rem_cores, exec_rem_procs, global_active_processes, included_cores;
+    int proxy_id, global_node_count, pcr, i;
     HYD_status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
 
-    total_exec_procs = 0;
+    pg_process_count = 0;
     for (exec = exec_list; exec; exec = exec->next)
-        total_exec_procs += exec->proc_count;
+        pg_process_count += exec->proc_count;
+    HYDU_ASSERT(pg_process_count, status);
 
-    num_nodes = 0;
-    core_count = 0;
+    /*
+     * Find the process/core ratio that we can go to. The minimum is
+     * one (meaning there are as many processes as cores in the
+     * system). But if one of the nodes is already oversubscribed, we
+     * take that as a hint to mean that the other nodes can also be
+     * oversubscribed to the same extent.
+     */
+    process_core_ratio = 1;
+    global_node_count = 0;
+    global_core_count = 0;
+    global_active_processes = 0;
     for (node = node_list; node; node = node->next) {
-        num_nodes++;
-        core_count += node->core_count;
+        pcr = dceil(node->active_processes, node->core_count);
+        if (pcr > process_core_ratio)
+            process_core_ratio = pcr;
+        global_node_count++;
+        global_core_count += node->core_count;
+        global_active_processes += node->active_processes;
     }
 
-    /* First create the list of proxies we need */
-    offset = proc_offset % core_count;
-    for (node = node_list; node; node = node->next) {
-        offset -= node->core_count;
-        if (offset < 0)
-            break;
-    }
-    start_node = node;
+    /* Find the number of filler processes before we need to increase
+     * the process/core ratio */
+    filler_process_count = global_core_count * process_core_ratio - global_active_processes;
 
-    if (offset + start_node->core_count) {
-        /* we are starting on some offset within the node; the maximum
-         * number of proxies can be larger than the total number of
-         * nodes, since we might wrap around. */
-        num_nodes++;
-    }
-
-    start_pid = 0;
-    procs_left = total_exec_procs;
-    for (i = 0, node = start_node; i < num_nodes; i++) {
-        if (pg->proxy_list == NULL) {
-            status = HYDU_alloc_proxy(&pg->proxy_list, pg);
-            HYDU_ERR_POP(status, "unable to allocate proxy\n");
-            proxy = pg->proxy_list;
+    /* Create the list of proxies required to accommodate all the
+     * processes. The proxy list follows these rules:
+     *
+     * 1. It will start at the first proxy that has a non-zero number
+     * of available cores.
+     *
+     * 2. The maximum number of proxies cannot exceed the number of
+     * nodes.
+     *
+     * 3. A proxy can never have zero processes assigned to it. The
+     * below loop does not follow this rule; we make a second pass on
+     * the list to enforce this rule.
+     */
+    pg->proxy_list = NULL;
+    last_proxy = NULL;
+    included_cores = 0;
+    pcr = process_core_ratio;
+    for (node = node_list, i = 0; i < global_node_count; node = node->next) {
+        if (node == NULL) {
+            node = node_list;
+            pcr++;
         }
-        else {
-            status = HYDU_alloc_proxy(&proxy->next, pg);
-            HYDU_ERR_POP(status, "unable to allocate proxy\n");
-            proxy = proxy->next;
-        }
 
-        proxy->proxy_id = i;
-        proxy->start_pid = start_pid;
-        HYDU_dup_node(*node, &proxy->node);
-        proxy->node.next = NULL;
+        c = (node->core_count * pcr - node->active_processes);
 
-        /* For the first node, use only the remaining cores. For the
-         * last node, we need to make sure its not oversubscribed
-         * since the first proxy we started on might repeat. */
-        if (i == 0)
-            proxy->node.core_count = -(offset); /* offset is negative */
-        else if (i == (num_nodes - 1) && (offset + start_node->core_count))
-            proxy->node.core_count = node->core_count + offset;
+        if (c == 0 && included_cores == 0)
+            continue;
+
+        included_cores += c;
+
+        /* create a proxy associated with this node */
+        status = alloc_proxy(&proxy, pg, node);
+        HYDU_ERR_POP(status, "error allocating proxy\n");
+
+        proxy->filler_processes = c;
+
+        if (pg->proxy_list == NULL)
+            pg->proxy_list = proxy;
         else
-            proxy->node.core_count = node->core_count;
+            last_proxy->next = proxy;
+        last_proxy = proxy;
 
-        /* If we found enough proxies, break out */
-        start_pid += proxy->node.core_count;
-        procs_left -= proxy->node.core_count;
-        if (procs_left <= 0)
+        if (included_cores >= pg_process_count)
             break;
 
-        node = node->next;
-        /* Handle the wrap around case for the nodes */
-        if (node == NULL)
-            node = node_list;
+        i++;
     }
 
-    /* Now fill the proxies with the appropriate executable
-     * information */
-    proxy = pg->proxy_list;
-    exec = exec_list;
-    proxy_rem_procs = proxy->node.core_count;
-    exec_rem_procs = exec ? exec->proc_count : 0;
-    while (exec) {
-        if (exec_rem_procs <= proxy_rem_procs) {
-            status = add_exec_to_proxy(exec, proxy, exec_rem_procs);
+    /* Proxy list is created; add the executables to the proxy list */
+    if (pg->proxy_list->next == NULL) {
+        /* Special case: there is only one proxy, so all executables
+         * directly get appended to this proxy */
+        for (exec = exec_list; exec; exec = exec->next) {
+            status = add_exec_to_proxy(exec, pg->proxy_list, exec->proc_count);
             HYDU_ERR_POP(status, "unable to add executable to proxy\n");
+        }
+    }
+    else {
+        exec = exec_list;
+        proxy = pg->proxy_list;
 
-            proxy_rem_procs -= exec_rem_procs;
-            if (proxy_rem_procs == 0) {
+        pcr = process_core_ratio;
+
+        exec_rem_procs = exec_list->proc_count;
+        proxy_rem_cores = proxy->node->core_count * pcr - proxy->node->active_processes;
+
+        while (exec) {
+            num_procs = (exec_rem_procs > proxy_rem_cores) ? proxy_rem_cores : exec_rem_procs;
+
+            exec_rem_procs -= num_procs;
+            proxy_rem_cores -= num_procs;
+
+            if (num_procs) {
+                status = add_exec_to_proxy(exec, proxy, num_procs);
+                HYDU_ERR_POP(status, "unable to add executable to proxy\n");
+            }
+
+            if (exec_rem_procs == 0) {
+                exec = exec->next;
+                if (exec)
+                    exec_rem_procs = exec->proc_count;
+                else
+                    break;
+            }
+
+            if (proxy_rem_cores == 0) {
                 proxy = proxy->next;
+
                 if (proxy == NULL)
                     proxy = pg->proxy_list;
-                proxy_rem_procs = proxy->node.core_count;
+
+                if (proxy->node->node_id == 0)
+                    pcr++;
+
+                proxy_rem_cores = proxy->node->core_count * pcr - proxy->node->active_processes;
             }
+        }
+    }
 
-            exec = exec->next;
-            exec_rem_procs = exec ? exec->proc_count : 0;
+    /* Get rid of the proxies that do not have any executables
+     * attached to them */
+    while (pg->proxy_list->exec_list == NULL) {
+        tproxy = pg->proxy_list;
+        pg->proxy_list = tproxy->next;
+        tproxy->next = NULL;
+        HYDU_free_proxy_list(tproxy);
+    }
+
+    pg->proxy_list->proxy_id = proxy_id = 0;
+    for (proxy = pg->proxy_list; proxy->next;) {
+        if (proxy->next->exec_list == NULL) {
+            tproxy = proxy->next;
+            proxy->next = tproxy->next;
+            tproxy->next = NULL;
+            HYDU_free_proxy_list(tproxy);
         }
         else {
-            status = add_exec_to_proxy(exec, proxy, proxy_rem_procs);
-            HYDU_ERR_POP(status, "unable to add executable to proxy\n");
-
-            exec_rem_procs -= proxy_rem_procs;
-
             proxy = proxy->next;
-            if (proxy == NULL)
-                proxy = pg->proxy_list;
-            proxy_rem_procs = proxy->node.core_count;
+            proxy->proxy_id = ++proxy_id;
         }
     }
 

Modified: mpich2/trunk/src/pm/hydra/utils/others/others.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/others/others.c	2011-02-21 17:28:58 UTC (rev 8000)
+++ mpich2/trunk/src/pm/hydra/utils/others/others.c	2011-02-21 20:53:45 UTC (rev 8001)
@@ -6,13 +6,8 @@
 
 #include "hydra.h"
 
-int HYDU_local_to_global_id(int local_id, int start_pid, int core_count, int global_core_count)
-{
-    return ((local_id / core_count) * global_core_count) + (local_id % core_count) + start_pid;
-}
-
 HYD_status HYDU_add_to_node_list(const char *hostname, int num_procs,
-                                 struct HYD_node ** node_list)
+                                 struct HYD_node **node_list)
 {
     struct HYD_node *node;
     HYD_status status = HYD_SUCCESS;



More information about the mpich2-commits mailing list