[mpich2-commits] r4170 - in mpich2/trunk/src/pm/hydra: bootstrap/fork bootstrap/slurm bootstrap/ssh include launcher/mpiexec launcher/utils pm/pmiserv utils/args utils/bind utils/env utils/string

balaji at mcs.anl.gov balaji at mcs.anl.gov
Mon Mar 23 01:36:59 CDT 2009


Author: balaji
Date: 2009-03-23 01:36:59 -0500 (Mon, 23 Mar 2009)
New Revision: 4170

Modified:
   mpich2/trunk/src/pm/hydra/bootstrap/fork/fork_launch.c
   mpich2/trunk/src/pm/hydra/bootstrap/slurm/slurm_launch.c
   mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_launch.c
   mpich2/trunk/src/pm/hydra/include/hydra.h
   mpich2/trunk/src/pm/hydra/include/hydra_base.h
   mpich2/trunk/src/pm/hydra/include/hydra_utils.h
   mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c
   mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle_v1.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.h
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy_utils.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_cb.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c
   mpich2/trunk/src/pm/hydra/utils/args/args.c
   mpich2/trunk/src/pm/hydra/utils/bind/bind.c
   mpich2/trunk/src/pm/hydra/utils/env/env.c
   mpich2/trunk/src/pm/hydra/utils/string/string.c
Log:
Added a first cut of user-defined process-core mappings. This is still
one step away from the most general case.


Modified: mpich2/trunk/src/pm/hydra/bootstrap/fork/fork_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/fork/fork_launch.c	2009-03-23 04:53:56 UTC (rev 4169)
+++ mpich2/trunk/src/pm/hydra/bootstrap/fork/fork_launch.c	2009-03-23 06:36:59 UTC (rev 4170)
@@ -14,7 +14,7 @@
 HYD_Status HYD_BSCD_fork_launch_procs(void)
 {
     struct HYD_Partition *partition;
-    char *client_arg[HYD_EXEC_ARGS];
+    char *client_arg[HYD_NUM_TMP_STRINGS];
     int i, arg, process_id;
     HYD_Status status = HYD_SUCCESS;
 

Modified: mpich2/trunk/src/pm/hydra/bootstrap/slurm/slurm_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/slurm/slurm_launch.c	2009-03-23 04:53:56 UTC (rev 4169)
+++ mpich2/trunk/src/pm/hydra/bootstrap/slurm/slurm_launch.c	2009-03-23 06:36:59 UTC (rev 4170)
@@ -14,7 +14,7 @@
 HYD_Status HYD_BSCD_slurm_launch_procs(void)
 {
     struct HYD_Partition *partition;
-    char *client_arg[HYD_EXEC_ARGS];
+    char *client_arg[HYD_NUM_TMP_STRINGS];
     int i, arg, process_id;
     HYD_Status status = HYD_SUCCESS;
 

Modified: mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_launch.c	2009-03-23 04:53:56 UTC (rev 4169)
+++ mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_launch.c	2009-03-23 06:36:59 UTC (rev 4170)
@@ -20,7 +20,7 @@
 HYD_Status HYD_BSCD_ssh_launch_procs(void)
 {
     struct HYD_Partition *partition;
-    char *client_arg[HYD_EXEC_ARGS];
+    char *client_arg[HYD_NUM_TMP_STRINGS];
     int i, arg, process_id;
     HYD_Status status = HYD_SUCCESS;
 

Modified: mpich2/trunk/src/pm/hydra/include/hydra.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra.h	2009-03-23 04:53:56 UTC (rev 4169)
+++ mpich2/trunk/src/pm/hydra/include/hydra.h	2009-03-23 06:36:59 UTC (rev 4170)
@@ -16,6 +16,7 @@
     int proxy_port;
     char *bootstrap;
     HYD_Binding binding;
+    char *user_bind_map;
 
     int debug;
     int enablex;

Modified: mpich2/trunk/src/pm/hydra/include/hydra_base.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra_base.h	2009-03-23 04:53:56 UTC (rev 4169)
+++ mpich2/trunk/src/pm/hydra/include/hydra_base.h	2009-03-23 06:36:59 UTC (rev 4170)
@@ -64,7 +64,7 @@
 typedef unsigned short HYD_Event_t;
 
 #define HYD_TMPBUF_SIZE (64 * 1024)
-#define HYD_EXEC_ARGS 200
+#define HYD_NUM_TMP_STRINGS 200
 
 
 /* Status information */
@@ -111,7 +111,7 @@
 
 /* Executables on a partition */
 struct HYD_Partition_exec {
-    char *exec[HYD_EXEC_ARGS];
+    char *exec[HYD_NUM_TMP_STRINGS];
     int proc_count;
     HYD_Env_prop_t prop;
     HYD_Env_t *prop_env;
@@ -136,14 +136,14 @@
     int out;
     int err;
     int exit_status;
-    char *proxy_args[HYD_EXEC_ARGS];    /* Full argument list */
+    char *proxy_args[HYD_NUM_TMP_STRINGS];    /* Full argument list */
 
     struct HYD_Partition *next;
 };
 
 struct HYD_Exec_info {
     int exec_proc_count;
-    char *exec[HYD_EXEC_ARGS];
+    char *exec[HYD_NUM_TMP_STRINGS];
 
     /* Local environment */
     HYD_Env_t *user_env;

Modified: mpich2/trunk/src/pm/hydra/include/hydra_utils.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra_utils.h	2009-03-23 04:53:56 UTC (rev 4169)
+++ mpich2/trunk/src/pm/hydra/include/hydra_utils.h	2009-03-23 06:36:59 UTC (rev 4170)
@@ -17,7 +17,7 @@
 #if defined PROC_BINDING
 #include "plpa.h"
 #include "plpa_internal.h"
-HYD_Status HYDU_bind_init(void);
+HYD_Status HYDU_bind_init(char *user_bind_map);
 void HYDU_bind_process(int core);
 int HYDU_next_core(int core, HYD_Binding binding);
 #else
@@ -105,8 +105,6 @@
 
 
 /* Memory utilities */
-#define HYDU_NUM_JOIN_STR 100
-
 #define HYDU_MALLOC(p, type, size, status)                              \
     {                                                                   \
         (p) = (type) MPIU_Malloc((size));                               \

Modified: mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c	2009-03-23 04:53:56 UTC (rev 4169)
+++ mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c	2009-03-23 06:36:59 UTC (rev 4170)
@@ -34,7 +34,7 @@
 {
     int i;
     char **argv = t_argv;
-    char *env_name, *env_value, *str1, *str2, *progname = *argv;
+    char *env_name, *env_value, *str[4] = { NULL }, *progname = *argv;
     HYD_Env_t *env;
     struct HYD_Exec_info *exec_info;
     HYD_Status status = HYD_SUCCESS;
@@ -183,49 +183,58 @@
 
         /* The below options allow for "--foo=x" form of argument,
          * instead of "--foo x" for convenience. */
-        status = HYDU_strsplit(*argv, &str1, &str2, '=');
+        status = HYDU_strsplit(*argv, &str[0], &str[1], '=');
         HYDU_ERR_POP(status, "string break returned error\n");
 
-        if (!strcmp(str1, "--bootstrap")) {
-            if (!str2) {
+        if (!strcmp(str[0], "--bootstrap")) {
+            if (!str[1]) {
                 INCREMENT_ARGV(status);
-                str2 = *argv;
+                str[1] = *argv;
             }
             HYDU_ERR_CHKANDJUMP(status, handle.bootstrap, HYD_INTERNAL_ERROR,
                                 "duplicate bootstrap server\n");
-            handle.bootstrap = MPIU_Strdup(str2);
+            handle.bootstrap = MPIU_Strdup(str[1]);
             continue;
         }
 
-        if (!strcmp(str1, "--binding")) {
-            if (!str2) {
+        if (!strcmp(str[0], "--binding")) {
+            if (!str[1]) {
                 INCREMENT_ARGV(status);
-                str2 = *argv;
+                str[1] = *argv;
             }
             HYDU_ERR_CHKANDJUMP(status, handle.binding != HYD_BIND_UNSET,
                                 HYD_INTERNAL_ERROR, "duplicate binding\n");
-            if (!strcmp(str2, "none"))
+            if (!strcmp(str[1], "none"))
                 handle.binding = HYD_BIND_NONE;
-            else if (!strcmp(str2, "rr"))
+            else if (!strcmp(str[1], "rr"))
                 handle.binding = HYD_BIND_RR;
-            else if (!strcmp(str2, "buddy"))
+            else if (!strcmp(str[1], "buddy"))
                 handle.binding = HYD_BIND_BUDDY;
-            else if (!strcmp(str2, "pack"))
+            else if (!strcmp(str[1], "pack"))
                 handle.binding = HYD_BIND_PACK;
-            else if (!strcmp(str2, "user"))
-                handle.binding = HYD_BIND_USER;
+            else {
+                /* Check if the user wants to specify her own mapping */
+                status = HYDU_strsplit(str[1], &str[2], &str[3], ':');
+                HYDU_ERR_POP(status, "string break returned error\n");
 
+                if (!strcmp(str[2], "user")) {
+                    handle.binding = HYD_BIND_USER;
+                    if (str[3])
+                        handle.user_bind_map = MPIU_Strdup(str[3]);
+                }
+            }
+
             continue;
         }
 
-        if (!strcmp(str1, "--proxy-port")) {
-            if (!str2) {
+        if (!strcmp(str[0], "--proxy-port")) {
+            if (!str[1]) {
                 INCREMENT_ARGV(status);
-                str2 = *argv;
+                str[1] = *argv;
             }
             HYDU_ERR_CHKANDJUMP(status, handle.proxy_port != -1, HYD_INTERNAL_ERROR,
                                 "duplicate proxy port\n");
-            handle.proxy_port = atoi(str2);
+            handle.proxy_port = atoi(str[1]);
             continue;
         }
 
@@ -305,10 +314,9 @@
         handle.proxy_port = HYD_DEFAULT_PROXY_PORT;
 
   fn_exit:
-    if (str1)
-        HYDU_FREE(str1);
-    if (str2)
-        HYDU_FREE(str2);
+    for (i = 0; i < 4; i++)
+        if (str[i])
+            HYDU_FREE(str[i]);
     HYDU_FUNC_EXIT();
     return status;
 

Modified: mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c	2009-03-23 04:53:56 UTC (rev 4169)
+++ mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c	2009-03-23 06:36:59 UTC (rev 4170)
@@ -16,6 +16,7 @@
     handle.proxy_port = -1;
     handle.bootstrap = NULL;
     handle.binding = HYD_BIND_UNSET;
+    handle.user_bind_map = NULL;
 
     handle.debug = -1;
     handle.enablex = -1;

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle.c	2009-03-23 04:53:56 UTC (rev 4169)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle.c	2009-03-23 06:36:59 UTC (rev 4170)
@@ -158,7 +158,7 @@
 HYD_Status HYD_PMCD_pmi_init(int fd, char *args[])
 {
     int pmi_version, pmi_subversion;
-    char *tmp[HYDU_NUM_JOIN_STR];
+    char *tmp[HYD_NUM_TMP_STRINGS];
     HYD_Status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle_v1.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle_v1.c	2009-03-23 04:53:56 UTC (rev 4169)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle_v1.c	2009-03-23 06:36:59 UTC (rev 4170)
@@ -82,7 +82,7 @@
 HYD_Status HYD_PMCD_pmi_handle_v1_initack(int fd, char *args[])
 {
     int id, size, debug, i;
-    char *tmp[HYDU_NUM_JOIN_STR], *cmd;
+    char *tmp[HYD_NUM_TMP_STRINGS], *cmd;
     struct HYD_Partition *partition;
     struct HYD_Partition_exec *exec;
     HYD_PMCD_pmi_pg_t *run;
@@ -141,7 +141,7 @@
 HYD_Status HYD_PMCD_pmi_handle_v1_get_maxes(int fd, char *args[])
 {
     int i;
-    char *tmp[HYDU_NUM_JOIN_STR], *cmd;
+    char *tmp[HYD_NUM_TMP_STRINGS], *cmd;
     HYD_Status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
@@ -177,7 +177,7 @@
 
 HYD_Status HYD_PMCD_pmi_handle_v1_get_appnum(int fd, char *args[])
 {
-    char *tmp[HYDU_NUM_JOIN_STR], *cmd;
+    char *tmp[HYD_NUM_TMP_STRINGS], *cmd;
     int i;
     HYD_PMCD_pmi_process_t *process;
     HYD_Status status = HYD_SUCCESS;
@@ -216,7 +216,7 @@
 
 HYD_Status HYD_PMCD_pmi_handle_v1_get_my_kvsname(int fd, char *args[])
 {
-    char *tmp[HYDU_NUM_JOIN_STR], *cmd;
+    char *tmp[HYD_NUM_TMP_STRINGS], *cmd;
     int i;
     HYD_PMCD_pmi_process_t *process;
     HYD_Status status = HYD_SUCCESS;
@@ -296,7 +296,7 @@
     HYD_PMCD_pmi_process_t *process;
     HYD_PMCD_pmi_kvs_pair_t *key_pair, *run;
     char *kvsname, *key, *val, *key_pair_str = NULL;
-    char *tmp[HYDU_NUM_JOIN_STR], *cmd;
+    char *tmp[HYD_NUM_TMP_STRINGS], *cmd;
     HYD_Status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
@@ -370,7 +370,7 @@
     HYD_PMCD_pmi_process_t *process;
     HYD_PMCD_pmi_kvs_pair_t *run;
     char *kvsname, *key;
-    char *tmp[HYDU_NUM_JOIN_STR], *cmd, *key_val_str = NULL;
+    char *tmp[HYD_NUM_TMP_STRINGS], *cmd, *key_val_str = NULL;
     HYD_Status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
@@ -464,7 +464,7 @@
 HYD_Status HYD_PMCD_pmi_handle_v1_get_usize(int fd, char *args[])
 {
     int usize, i;
-    char *tmp[HYDU_NUM_JOIN_STR], *cmd;
+    char *tmp[HYD_NUM_TMP_STRINGS], *cmd;
     HYD_Status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.c	2009-03-23 04:53:56 UTC (rev 4169)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.c	2009-03-23 06:36:59 UTC (rev 4170)
@@ -15,10 +15,9 @@
 int main(int argc, char **argv)
 {
     int i, j, arg, count, pid, ret_status;
-    int stdin_fd, timeout, process_id, core, pmi_id, rem;
-    char *str, *timeout_str;
-    char *client_args[HYD_EXEC_ARGS];
-    char *tmp[HYDU_NUM_JOIN_STR];
+    int stdin_fd, process_id, core, pmi_id, rem;
+    char *str;
+    char *client_args[HYD_NUM_TMP_STRINGS];
     HYD_Env_t *env;
     struct HYD_Partition_exec *exec;
     struct HYD_Partition_segment *segment;
@@ -69,7 +68,7 @@
     status = HYDU_putenv_list(HYD_PMCD_pmi_proxy_params.global_env);
     HYDU_ERR_POP(status, "putenv returned error\n");
 
-    status = HYDU_bind_init();
+    status = HYDU_bind_init(HYD_PMCD_pmi_proxy_params.user_bind_map);
     HYDU_ERR_POP(status, "unable to initialize process binding\n");
 
     /* Spawn the processes */
@@ -113,8 +112,7 @@
                                              &HYD_PMCD_pmi_proxy_params.in,
                                              &HYD_PMCD_pmi_proxy_params.out[process_id],
                                              &HYD_PMCD_pmi_proxy_params.err[process_id],
-                                             &HYD_PMCD_pmi_proxy_params.pid[process_id],
-                                             core);
+                                             &HYD_PMCD_pmi_proxy_params.pid[process_id], core);
 
                 status = HYDU_sock_set_nonblock(HYD_PMCD_pmi_proxy_params.in);
                 HYDU_ERR_POP(status, "unable to set socket as non-blocking\n");
@@ -125,7 +123,8 @@
 
                 HYD_PMCD_pmi_proxy_params.stdin_buf_offset = 0;
                 HYD_PMCD_pmi_proxy_params.stdin_buf_count = 0;
-                status = HYD_DMX_register_fd(1, &stdin_fd, HYD_STDIN, HYD_PMCD_pmi_proxy_stdin_cb);
+                status =
+                    HYD_DMX_register_fd(1, &stdin_fd, HYD_STDIN, HYD_PMCD_pmi_proxy_stdin_cb);
                 HYDU_ERR_POP(status, "unable to register fd\n");
             }
             else {
@@ -133,8 +132,7 @@
                                              NULL,
                                              &HYD_PMCD_pmi_proxy_params.out[process_id],
                                              &HYD_PMCD_pmi_proxy_params.err[process_id],
-                                             &HYD_PMCD_pmi_proxy_params.pid[process_id],
-                                             core);
+                                             &HYD_PMCD_pmi_proxy_params.pid[process_id], core);
             }
             HYDU_ERR_POP(status, "spawn process returned error\n");
 

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.h
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.h	2009-03-23 04:53:56 UTC (rev 4169)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.h	2009-03-23 06:36:59 UTC (rev 4170)
@@ -14,6 +14,8 @@
     int proxy_port;
     char *wdir;
     HYD_Binding binding;
+    char *user_bind_map;
+
     HYD_Env_t *global_env;
 
     int one_pass_count;

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy_utils.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy_utils.c	2009-03-23 04:53:56 UTC (rev 4169)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy_utils.c	2009-03-23 06:36:59 UTC (rev 4170)
@@ -46,6 +46,11 @@
         if (!strcmp(*argv, "--binding")) {
             argv++;
             HYD_PMCD_pmi_proxy_params.binding = atoi(*argv);
+            argv++;
+            if (!strcmp(*argv, "HYDRA_NO_USER_MAP"))
+                HYD_PMCD_pmi_proxy_params.user_bind_map = NULL;
+            else
+                HYD_PMCD_pmi_proxy_params.user_bind_map = MPIU_Strdup(*argv);
             continue;
         }
 

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_cb.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_cb.c	2009-03-23 04:53:56 UTC (rev 4169)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_cb.c	2009-03-23 06:36:59 UTC (rev 4170)
@@ -30,7 +30,7 @@
 HYD_Status HYD_PMCD_pmi_serv_cb(int fd, HYD_Event_t events)
 {
     int accept_fd, linelen, i;
-    char *buf = NULL, *cmd, *args[HYD_EXEC_ARGS];
+    char *buf = NULL, *cmd, *args[HYD_NUM_TMP_STRINGS];
     char *str1 = NULL, *str2 = NULL;
     struct HYD_PMCD_pmi_handle *h;
     HYD_Status status = HYD_SUCCESS;
@@ -77,7 +77,7 @@
         buf[linelen - 1] = 0;
 
         cmd = strtok(buf, " ");
-        for (i = 0; i < HYD_EXEC_ARGS; i++) {
+        for (i = 0; i < HYD_NUM_TMP_STRINGS; i++) {
             args[i] = strtok(NULL, " ");
             if (args[i] == NULL)
                 break;

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c	2009-03-23 04:53:56 UTC (rev 4169)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c	2009-03-23 06:36:59 UTC (rev 4170)
@@ -41,7 +41,7 @@
     int i, arg, process_id;
     char hostname[MAX_HOSTNAME_LEN];
     HYD_Env_t *env;
-    char *path_str[HYDU_NUM_JOIN_STR];
+    char *path_str[HYD_NUM_TMP_STRINGS];
     struct HYD_Partition *partition;
     struct HYD_Partition_exec *exec;
     struct HYD_Partition_segment *segment;
@@ -128,6 +128,10 @@
 
         partition->proxy_args[arg++] = MPIU_Strdup("--binding");
         partition->proxy_args[arg++] = HYDU_int_to_str(handle.binding);
+        if (handle.user_bind_map)
+            partition->proxy_args[arg++] = MPIU_Strdup(handle.user_bind_map);
+        else
+            partition->proxy_args[arg++] = MPIU_Strdup("HYDRA_NO_USER_MAP");
 
         /* Pass the global environment separately, instead of for each
          * executable, as an optimization */

Modified: mpich2/trunk/src/pm/hydra/utils/args/args.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/args/args.c	2009-03-23 04:53:56 UTC (rev 4169)
+++ mpich2/trunk/src/pm/hydra/utils/args/args.c	2009-03-23 06:36:59 UTC (rev 4170)
@@ -9,7 +9,7 @@
 HYD_Status HYDU_get_base_path(char *execname, char *wdir, char **path)
 {
     char *loc, *post;
-    char *path_str[HYDU_NUM_JOIN_STR];
+    char *path_str[HYD_NUM_TMP_STRINGS];
     HYD_Status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();

Modified: mpich2/trunk/src/pm/hydra/utils/bind/bind.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/bind/bind.c	2009-03-23 04:53:56 UTC (rev 4169)
+++ mpich2/trunk/src/pm/hydra/utils/bind/bind.c	2009-03-23 06:36:59 UTC (rev 4170)
@@ -13,9 +13,13 @@
     int num_cores;
 
     int **bind_map;
-} bind_info = { 0, -1, -1, -1 , NULL };
 
-HYD_Status HYDU_bind_init(void)
+    int user_bind_valid;
+    int *user_bind_map;
+} bind_info = {
+0, -1, -1, -1, NULL, 0, NULL};
+
+HYD_Status HYDU_bind_init(char *user_bind_map)
 {
     PLPA_NAME(api_type_t) p;
     int ret, supported, i, j;
@@ -23,35 +27,36 @@
     int num_sockets = -1, max_socket_id;
     int num_cores = -1, max_core_id;
     int socket, core;
+    char *str;
     HYD_Status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
 
-    if (!((PLPA_NAME(api_probe)(&p) == 0) && (p == PLPA_NAME_CAPS(PROBE_OK)))) {
+    if (!((PLPA_NAME(api_probe) (&p) == 0) && (p == PLPA_NAME_CAPS(PROBE_OK)))) {
         /* If this failed, we just return without binding */
         HYDU_Warn_printf("plpa api probe failed; not binding\n");
         goto fn_exit;
     }
 
     /* We need topology information too */
-    ret = PLPA_NAME(have_topology_information)(&supported);
+    ret = PLPA_NAME(have_topology_information) (&supported);
     if ((ret == 0) && (supported == 1)) {
         /* Find the maximum number of processing elements */
-        ret = PLPA_NAME(get_processor_data)(PLPA_NAME_CAPS(COUNT_ALL), &num_procs,
-                                            &max_proc_id);
+        ret = PLPA_NAME(get_processor_data) (PLPA_NAME_CAPS(COUNT_ALL), &num_procs,
+                                             &max_proc_id);
         if (ret) {
             HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                                 "plpa get processor data failed\n");
         }
 
         /* PLPA only gives information about sockets and cores */
-        ret = PLPA_NAME(get_socket_info)(&num_sockets, &max_socket_id);
+        ret = PLPA_NAME(get_socket_info) (&num_sockets, &max_socket_id);
         if (ret) {
             HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                                 "plpa get processor data failed\n");
         }
 
-        ret = PLPA_NAME(get_core_info)(0, &num_cores, &max_core_id);
+        ret = PLPA_NAME(get_core_info) (0, &num_cores, &max_core_id);
         if (ret) {
             HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                                 "plpa get processor data failed\n");
@@ -65,7 +70,7 @@
         }
 
         for (i = 0; i < num_sockets * num_cores; i++) {
-            ret = PLPA_NAME(map_to_socket_core)(i, &socket, &core);
+            ret = PLPA_NAME(map_to_socket_core) (i, &socket, &core);
             if (ret)
                 HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                                     "plpa map_to_socket_core failed\n");
@@ -78,6 +83,24 @@
         goto fn_exit;
     }
 
+    if (user_bind_map) {
+        HYDU_MALLOC(bind_info.user_bind_map, int *, num_cores * num_sockets * sizeof(int),
+                    status);
+        for (i = 0; i < num_sockets * num_cores; i++)
+            bind_info.user_bind_map[i] = -1;
+
+        bind_info.user_bind_valid = 1;
+        i = 0;
+        str = strtok(user_bind_map, ",");
+        do {
+            if (!str || i >= num_cores * num_sockets)
+                break;
+            bind_info.user_bind_map[i++] = atoi(str);
+            fflush(stdout);
+            str = strtok(NULL, ",");
+        } while (1);
+    }
+
     bind_info.supported = 1;
     bind_info.num_procs = num_procs;
     bind_info.num_sockets = num_sockets;
@@ -100,9 +123,9 @@
     HYDU_FUNC_ENTER();
 
     if (bind_info.supported) {
-        PLPA_NAME_CAPS(CPU_ZERO)(&cpuset);
-        PLPA_NAME_CAPS(CPU_SET)(core % bind_info.num_procs, &cpuset);
-        ret = PLPA_NAME(sched_setaffinity)(0, 1, &cpuset);
+        PLPA_NAME_CAPS(CPU_ZERO) (&cpuset);
+        PLPA_NAME_CAPS(CPU_SET) (core % bind_info.num_procs, &cpuset);
+        ret = PLPA_NAME(sched_setaffinity) (0, 1, &cpuset);
         if (ret)
             HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "plpa setaffinity failed\n");
     }
@@ -118,9 +141,8 @@
 
 int HYDU_next_core(int old_core, HYD_Binding binding)
 {
-    int socket, core, proc;
-    int ret;
-    HYD_Status status = HYD_SUCCESS;
+    int socket = 0, core = 0;
+    int i, j;
 
     HYDU_FUNC_ENTER();
 
@@ -130,7 +152,7 @@
             return ((old_core + 1) % (bind_info.num_sockets * bind_info.num_cores));
         }
         else if (binding == HYD_BIND_BUDDY) {
-            if (old_core == -1)
+            if (old_core < -1)
                 return 0;
 
             /* First find the old core */
@@ -146,18 +168,18 @@
             /* If there is another socket available after this, give
              * the same core ID on that socket */
             if (socket < bind_info.num_sockets - 1)
-                return bind_info.bind_map[socket+1][core];
+                return bind_info.bind_map[socket + 1][core];
             /* If we are the last socket, and there is a core left
              * after ours, give that core on the first socket */
             else if (core < bind_info.num_cores - 1)
-                return bind_info.bind_map[0][core+1];
+                return bind_info.bind_map[0][core + 1];
             /* If we are the last socket and last core, loop back to
              * the start */
             else
                 return bind_info.bind_map[0][0];
         }
         else if (binding == HYD_BIND_PACK) {
-            if (old_core == -1)
+            if (old_core < -1)
                 return 0;
 
             /* First find the old core */
@@ -173,11 +195,11 @@
             /* If there is another core available after this, give
              * that core ID on the same socket */
             if (core < bind_info.num_cores - 1)
-                return bind_info.bind_map[socket][core+1];
+                return bind_info.bind_map[socket][core + 1];
             /* If we are the last core, and there is a socket left
              * after ours, give the first core on that socket */
             else if (socket < bind_info.num_sockets - 1)
-                return bind_info.bind_map[socket+1][0];
+                return bind_info.bind_map[socket + 1][0];
             /* If we are the last socket and last core, loop back to
              * the start */
             else
@@ -187,18 +209,26 @@
             return -1;
         }
         else if (binding == HYD_BIND_USER) {
-            HYDU_Error_printf("User-specified binding is not supported yet\n");
-            return -1;
+            if (!bind_info.user_bind_valid)
+                return -1;
+
+            if (old_core < 0)
+                return bind_info.user_bind_map[0];
+
+            for (i = 0; i < ((bind_info.num_cores * bind_info.num_sockets) - 1); i++) {
+                if (bind_info.user_bind_map[i] == old_core) {
+                    j = ((i + 1) % (bind_info.num_cores * bind_info.num_sockets));
+                    return bind_info.user_bind_map[j];
+                }
+                else if (bind_info.user_bind_map[i] == -1)
+                    return -1;
+            }
         }
     }
     else {
         HYDU_Error_printf("Process-core binding is not supported on this platform\n");
     }
 
-  fn_exit:
     HYDU_FUNC_EXIT();
     return -1;
-
-  fn_fail:
-    goto fn_exit;
 }

Modified: mpich2/trunk/src/pm/hydra/utils/env/env.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/env/env.c	2009-03-23 04:53:56 UTC (rev 4169)
+++ mpich2/trunk/src/pm/hydra/utils/env/env.c	2009-03-23 06:36:59 UTC (rev 4170)
@@ -9,7 +9,7 @@
 static HYD_Status env_to_str(HYD_Env_t * env, char **str)
 {
     int i;
-    char *tmp[HYDU_NUM_JOIN_STR];
+    char *tmp[HYD_NUM_TMP_STRINGS];
     HYD_Status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
@@ -310,7 +310,7 @@
 
 HYD_Status HYDU_putenv(HYD_Env_t * env)
 {
-    char *tmp[HYDU_NUM_JOIN_STR], *str;
+    char *tmp[HYD_NUM_TMP_STRINGS], *str;
     int i;
     HYD_Status status = HYD_SUCCESS;
 
@@ -341,8 +341,6 @@
 HYD_Status HYDU_putenv_list(HYD_Env_t * env_list)
 {
     HYD_Env_t *env;
-    int i;
-    char *tmp[HYDU_NUM_JOIN_STR], *str;
     HYD_Status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();

Modified: mpich2/trunk/src/pm/hydra/utils/string/string.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/string/string.c	2009-03-23 04:53:56 UTC (rev 4169)
+++ mpich2/trunk/src/pm/hydra/utils/string/string.c	2009-03-23 06:36:59 UTC (rev 4170)
@@ -96,7 +96,7 @@
     if ((*str1)[i] == 0)        /* End of the string */
         *str2 = NULL;
     else {
-        *str2 = &((*str1)[i + 1]);
+        *str2 = MPIU_Strdup(&((*str1)[i + 1]));
         (*str1)[i] = 0;
     }
 



More information about the mpich2-commits mailing list