[mpich2-commits] r4171 - in mpich2/trunk/src/pm/hydra: . include pm/pmiserv utils/bind utils/launch utils/string

balaji at mcs.anl.gov balaji at mcs.anl.gov
Mon Mar 23 04:20:17 CDT 2009


Author: balaji
Date: 2009-03-23 04:20:17 -0500 (Mon, 23 Mar 2009)
New Revision: 4171

Modified:
   mpich2/trunk/src/pm/hydra/README
   mpich2/trunk/src/pm/hydra/include/hydra_base.h
   mpich2/trunk/src/pm/hydra/include/hydra_utils.h
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c
   mpich2/trunk/src/pm/hydra/utils/bind/bind.c
   mpich2/trunk/src/pm/hydra/utils/launch/allocate.c
   mpich2/trunk/src/pm/hydra/utils/string/string.c
Log:
This should allow the user to specify process-core mappings within the
hosts file. This is the most general form of such a specification. All
other cases can be implemented in terms of it, but they are still
provided for two reasons: (1) convenience, and (2) the user might not
have all the required topology information about the system's processor
architecture.

This should resolve ticket #457.


Modified: mpich2/trunk/src/pm/hydra/README
===================================================================
--- mpich2/trunk/src/pm/hydra/README	2009-03-23 06:36:59 UTC (rev 4170)
+++ mpich2/trunk/src/pm/hydra/README	2009-03-23 09:20:17 UTC (rev 4171)
@@ -144,15 +144,18 @@
 
  $ cat hosts
 
-   donner:4    map=1,2,3,4
-   foo:4       map=4,3,2,1
+   donner:4    map=0,-1,-1,3
+   foo:4       map=3,2
    shakey:2
 
-Using this method, each host can be given a different mapping. Note
-that the last line does not have any mapping: in this case, the
-processes on that machine are not bound to any core.
+Using this method, each host can be given a different mapping. Any
+unspecified mappings are treated as (-1), referring to no binding.
 
+Command-line based mappings are given a higher priority than the
+host-file based mappings. So, if a mapping is given at both places,
+the host-file mappings are ignored.
 
+
 X Forwarding
 ------------
 

Modified: mpich2/trunk/src/pm/hydra/include/hydra_base.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra_base.h	2009-03-23 06:36:59 UTC (rev 4170)
+++ mpich2/trunk/src/pm/hydra/include/hydra_base.h	2009-03-23 09:20:17 UTC (rev 4171)
@@ -64,6 +64,7 @@
 typedef unsigned short HYD_Event_t;
 
 #define HYD_TMPBUF_SIZE (64 * 1024)
+#define HYD_TMP_STRLEN  1024
 #define HYD_NUM_TMP_STRINGS 200
 
 
@@ -121,6 +122,7 @@
 /* Partition information */
 struct HYD_Partition {
     char *name;
+    char *user_bind_map;
     int total_proc_count;
 
     /* Segment list will contain one-pass of the hosts file */
@@ -136,7 +138,7 @@
     int out;
     int err;
     int exit_status;
-    char *proxy_args[HYD_NUM_TMP_STRINGS];    /* Full argument list */
+    char *proxy_args[HYD_NUM_TMP_STRINGS];      /* Full argument list */
 
     struct HYD_Partition *next;
 };

Modified: mpich2/trunk/src/pm/hydra/include/hydra_utils.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra_utils.h	2009-03-23 06:36:59 UTC (rev 4170)
+++ mpich2/trunk/src/pm/hydra/include/hydra_utils.h	2009-03-23 09:20:17 UTC (rev 4171)
@@ -19,11 +19,11 @@
 #include "plpa_internal.h"
 HYD_Status HYDU_bind_init(char *user_bind_map);
 void HYDU_bind_process(int core);
-int HYDU_next_core(int core, HYD_Binding binding);
+int HYDU_bind_get_core_id(int id, HYD_Binding binding);
 #else
 #define HYDU_bind_init(...) HYD_SUCCESS
 #define HYDU_bind_process(...) HYD_SUCCESS
-#define HYDU_next_core(...) (-1)
+#define HYDU_bind_get_core_id(...) (-1)
 #endif /* PROC_BINDING */
 
 
@@ -50,6 +50,8 @@
 HYD_Status HYDU_alloc_partition_segment(struct HYD_Partition_segment **segment);
 HYD_Status HYDU_merge_partition_segment(char *name, struct HYD_Partition_segment *segment,
                                         struct HYD_Partition **partition_list);
+HYD_Status HYDU_merge_partition_mapping(char *name, char *map, int num_procs,
+                                        struct HYD_Partition **partition_list);
 HYD_Status HYDU_alloc_partition_exec(struct HYD_Partition_exec **exec);
 HYD_Status HYDU_create_host_list(char *host_file, struct HYD_Partition **partition_list);
 HYD_Status HYDU_create_process(char **client_arg, HYD_Env_t * env_list,
@@ -105,6 +107,8 @@
 
 
 /* Memory utilities */
+#include <ctype.h>
+
 #define HYDU_MALLOC(p, type, size, status)                              \
     {                                                                   \
         (p) = (type) MPIU_Malloc((size));                               \
@@ -135,6 +139,7 @@
 char *HYDU_int_to_str(int x);
 char *HYDU_strerror(int error);
 int HYDU_strlist_lastidx(char **strlist);
+char **HYDU_str_to_strlist(char *str);
 
 
 /* Timer utilities */

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.c	2009-03-23 06:36:59 UTC (rev 4170)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.c	2009-03-23 09:20:17 UTC (rev 4171)
@@ -73,7 +73,6 @@
 
     /* Spawn the processes */
     process_id = 0;
-    core = -1;
     for (exec = HYD_PMCD_pmi_proxy_params.exec_list; exec; exec = exec->next) {
         for (i = 0; i < exec->proc_count; i++) {
 
@@ -106,7 +105,7 @@
                 client_args[arg++] = MPIU_Strdup(exec->exec[j]);
             client_args[arg++] = NULL;
 
-            core = HYDU_next_core(core, HYD_PMCD_pmi_proxy_params.binding);
+            core = HYDU_bind_get_core_id(process_id, HYD_PMCD_pmi_proxy_params.binding);
             if (pmi_id == 0) {
                 status = HYDU_create_process(client_args, exec->prop_env,
                                              &HYD_PMCD_pmi_proxy_params.in,

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c	2009-03-23 06:36:59 UTC (rev 4170)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c	2009-03-23 09:20:17 UTC (rev 4171)
@@ -130,6 +130,8 @@
         partition->proxy_args[arg++] = HYDU_int_to_str(handle.binding);
         if (handle.user_bind_map)
             partition->proxy_args[arg++] = MPIU_Strdup(handle.user_bind_map);
+        else if (partition->user_bind_map)
+            partition->proxy_args[arg++] = MPIU_Strdup(partition->user_bind_map);
         else
             partition->proxy_args[arg++] = MPIU_Strdup("HYDRA_NO_USER_MAP");
 

Modified: mpich2/trunk/src/pm/hydra/utils/bind/bind.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/bind/bind.c	2009-03-23 06:36:59 UTC (rev 4170)
+++ mpich2/trunk/src/pm/hydra/utils/bind/bind.c	2009-03-23 09:20:17 UTC (rev 4171)
@@ -16,8 +16,7 @@
 
     int user_bind_valid;
     int *user_bind_map;
-} bind_info = {
-0, -1, -1, -1, NULL, 0, NULL};
+} bind_info;
 
 HYD_Status HYDU_bind_init(char *user_bind_map)
 {
@@ -139,71 +138,39 @@
 }
 
 
-int HYDU_next_core(int old_core, HYD_Binding binding)
+int HYDU_bind_get_core_id(int id, HYD_Binding binding)
 {
-    int socket = 0, core = 0;
+    int socket = 0, core = 0, curid, realid;
     int i, j;
 
     HYDU_FUNC_ENTER();
 
     if (bind_info.supported) {
+        realid = (id % (bind_info.num_cores * bind_info.num_sockets));
+
         if (binding == HYD_BIND_RR) {
-            /* Round-robin is easy; just give the next core */
-            return ((old_core + 1) % (bind_info.num_sockets * bind_info.num_cores));
+            return (id % (bind_info.num_sockets * bind_info.num_cores));
         }
         else if (binding == HYD_BIND_BUDDY) {
-            if (old_core < -1)
-                return 0;
-
-            /* First find the old core */
+            /* If we reached the maximum, loop around */
+            curid = 0;
             for (core = 0; core < bind_info.num_cores; core++) {
                 for (socket = 0; socket < bind_info.num_sockets; socket++) {
-                    if (bind_info.bind_map[socket][core] == old_core)
-                        break;
+                    if (curid == realid)
+                        return bind_info.bind_map[socket][core];
+                    curid++;
                 }
-                if (bind_info.bind_map[socket][core] == old_core)
-                    break;
             }
-
-            /* If there is another socket available after this, give
-             * the same core ID on that socket */
-            if (socket < bind_info.num_sockets - 1)
-                return bind_info.bind_map[socket + 1][core];
-            /* If we are the last socket, and there is a core left
-             * after ours, give that core on the first socket */
-            else if (core < bind_info.num_cores - 1)
-                return bind_info.bind_map[0][core + 1];
-            /* If we are the last socket and last core, loop back to
-             * the start */
-            else
-                return bind_info.bind_map[0][0];
         }
         else if (binding == HYD_BIND_PACK) {
-            if (old_core < -1)
-                return 0;
-
-            /* First find the old core */
-            for (core = 0; core < bind_info.num_cores; core++) {
-                for (socket = 0; socket < bind_info.num_sockets; socket++) {
-                    if (bind_info.bind_map[socket][core] == old_core)
-                        break;
+            curid = 0;
+            for (socket = 0; socket < bind_info.num_sockets; socket++) {
+                for (core = 0; core < bind_info.num_cores; core++) {
+                    if (curid == realid)
+                        return bind_info.bind_map[socket][core];
+                    curid++;
                 }
-                if (bind_info.bind_map[socket][core] == old_core)
-                    break;
             }
-
-            /* If there is another core available after this, give
-             * that core ID on the same socket */
-            if (core < bind_info.num_cores - 1)
-                return bind_info.bind_map[socket][core + 1];
-            /* If we are the last core, and there is a socket left
-             * after ours, give the first core on that socket */
-            else if (socket < bind_info.num_sockets - 1)
-                return bind_info.bind_map[socket + 1][0];
-            /* If we are the last socket and last core, loop back to
-             * the start */
-            else
-                return bind_info.bind_map[0][0];
         }
         else if (binding == HYD_BIND_NONE) {
             return -1;
@@ -211,18 +178,8 @@
         else if (binding == HYD_BIND_USER) {
             if (!bind_info.user_bind_valid)
                 return -1;
-
-            if (old_core < 0)
-                return bind_info.user_bind_map[0];
-
-            for (i = 0; i < ((bind_info.num_cores * bind_info.num_sockets) - 1); i++) {
-                if (bind_info.user_bind_map[i] == old_core) {
-                    j = ((i + 1) % (bind_info.num_cores * bind_info.num_sockets));
-                    return bind_info.user_bind_map[j];
-                }
-                else if (bind_info.user_bind_map[i] == -1)
-                    return -1;
-            }
+            else
+                return bind_info.user_bind_map[realid];
         }
     }
     else {

Modified: mpich2/trunk/src/pm/hydra/utils/launch/allocate.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/launch/allocate.c	2009-03-23 06:36:59 UTC (rev 4170)
+++ mpich2/trunk/src/pm/hydra/utils/launch/allocate.c	2009-03-23 09:20:17 UTC (rev 4171)
@@ -14,6 +14,7 @@
 
     HYDU_MALLOC(*partition, struct HYD_Partition *, sizeof(struct HYD_Partition), status);
     (*partition)->name = NULL;
+    (*partition)->user_bind_map = NULL;
     (*partition)->segment_list = NULL;
     (*partition)->total_proc_count = 0;
 
@@ -96,6 +97,8 @@
         tpartition = partition->next;
 
         HYDU_FREE(partition->name);
+        if (partition->user_bind_map)
+            HYDU_FREE(partition->user_bind_map);
 
         segment = partition->segment_list;
         while (segment) {
@@ -198,12 +201,129 @@
 }
 
 
-HYD_Status HYDU_alloc_partition_exec(struct HYD_Partition_exec ** exec)
+static int count_elements(char *str, char *delim)
 {
+    int count;
+
+    HYDU_FUNC_ENTER();
+
+    strtok(str, delim);
+    count = 1;
+    while (strtok(NULL, delim))
+        count++;
+
+    HYDU_FUNC_EXIT();
+
+    return count;
+}
+
+
+static char *pad_string(char *str, char *pad, int count)
+{
+    char *tmp[HYD_NUM_TMP_STRINGS], *out;
+    int i, j;
     HYD_Status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
 
+    i = 0;
+    tmp[i++] = MPIU_Strdup(str);
+    for (j = 0; j < count; j++)
+        tmp[i++] = MPIU_Strdup(pad);
+    tmp[i] = NULL;
+
+    status = HYDU_str_alloc_and_join(tmp, &out);
+    HYDU_ERR_POP(status, "unable to join strings\n");
+
+    HYDU_free_strlist(tmp);
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return out;
+
+  fn_fail:
+    goto fn_exit;
+}
+
+
+HYD_Status HYDU_merge_partition_mapping(char *name, char *map, int num_procs,
+                                        struct HYD_Partition **partition_list)
+{
+    struct HYD_Partition *partition;
+    char *tmp[HYD_NUM_TMP_STRINGS], *x;
+    int i, count;
+    HYD_Status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    if (*partition_list == NULL) {
+        HYDU_alloc_partition(partition_list);
+        (*partition_list)->name = MPIU_Strdup(name);
+
+        x = MPIU_Strdup(map);
+        count = num_procs - count_elements(x, ",");
+        HYDU_FREE(x);
+
+        (*partition_list)->user_bind_map = pad_string(map, ",-1", count);
+    }
+    else {
+        partition = *partition_list;
+        while (partition) {
+            if (strcmp(partition->name, name) == 0) {
+                /* Found a partition with the same name; append */
+                if (partition->user_bind_map == NULL) {
+                    x = MPIU_Strdup(map);
+                    count = num_procs - count_elements(x, ",");
+                    HYDU_FREE(x);
+
+                    partition->user_bind_map = pad_string(map, ",-1", count);
+                }
+                else {
+                    x = MPIU_Strdup(map);
+                    count = num_procs - count_elements(x, ",");
+                    HYDU_FREE(x);
+
+                    i = 0;
+                    tmp[i++] = MPIU_Strdup(partition->user_bind_map);
+                    tmp[i++] = MPIU_Strdup(",");
+                    tmp[i++] = pad_string(map, ",-1", count);
+                    tmp[i++] = NULL;
+
+                    HYDU_FREE(partition->user_bind_map);
+                    status = HYDU_str_alloc_and_join(tmp, &partition->user_bind_map);
+                    HYDU_ERR_POP(status, "unable to join strings\n");
+
+                    HYDU_free_strlist(tmp);
+                }
+                break;
+            }
+            else if (partition->next == NULL) {
+                HYDU_alloc_partition(&partition->next);
+                partition->next->name = MPIU_Strdup(name);
+                partition->next->user_bind_map = MPIU_Strdup(map);
+                break;
+            }
+            else {
+                partition = partition->next;
+            }
+        }
+    }
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    goto fn_exit;
+}
+
+
+HYD_Status HYDU_alloc_partition_exec(struct HYD_Partition_exec **exec)
+{
+    HYD_Status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
     HYDU_MALLOC(*exec, struct HYD_Partition_exec *, sizeof(struct HYD_Partition_exec), status);
     (*exec)->exec[0] = NULL;
     (*exec)->proc_count = 0;
@@ -223,8 +343,9 @@
 HYD_Status HYDU_create_host_list(char *host_file, struct HYD_Partition **partition_list)
 {
     FILE *fp = NULL;
-    char line[2 * MAX_HOSTNAME_LEN], *hostname, *procs;
-    int num_procs, total_count;
+    char line[HYD_TMP_STRLEN], *hostname, *procs, **arg_list;
+    char *str[2] = { NULL };
+    int num_procs, total_count, arg, i;
     struct HYD_Partition_segment *segment;
     HYD_Status status = HYD_SUCCESS;
 
@@ -246,15 +367,10 @@
                                  "unable to open host file: %s\n", host_file);
 
         total_count = 0;
-        while (!feof(fp)) {
-            line[0] = 0;
-            if ((fscanf(fp, "%s", line) < 0) && errno)
-                HYDU_ERR_SETANDJUMP1(status, HYD_INTERNAL_ERROR,
-                                     "unable to read input line (errno: %d)\n", errno);
-            if (line[0] == 0)
-                break;
+        while (fgets(line, HYD_TMP_STRLEN, fp)) {
+            arg_list = HYDU_str_to_strlist(line);
 
-            hostname = strtok(line, ":");
+            hostname = strtok(arg_list[0], ":");
             procs = strtok(NULL, ":");
             num_procs = procs ? atoi(procs) : 1;
 
@@ -264,15 +380,34 @@
             HYDU_alloc_partition_segment(&segment);
             segment->start_pid = total_count;
             segment->proc_count = num_procs;
-            HYDU_merge_partition_segment(hostname, segment, partition_list);
+            status = HYDU_merge_partition_segment(hostname, segment, partition_list);
+            HYDU_ERR_POP(status, "merge partition segment failed\n");
 
             total_count += num_procs;
+
+            /* Check for the remaining parameters */
+            arg = 1;
+            while (arg_list[arg]) {
+                status = HYDU_strsplit(arg_list[arg], &str[0], &str[1], '=');
+                HYDU_ERR_POP(status, "unable to split string\n");
+
+                if (!strcmp(str[0], "map")) {
+                    status = HYDU_merge_partition_mapping(hostname, str[1], num_procs,
+                                                          partition_list);
+                    HYDU_ERR_POP(status, "merge partition mapping failed\n");
+                }
+
+                arg++;
+            }
         }
 
         fclose(fp);
     }
 
   fn_exit:
+    for (i = 0; i < 2; i++)
+        if (str[i])
+            HYDU_FREE(str[i]);
     HYDU_FUNC_EXIT();
     return status;
 

Modified: mpich2/trunk/src/pm/hydra/utils/string/string.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/string/string.c	2009-03-23 06:36:59 UTC (rev 4170)
+++ mpich2/trunk/src/pm/hydra/utils/string/string.c	2009-03-23 09:20:17 UTC (rev 4171)
@@ -59,8 +59,9 @@
 
     HYDU_FUNC_ENTER();
 
-    for (i = 0; strlist[i] != NULL; i++)
+    for (i = 0; strlist[i] != NULL; i++) {
         len += strlen(strlist[i]);
+    }
 
     HYDU_MALLOC(*strjoin, char *, len + 1, status);
     count = 0;
@@ -168,3 +169,43 @@
 
     return i;
 }
+
+
+char **HYDU_str_to_strlist(char *str)
+{
+    int argc = 0;
+    char **strlist;
+    char *p, *r;
+    HYD_Status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    HYDU_MALLOC(strlist, char **, HYD_NUM_TMP_STRINGS * sizeof(char *), status);
+
+    p = str;
+    while (*p) {
+        while (isspace(*p))
+            p++;
+        if (argc >= HYD_NUM_TMP_STRINGS)
+            HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "too many arguments in line\n");
+
+        /* Make a copy and NULL terminate it */
+        strlist[argc] = MPIU_Strdup(p);
+        r = strlist[argc];
+        while (*r && !isspace(*r))
+            r++;
+        *r = 0;
+
+        while (*p && !isspace(*p))
+            p++;
+        argc++;
+    }
+    strlist[argc] = NULL;
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return strlist;
+
+  fn_fail:
+    goto fn_exit;
+}



More information about the mpich2-commits mailing list