[mpich2-commits] r4158 - in mpich2/trunk/src/pm/hydra: . include launcher/mpiexec launcher/utils pm/pmiserv utils/bind utils/launch

balaji at mcs.anl.gov balaji at mcs.anl.gov
Sun Mar 22 04:31:54 CDT 2009


Author: balaji
Date: 2009-03-22 04:31:53 -0500 (Sun, 22 Mar 2009)
New Revision: 4158

Modified:
   mpich2/trunk/src/pm/hydra/README
   mpich2/trunk/src/pm/hydra/include/hydra.h
   mpich2/trunk/src/pm/hydra/include/hydra_base.h
   mpich2/trunk/src/pm/hydra/include/hydra_utils.h
   mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c
   mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c
   mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.h
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy_utils.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c
   mpich2/trunk/src/pm/hydra/utils/bind/bind.c
   mpich2/trunk/src/pm/hydra/utils/launch/launch.c
Log:
Added an initial version of process-core mapping and some
documentation describing it. Currently, only the round-robin
allocation works correctly. Topology-aware allocations are broken
(probably in PLPA itself). User-defined mappings are not supported
yet.


Modified: mpich2/trunk/src/pm/hydra/README
===================================================================
--- mpich2/trunk/src/pm/hydra/README	2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/README	2009-03-22 09:31:53 UTC (rev 4158)
@@ -10,9 +10,10 @@
 available here:
 http://wiki.mcs.anl.gov/mpich2/index.php/Hydra_Process_Management_Framework
 
-How to use Hydra
-----------------
 
+Quick Start
+-----------
+
 To use hydra, mpich2 needs to be configured with the configure option
 --with-pm=hydra.
 
@@ -34,16 +35,27 @@
 
  $ cat hosts
 
-   donner.mcs.anl.gov
-   foo.mcs.anl.gov
-   shakey.mcs.anl.gov
-   terra.mcs.anl.gov
+   donner
+   foo
+   shakey
+   terra
 
 To run your application on these nodes, use mpiexec:
 
  $ mpiexec -f hosts -n 4 ./app
 
+The host file can also be specified as follows:
 
+ $ cat hosts
+
+   donner:2
+   foo:3
+   shakey:2
+
+In this case, the first 2 processes are scheduled on "donner", the
+next 3 on "foo" and the last 2 on "shakey".
+
+
 Environment settings
 --------------------
 
@@ -75,6 +87,69 @@
 The default bootstrap server is ssh.
 
 
+Process-core binding
+--------------------
+
+We support multiple modes of process-core binding: round-robin ("rr"),
+buddy-allocation ("buddy"), closest packing ("pack") and user-defined
+("user"). These can be selected as follows:
+
+ $ mpiexec --binding rr -f hosts -n 8 ./app
+
+ ... or ...
+
+ $ mpiexec --binding pack -f hosts -n 8 ./app
+
+Consider the following layout of processing elements in the system
+(e.g., two nodes, each with two processors, and each processor with
+two cores). Suppose the processor IDs assigned by the operating system
+to these processing elements are as shown below:
+
+__________________________________________      __________________________________________
+|  _________________    _________________  |    |  _________________    _________________  | 
+| |  _____   _____  |  |  _____   _____  | |    | |  _____   _____  |  |  _____   _____  | |
+| | |     | |     | |  | |     | |     | | |    | | |     | |     | |  | |     | |     | | |
+| | |     | |     | |  | |     | |     | | |    | | |     | |     | |  | |     | |     | | | 
+| | |  0  | |  2  | |  | |  1  | |  3  | | |    | | |  0  | |  1  | |  | |  2  | |  3  | | |
+| | |     | |     | |  | |     | |     | | |    | | |     | |     | |  | |     | |     | | |
+| | |_____| |_____| |  | |_____| |_____| | |    | | |_____| |_____| |  | |_____| |_____| | |
+| |_________________|  |_________________| |    | |_________________|  |_________________| |
+|__________________________________________|    |__________________________________________|
+
+
+For the first node (left), the binding options are as follows:
+
+RR: 0, 1, 2, 3 (use the order provided by the OS)
+Buddy: 0, 1, 2, 3 (increasing sharing of resources)
+Pack: 0, 2, 1, 3 (closest packing)
+User: as defined by the user
+
+Within the user-defined binding, two modes are supported: command-line
+and host-file based. The command-line based mode can be used as
+follows:
+
+ $ mpiexec --binding user:0,3 -f hosts -n 4 ./app
+
+In this case, the first two processes are bound to the processing
+elements specified, while the last two are not bound at all. The
+mapping is the same for all machines, so if the application is run
+with 8 processes, the first 2 processes on "each machine" are bound to
+processing elements as specified.
+
+The host-file based mode for user-defined binding is selected with the
+"map=" argument on each host line. E.g.:
+
+ $ cat hosts
+
+   donner:4    map=1,2,3,4
+   foo:4       map=4,3,2,1
+   shakey:2
+
+Using this method, each host can be given a different mapping. Note
+that the last line does not have any mapping: in this case, the
+processes on that machine are not bound to any core.
+
+
 X Forwarding
 ------------
 

Modified: mpich2/trunk/src/pm/hydra/include/hydra.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra.h	2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/include/hydra.h	2009-03-22 09:31:53 UTC (rev 4158)
@@ -15,6 +15,7 @@
     char *base_path;
     int proxy_port;
     char *bootstrap;
+    HYD_Binding binding;
 
     int debug;
     int enablex;

Modified: mpich2/trunk/src/pm/hydra/include/hydra_base.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra_base.h	2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/include/hydra_base.h	2009-03-22 09:31:53 UTC (rev 4158)
@@ -92,6 +92,14 @@
     HYD_ENV_PROP_LIST
 } HYD_Env_prop_t;
 
+typedef enum {
+    HYD_BIND_UNSET,
+    HYD_BIND_RR,
+    HYD_BIND_BUDDY,
+    HYD_BIND_PACK,
+    HYD_BIND_USER
+} HYD_Binding;
+
 /* List of contiguous segments of processes on a partition */
 struct HYD_Partition_segment {
     int start_pid;

Modified: mpich2/trunk/src/pm/hydra/include/hydra_utils.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra_utils.h	2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/include/hydra_utils.h	2009-03-22 09:31:53 UTC (rev 4158)
@@ -17,9 +17,13 @@
 #if defined PROC_BINDING
 #include "plpa.h"
 #include "plpa_internal.h"
-HYD_Status HYDU_bind_process(int core);
+HYD_Status HYDU_bind_init(void);
+void HYDU_bind_process(int core);
+int HYDU_next_core(int core, HYD_Binding binding);
 #else
+#define HYDU_bind_init(...) HYD_SUCCESS
 #define HYDU_bind_process(...) HYD_SUCCESS
+#define HYDU_next_core(...) (-1)
 #endif /* PROC_BINDING */
 
 

Modified: mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c	2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c	2009-03-22 09:31:53 UTC (rev 4158)
@@ -23,6 +23,8 @@
     printf("\t--enable-x/--disable-x           [Enable or disable X forwarding]\n");
     printf("\t--proxy-port                     [Port on which proxies can listen]\n");
     printf("\t--bootstrap                      [Bootstrap server to use]\n");
+    printf("\t--binding                        [Process binding]");
+
     printf("\t-genv {name} {value}             [Environment variable name and value]\n");
     printf("\t-genvlist {env1,env2,...}        [Environment variable list to pass]\n");
     printf("\t-genvnone                        [Do not pass any environment variables]\n");

Modified: mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c	2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c	2009-03-22 09:31:53 UTC (rev 4158)
@@ -197,6 +197,24 @@
             continue;
         }
 
+        if (!strcmp(str1, "--binding")) {
+            if (!str2) {
+                INCREMENT_ARGV(status);
+                str2 = *argv;
+            }
+            HYDU_ERR_CHKANDJUMP(status, handle.binding != HYD_BIND_UNSET,
+                                HYD_INTERNAL_ERROR, "duplicate binding\n");
+            if (!strcmp(str2, "rr"))
+                handle.binding = HYD_BIND_RR;
+            else if (!strcmp(str2, "buddy"))
+                handle.binding = HYD_BIND_BUDDY;
+            else if (!strcmp(str2, "pack"))
+                handle.binding = HYD_BIND_PACK;
+            /* We don't support user-specified mappings yet */
+
+            continue;
+        }
+
         if (!strcmp(str1, "--proxy-port")) {
             if (!str2) {
                 INCREMENT_ARGV(status);

Modified: mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c	2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c	2009-03-22 09:31:53 UTC (rev 4158)
@@ -15,6 +15,7 @@
     handle.base_path = NULL;
     handle.proxy_port = -1;
     handle.bootstrap = NULL;
+    handle.binding = HYD_BIND_UNSET;
 
     handle.debug = -1;
     handle.enablex = -1;

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.c	2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.c	2009-03-22 09:31:53 UTC (rev 4158)
@@ -15,7 +15,7 @@
 int main(int argc, char **argv)
 {
     int i, j, arg, count, pid, ret_status;
-    int stdin_fd, timeout, process_id;
+    int stdin_fd, timeout, process_id, core;
     char *str, *timeout_str;
     char *client_args[HYD_EXEC_ARGS];
     char *tmp[HYDU_NUM_JOIN_STR];
@@ -64,8 +64,12 @@
     status = HYDU_putenv_list(HYD_PMCD_pmi_proxy_params.global_env);
     HYDU_ERR_POP(status, "putenv returned error\n");
 
+    status = HYDU_bind_init();
+    HYDU_ERR_POP(status, "unable to initialize process binding\n");
+
     /* Spawn the processes */
     process_id = 0;
+    core = -1;
     for (exec = HYD_PMCD_pmi_proxy_params.exec_list; exec; exec = exec->next) {
         for (i = 0; i < exec->proc_count; i++) {
 
@@ -84,13 +88,14 @@
                 client_args[arg++] = MPIU_Strdup(exec->exec[j]);
             client_args[arg++] = NULL;
 
+            core = HYDU_next_core(core, HYD_PMCD_pmi_proxy_params.binding);
             if ((process_id + HYD_PMCD_pmi_proxy_params.pmi_id) == 0) {
                 status = HYDU_create_process(client_args, exec->prop_env,
                                              &HYD_PMCD_pmi_proxy_params.in,
                                              &HYD_PMCD_pmi_proxy_params.out[process_id],
                                              &HYD_PMCD_pmi_proxy_params.err[process_id],
                                              &HYD_PMCD_pmi_proxy_params.pid[process_id],
-                                             process_id);
+                                             core);
             }
             else {
                 status = HYDU_create_process(client_args, exec->prop_env,
@@ -98,7 +103,7 @@
                                              &HYD_PMCD_pmi_proxy_params.out[process_id],
                                              &HYD_PMCD_pmi_proxy_params.err[process_id],
                                              &HYD_PMCD_pmi_proxy_params.pid[process_id],
-                                             process_id);
+                                             core);
             }
             HYDU_ERR_POP(status, "spawn process returned error\n");
 

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.h
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.h	2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.h	2009-03-22 09:31:53 UTC (rev 4158)
@@ -14,6 +14,7 @@
     int proxy_port;
     int pmi_id;
     char *wdir;
+    HYD_Binding binding;
     HYD_Env_t *global_env;
 
     int one_pass_count;

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy_utils.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy_utils.c	2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy_utils.c	2009-03-22 09:31:53 UTC (rev 4158)
@@ -4,7 +4,6 @@
  *      See COPYRIGHT in top-level directory.
  */
 
-#include "hydra.h"
 #include "pmi_proxy.h"
 
 struct HYD_PMCD_pmi_proxy_params HYD_PMCD_pmi_proxy_params;
@@ -39,6 +38,13 @@
             continue;
         }
 
+        /* Process-core binding mode */
+        if (!strcmp(*argv, "--binding")) {
+            argv++;
+            HYD_PMCD_pmi_proxy_params.binding = atoi(*argv);
+            continue;
+        }
+
         /* Global env */
         if (!strcmp(*argv, "--global-env")) {
             argv++;

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c	2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c	2009-03-22 09:31:53 UTC (rev 4158)
@@ -122,6 +122,9 @@
         partition->proxy_args[arg++] = MPIU_Strdup("--wdir");
         partition->proxy_args[arg++] = MPIU_Strdup(handle.wdir);
 
+        partition->proxy_args[arg++] = MPIU_Strdup("--binding");
+        partition->proxy_args[arg++] = HYDU_int_to_str(handle.binding);
+
         /* Pass the global environment separately, instead of for each
          * executable, as an optimization */
         partition->proxy_args[arg++] = MPIU_Strdup("--global-env");

Modified: mpich2/trunk/src/pm/hydra/utils/bind/bind.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/bind/bind.c	2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/utils/bind/bind.c	2009-03-22 09:31:53 UTC (rev 4158)
@@ -6,12 +6,22 @@
 
 #include "hydra_utils.h"
 
-HYD_Status HYDU_bind_process(int core)
+struct HYDU_bind_info {
+    int supported;
+    int num_procs;
+    int num_sockets;
+    int num_cores;
+};
+
+static struct HYDU_bind_info HYDU_bind_info = { 0, -1, -1, -1 };
+
+HYD_Status HYDU_bind_init(void)
 {
     plpa_api_type_t p;
-    plpa_cpu_set_t cpuset;
     int ret, supported;
     int num_procs, max_proc_id;
+    int num_sockets = -1, max_socket_id;
+    int num_cores = -1, max_core_id;
     HYD_Status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
@@ -31,6 +41,19 @@
             HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                                 "plpa get processor data failed\n");
         }
+
+        /* PLPA only gives information about sockets and cores */
+        ret = plpa_get_socket_info(&num_sockets, &max_socket_id);
+        if (ret) {
+            HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
+                                "plpa get processor data failed\n");
+        }
+
+        ret = plpa_get_core_info(0, &num_cores, &max_core_id);
+        if (ret) {
+            HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
+                                "plpa get processor data failed\n");
+        }
     }
     else {
         /* If this failed, we just return without binding */
@@ -38,11 +61,10 @@
         goto fn_exit;
     }
 
-    PLPA_CPU_ZERO(&cpuset);
-    PLPA_CPU_SET(core % num_procs, &cpuset);
-    ret = plpa_sched_setaffinity(0, 1, &cpuset);
-    if (ret)
-        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "plpa setaffinity failed\n");
+    HYDU_bind_info.supported = 1;
+    HYDU_bind_info.num_procs = num_procs;
+    HYDU_bind_info.num_sockets = num_sockets;
+    HYDU_bind_info.num_cores = num_cores;
 
   fn_exit:
     HYDU_FUNC_EXIT();
@@ -51,3 +73,94 @@
   fn_fail:
     goto fn_exit;
 }
+
+void HYDU_bind_process(int core)
+{
+    int ret;
+    plpa_cpu_set_t cpuset;
+    HYD_Status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    if (HYDU_bind_info.supported) {
+        PLPA_CPU_ZERO(&cpuset);
+        PLPA_CPU_SET(core % HYDU_bind_info.num_procs, &cpuset);
+        ret = plpa_sched_setaffinity(0, 1, &cpuset);
+        if (ret)
+            HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "plpa setaffinity failed\n");
+    }
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return;
+
+  fn_fail:
+    goto fn_exit;
+}
+
+
+int HYDU_next_core(int old_core, HYD_Binding binding)
+{
+    int socket, core, proc;
+    int ret, new_core = -1, found;
+    HYD_Status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    /* Round-robin is easy; just give the next core */
+    if (HYDU_bind_info.supported) {
+        if (binding == HYD_BIND_RR) {
+            return (old_core + 1);
+        }
+        else if (binding == HYD_BIND_BUDDY) {
+            found = 0;
+            for (core = 0; core < HYDU_bind_info.num_cores; core++)
+                for (socket = 0; socket < HYDU_bind_info.num_sockets; socket++) {
+                    ret = plpa_map_to_processor_id(socket, core, &proc);
+                    if (ret)
+                        HYDU_ERR_SETANDJUMP2(status, HYD_INTERNAL_ERROR,
+                                             "plpa map_to_proc_id failed (%d,%d)\n",
+                                             socket, core);
+
+                    if (found)
+                        return proc;
+                    else if (proc != core)
+                        continue;
+                    else
+                        found = 1;
+                }
+
+            return -1;
+        }
+        else if (binding == HYD_BIND_PACK) {
+            found = 0;
+            for (socket = 0; socket < HYDU_bind_info.num_sockets; socket++) {
+                for (core = 0; core < HYDU_bind_info.num_cores; core++)
+                    ret = plpa_map_to_processor_id(socket, core, &proc);
+                    if (ret)
+                        HYDU_ERR_SETANDJUMP2(status, HYD_INTERNAL_ERROR,
+                                             "plpa map_to_proc_id failed (%d,%d)\n",
+                                             socket, core);
+
+                    if (found)
+                        return proc;
+                    else if (proc != core)
+                        continue;
+                    else
+                        found = 1;
+                }
+
+            return -1;
+        }
+        else
+            return -1;
+    }
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return new_core;
+
+  fn_fail:
+    new_core = -1;
+    goto fn_exit;
+}

Modified: mpich2/trunk/src/pm/hydra/utils/launch/launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/launch/launch.c	2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/utils/launch/launch.c	2009-03-22 09:31:53 UTC (rev 4158)
@@ -41,20 +41,21 @@
             HYDU_ERR_SETANDJUMP1(status, HYD_SOCK_ERROR, "dup2 error (%s)\n",
                                  HYDU_strerror(errno));
 
-        if (core >= 0) {
-            status = HYDU_bind_process(core);
-            HYDU_ERR_POP(status, "bind process failed\n");
-        }
-
         close(inpipe[1]);
         close(0);
-        if (in && (dup2(inpipe[0], 0) < 0))
+        if (in && (dup2(inpipe[0], 0) < 0)) {
             HYDU_ERR_SETANDJUMP1(status, HYD_SOCK_ERROR, "dup2 error (%s)\n",
                                  HYDU_strerror(errno));
+        }
 
         status = HYDU_putenv_list(env_list);
         HYDU_ERR_POP(status, "unable to putenv\n");
 
+        if (core >= 0) {
+            HYDU_bind_process(core);
+            HYDU_ERR_POP(status, "bind process failed\n");
+        }
+
         if (execvp(client_arg[0], client_arg) < 0) {
             HYDU_ERR_SETANDJUMP1(status, HYD_INTERNAL_ERROR, "execvp error (%s)\n",
                                  HYDU_strerror(errno));



More information about the mpich2-commits mailing list