[mpich2-commits] r4158 - in mpich2/trunk/src/pm/hydra: . include launcher/mpiexec launcher/utils pm/pmiserv utils/bind utils/launch
balaji at mcs.anl.gov
balaji at mcs.anl.gov
Sun Mar 22 04:31:54 CDT 2009
Author: balaji
Date: 2009-03-22 04:31:53 -0500 (Sun, 22 Mar 2009)
New Revision: 4158
Modified:
mpich2/trunk/src/pm/hydra/README
mpich2/trunk/src/pm/hydra/include/hydra.h
mpich2/trunk/src/pm/hydra/include/hydra_base.h
mpich2/trunk/src/pm/hydra/include/hydra_utils.h
mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c
mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c
mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.c
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.h
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy_utils.c
mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c
mpich2/trunk/src/pm/hydra/utils/bind/bind.c
mpich2/trunk/src/pm/hydra/utils/launch/launch.c
Log:
Added an initial version of process-core mapping and some
documentation describing it. Currently, only the round-robin
allocation works correctly. Topology-aware allocations are broken
(probably in PLPA itself). User-defined mappings are not supported
yet.
Modified: mpich2/trunk/src/pm/hydra/README
===================================================================
--- mpich2/trunk/src/pm/hydra/README 2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/README 2009-03-22 09:31:53 UTC (rev 4158)
@@ -10,9 +10,10 @@
available here:
http://wiki.mcs.anl.gov/mpich2/index.php/Hydra_Process_Management_Framework
-How to use Hydra
-----------------
+Quick Start
+-----------
+
To use hydra, mpich2 needs to be configured with the configure option
--with-pm=hydra.
@@ -34,16 +35,27 @@
$ cat hosts
- donner.mcs.anl.gov
- foo.mcs.anl.gov
- shakey.mcs.anl.gov
- terra.mcs.anl.gov
+ donner
+ foo
+ shakey
+ terra
To run your application on these nodes, use mpiexec:
$ mpiexec -f hosts -n 4 ./app
+The host file can also be specified as follows:
+ $ cat hosts
+
+ donner:2
+ foo:3
+ shakey:2
+
+In this case, the first 2 processes are scheduled on "donner", the
+next 3 on "foo" and the last 2 on "shakey".
+
+
Environment settings
--------------------
@@ -75,6 +87,69 @@
The default bootstrap server is ssh.
+Process-core binding
+--------------------
+
+We support multiple modes of process-core binding: round-robin ("rr"),
+buddy-allocation ("buddy"), closest packing ("pack") and user-defined
+("user"). These can be selected as follows:
+
+ $ mpiexec --binding rr -f hosts -n 8 ./app
+
+ ... or ...
+
+ $ mpiexec --binding pack -f hosts -n 8 ./app
+
+Consider the following layout of processing elements in the system
+(e.g., two nodes, each with two processors, and each processor with
+two cores). Suppose the processor IDs assigned by the operating system
+to each of these processing elements are as shown below:
+
+__________________________________________ __________________________________________
+| _________________ _________________ | | _________________ _________________ |
+| | _____ _____ | | _____ _____ | | | | _____ _____ | | _____ _____ | |
+| | | | | | | | | | | | | | | | | | | | | | | | | | | |
+| | | | | | | | | | | | | | | | | | | | | | | | | | | |
+| | | 0 | | 2 | | | | 1 | | 3 | | | | | | 0 | | 1 | | | | 2 | | 3 | | |
+| | | | | | | | | | | | | | | | | | | | | | | | | | | |
+| | |_____| |_____| | | |_____| |_____| | | | | |_____| |_____| | | |_____| |_____| | |
+| |_________________| |_________________| | | |_________________| |_________________| |
+|__________________________________________| |__________________________________________|
+
+
+In this case, the binding options are as follows:
+
+RR: 0, 1, 2, 3 (use the order provided by the OS)
+Buddy: 0, 1, 2, 3 (increasing sharing of resources)
+Pack: 0, 2, 1, 3 (closest packing)
+User: as defined by the user
+
+Within the user-defined binding, two modes are supported: command-line
+and host-file based. The command-line based mode can be used as
+follows:
+
+ $ mpiexec --binding user:0,3 -f hosts -n 4 ./app
+
+In this case, the first two processes are bound to the processing
+elements specified, while the last two are not bound at all. The
+mapping is the same for all machines, so if the application is run
+with 8 processes, the first 2 processes on "each machine" are bound to
+processing elements as specified.
+
+The host-file based mode for user-defined binding is selected with the
+"map=" argument on each host line. E.g.:
+
+ $ cat hosts
+
+ donner:4 map=1,2,3,4
+ foo:4 map=4,3,2,1
+ shakey:2
+
+Using this method, each host can be given a different mapping. Note
+that the last line does not have any mapping: in this case, the
+processes on that machine are not bound to any core.
+
+
X Forwarding
------------
Modified: mpich2/trunk/src/pm/hydra/include/hydra.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra.h 2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/include/hydra.h 2009-03-22 09:31:53 UTC (rev 4158)
@@ -15,6 +15,7 @@
char *base_path;
int proxy_port;
char *bootstrap;
+ HYD_Binding binding;
int debug;
int enablex;
Modified: mpich2/trunk/src/pm/hydra/include/hydra_base.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra_base.h 2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/include/hydra_base.h 2009-03-22 09:31:53 UTC (rev 4158)
@@ -92,6 +92,14 @@
HYD_ENV_PROP_LIST
} HYD_Env_prop_t;
+typedef enum {
+ HYD_BIND_UNSET,
+ HYD_BIND_RR,
+ HYD_BIND_BUDDY,
+ HYD_BIND_PACK,
+ HYD_BIND_USER
+} HYD_Binding;
+
/* List of contiguous segments of processes on a partition */
struct HYD_Partition_segment {
int start_pid;
Modified: mpich2/trunk/src/pm/hydra/include/hydra_utils.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra_utils.h 2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/include/hydra_utils.h 2009-03-22 09:31:53 UTC (rev 4158)
@@ -17,9 +17,13 @@
#if defined PROC_BINDING
#include "plpa.h"
#include "plpa_internal.h"
-HYD_Status HYDU_bind_process(int core);
+HYD_Status HYDU_bind_init(void);
+void HYDU_bind_process(int core);
+int HYDU_next_core(int core, HYD_Binding binding);
#else
+#define HYDU_bind_init(...) HYD_SUCCESS
#define HYDU_bind_process(...) HYD_SUCCESS
+#define HYDU_next_core(...) (-1)
#endif /* PROC_BINDING */
Modified: mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c 2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c 2009-03-22 09:31:53 UTC (rev 4158)
@@ -23,6 +23,8 @@
printf("\t--enable-x/--disable-x [Enable or disable X forwarding]\n");
printf("\t--proxy-port [Port on which proxies can listen]\n");
printf("\t--bootstrap [Bootstrap server to use]\n");
+ printf("\t--binding [Process binding]");
+
printf("\t-genv {name} {value} [Environment variable name and value]\n");
printf("\t-genvlist {env1,env2,...} [Environment variable list to pass]\n");
printf("\t-genvnone [Do not pass any environment variables]\n");
Modified: mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c 2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c 2009-03-22 09:31:53 UTC (rev 4158)
@@ -197,6 +197,24 @@
continue;
}
+ if (!strcmp(str1, "--binding")) {
+ if (!str2) {
+ INCREMENT_ARGV(status);
+ str2 = *argv;
+ }
+ HYDU_ERR_CHKANDJUMP(status, handle.binding != HYD_BIND_UNSET,
+ HYD_INTERNAL_ERROR, "duplicate binding\n");
+ if (!strcmp(str2, "rr"))
+ handle.binding = HYD_BIND_RR;
+ else if (!strcmp(str2, "buddy"))
+ handle.binding = HYD_BIND_BUDDY;
+ else if (!strcmp(str2, "pack"))
+ handle.binding = HYD_BIND_PACK;
+ /* We don't support user-specified mappings yet */
+
+ continue;
+ }
+
if (!strcmp(str1, "--proxy-port")) {
if (!str2) {
INCREMENT_ARGV(status);
Modified: mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c 2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c 2009-03-22 09:31:53 UTC (rev 4158)
@@ -15,6 +15,7 @@
handle.base_path = NULL;
handle.proxy_port = -1;
handle.bootstrap = NULL;
+ handle.binding = HYD_BIND_UNSET;
handle.debug = -1;
handle.enablex = -1;
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.c 2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.c 2009-03-22 09:31:53 UTC (rev 4158)
@@ -15,7 +15,7 @@
int main(int argc, char **argv)
{
int i, j, arg, count, pid, ret_status;
- int stdin_fd, timeout, process_id;
+ int stdin_fd, timeout, process_id, core;
char *str, *timeout_str;
char *client_args[HYD_EXEC_ARGS];
char *tmp[HYDU_NUM_JOIN_STR];
@@ -64,8 +64,12 @@
status = HYDU_putenv_list(HYD_PMCD_pmi_proxy_params.global_env);
HYDU_ERR_POP(status, "putenv returned error\n");
+ status = HYDU_bind_init();
+ HYDU_ERR_POP(status, "unable to initialize process binding\n");
+
/* Spawn the processes */
process_id = 0;
+ core = -1;
for (exec = HYD_PMCD_pmi_proxy_params.exec_list; exec; exec = exec->next) {
for (i = 0; i < exec->proc_count; i++) {
@@ -84,13 +88,14 @@
client_args[arg++] = MPIU_Strdup(exec->exec[j]);
client_args[arg++] = NULL;
+ core = HYDU_next_core(core, HYD_PMCD_pmi_proxy_params.binding);
if ((process_id + HYD_PMCD_pmi_proxy_params.pmi_id) == 0) {
status = HYDU_create_process(client_args, exec->prop_env,
&HYD_PMCD_pmi_proxy_params.in,
&HYD_PMCD_pmi_proxy_params.out[process_id],
&HYD_PMCD_pmi_proxy_params.err[process_id],
&HYD_PMCD_pmi_proxy_params.pid[process_id],
- process_id);
+ core);
}
else {
status = HYDU_create_process(client_args, exec->prop_env,
@@ -98,7 +103,7 @@
&HYD_PMCD_pmi_proxy_params.out[process_id],
&HYD_PMCD_pmi_proxy_params.err[process_id],
&HYD_PMCD_pmi_proxy_params.pid[process_id],
- process_id);
+ core);
}
HYDU_ERR_POP(status, "spawn process returned error\n");
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.h
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.h 2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy.h 2009-03-22 09:31:53 UTC (rev 4158)
@@ -14,6 +14,7 @@
int proxy_port;
int pmi_id;
char *wdir;
+ HYD_Binding binding;
HYD_Env_t *global_env;
int one_pass_count;
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy_utils.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy_utils.c 2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_proxy_utils.c 2009-03-22 09:31:53 UTC (rev 4158)
@@ -4,7 +4,6 @@
* See COPYRIGHT in top-level directory.
*/
-#include "hydra.h"
#include "pmi_proxy.h"
struct HYD_PMCD_pmi_proxy_params HYD_PMCD_pmi_proxy_params;
@@ -39,6 +38,13 @@
continue;
}
+ /* Process-core binding */
+ if (!strcmp(*argv, "--binding")) {
+ argv++;
+ HYD_PMCD_pmi_proxy_params.binding = atoi(*argv);
+ continue;
+ }
+
/* Global env */
if (!strcmp(*argv, "--global-env")) {
argv++;
Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c 2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c 2009-03-22 09:31:53 UTC (rev 4158)
@@ -122,6 +122,9 @@
partition->proxy_args[arg++] = MPIU_Strdup("--wdir");
partition->proxy_args[arg++] = MPIU_Strdup(handle.wdir);
+ partition->proxy_args[arg++] = MPIU_Strdup("--binding");
+ partition->proxy_args[arg++] = HYDU_int_to_str(handle.binding);
+
/* Pass the global environment separately, instead of for each
* executable, as an optimization */
partition->proxy_args[arg++] = MPIU_Strdup("--global-env");
Modified: mpich2/trunk/src/pm/hydra/utils/bind/bind.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/bind/bind.c 2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/utils/bind/bind.c 2009-03-22 09:31:53 UTC (rev 4158)
@@ -6,12 +6,22 @@
#include "hydra_utils.h"
-HYD_Status HYDU_bind_process(int core)
+struct HYDU_bind_info {
+ int supported;
+ int num_procs;
+ int num_sockets;
+ int num_cores;
+};
+
+static struct HYDU_bind_info HYDU_bind_info = { 0, -1, -1, -1 };
+
+HYD_Status HYDU_bind_init(void)
{
plpa_api_type_t p;
- plpa_cpu_set_t cpuset;
int ret, supported;
int num_procs, max_proc_id;
+ int num_sockets = -1, max_socket_id;
+ int num_cores = -1, max_core_id;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
@@ -31,6 +41,19 @@
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
"plpa get processor data failed\n");
}
+
+ /* PLPA only gives information about sockets and cores */
+ ret = plpa_get_socket_info(&num_sockets, &max_socket_id);
+ if (ret) {
+ HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
+ "plpa get processor data failed\n");
+ }
+
+ ret = plpa_get_core_info(0, &num_cores, &max_core_id);
+ if (ret) {
+ HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
+ "plpa get processor data failed\n");
+ }
}
else {
/* If this failed, we just return without binding */
@@ -38,11 +61,10 @@
goto fn_exit;
}
- PLPA_CPU_ZERO(&cpuset);
- PLPA_CPU_SET(core % num_procs, &cpuset);
- ret = plpa_sched_setaffinity(0, 1, &cpuset);
- if (ret)
- HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "plpa setaffinity failed\n");
+ HYDU_bind_info.supported = 1;
+ HYDU_bind_info.num_procs = num_procs;
+ HYDU_bind_info.num_sockets = num_sockets;
+ HYDU_bind_info.num_cores = num_cores;
fn_exit:
HYDU_FUNC_EXIT();
@@ -51,3 +73,94 @@
fn_fail:
goto fn_exit;
}
+
+void HYDU_bind_process(int core)
+{
+ int ret;
+ plpa_cpu_set_t cpuset;
+ HYD_Status status = HYD_SUCCESS;
+
+ HYDU_FUNC_ENTER();
+
+ if (HYDU_bind_info.supported) {
+ PLPA_CPU_ZERO(&cpuset);
+ PLPA_CPU_SET(core % HYDU_bind_info.num_procs, &cpuset);
+ ret = plpa_sched_setaffinity(0, 1, &cpuset);
+ if (ret)
+ HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "plpa setaffinity failed\n");
+ }
+
+ fn_exit:
+ HYDU_FUNC_EXIT();
+ return;
+
+ fn_fail:
+ goto fn_exit;
+}
+
+
+int HYDU_next_core(int old_core, HYD_Binding binding)
+{
+ int socket, core, proc;
+ int ret, new_core = -1, found;
+ HYD_Status status = HYD_SUCCESS;
+
+ HYDU_FUNC_ENTER();
+
+ /* Round-robin is easy; just give the next core */
+ if (HYDU_bind_info.supported) {
+ if (binding == HYD_BIND_RR) {
+ return (old_core + 1);
+ }
+ else if (binding == HYD_BIND_BUDDY) {
+ found = 0;
+ for (core = 0; core < HYDU_bind_info.num_cores; core++)
+ for (socket = 0; socket < HYDU_bind_info.num_sockets; socket++) {
+ ret = plpa_map_to_processor_id(socket, core, &proc);
+ if (ret)
+ HYDU_ERR_SETANDJUMP2(status, HYD_INTERNAL_ERROR,
+ "plpa map_to_proc_id failed (%d,%d)\n",
+ socket, core);
+
+ if (found)
+ return proc;
+ else if (proc != core)
+ continue;
+ else
+ found = 1;
+ }
+
+ return -1;
+ }
+ else if (binding == HYD_BIND_PACK) {
+ found = 0;
+ for (socket = 0; socket < HYDU_bind_info.num_sockets; socket++) {
+ for (core = 0; core < HYDU_bind_info.num_cores; core++)
+ ret = plpa_map_to_processor_id(socket, core, &proc);
+ if (ret)
+ HYDU_ERR_SETANDJUMP2(status, HYD_INTERNAL_ERROR,
+ "plpa map_to_proc_id failed (%d,%d)\n",
+ socket, core);
+
+ if (found)
+ return proc;
+ else if (proc != core)
+ continue;
+ else
+ found = 1;
+ }
+
+ return -1;
+ }
+ else
+ return -1;
+ }
+
+ fn_exit:
+ HYDU_FUNC_EXIT();
+ return new_core;
+
+ fn_fail:
+ new_core = -1;
+ goto fn_exit;
+}
Modified: mpich2/trunk/src/pm/hydra/utils/launch/launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/launch/launch.c 2009-03-22 06:18:32 UTC (rev 4157)
+++ mpich2/trunk/src/pm/hydra/utils/launch/launch.c 2009-03-22 09:31:53 UTC (rev 4158)
@@ -41,20 +41,21 @@
HYDU_ERR_SETANDJUMP1(status, HYD_SOCK_ERROR, "dup2 error (%s)\n",
HYDU_strerror(errno));
- if (core >= 0) {
- status = HYDU_bind_process(core);
- HYDU_ERR_POP(status, "bind process failed\n");
- }
-
close(inpipe[1]);
close(0);
- if (in && (dup2(inpipe[0], 0) < 0))
+ if (in && (dup2(inpipe[0], 0) < 0)) {
HYDU_ERR_SETANDJUMP1(status, HYD_SOCK_ERROR, "dup2 error (%s)\n",
HYDU_strerror(errno));
+ }
status = HYDU_putenv_list(env_list);
HYDU_ERR_POP(status, "unable to putenv\n");
+ if (core >= 0) {
+ HYDU_bind_process(core);
+ HYDU_ERR_POP(status, "bind process failed\n");
+ }
+
if (execvp(client_arg[0], client_arg) < 0) {
HYDU_ERR_SETANDJUMP1(status, HYD_INTERNAL_ERROR, "execvp error (%s)\n",
HYDU_strerror(errno));
More information about the mpich2-commits
mailing list