[mpich2-commits] r3972 - in mpich2/trunk/src/pm/hydra: bootstrap/ssh bootstrap/utils control/consys include launcher/mpiexec launcher/utils pm/central utils/launch

balaji at mcs.anl.gov balaji at mcs.anl.gov
Sun Mar 8 18:35:27 CDT 2009


Author: balaji
Date: 2009-03-08 18:35:27 -0500 (Sun, 08 Mar 2009)
New Revision: 3972

Added:
   mpich2/trunk/src/pm/hydra/utils/launch/allocate.c
Removed:
   mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_finalize.c
   mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_init.c
Modified:
   mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_finalize.c
   mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_launch.c
   mpich2/trunk/src/pm/hydra/bootstrap/utils/Makefile.sm
   mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu.h
   mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_wait.c
   mpich2/trunk/src/pm/hydra/control/consys/consys_close.c
   mpich2/trunk/src/pm/hydra/control/consys/consys_launch.c
   mpich2/trunk/src/pm/hydra/control/consys/consys_wait.c
   mpich2/trunk/src/pm/hydra/include/hydra.h
   mpich2/trunk/src/pm/hydra/include/hydra_launch.h
   mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c
   mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c
   mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c
   mpich2/trunk/src/pm/hydra/launcher/utils/lchu.h
   mpich2/trunk/src/pm/hydra/pm/central/central_launch.c
   mpich2/trunk/src/pm/hydra/utils/launch/Makefile.sm
Log:
Infrastructure changes to the Hydra framework to support the proxy.


Modified: mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_finalize.c
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_finalize.c	2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_finalize.c	2009-03-08 23:35:27 UTC (rev 3972)
@@ -16,18 +16,6 @@
 
     HYDU_FUNC_ENTER();
 
-    status = HYD_BSCU_Finalize_exit_status();
-    if (status != HYD_SUCCESS) {
-        HYDU_Error_printf("unable to finalize exit status\n");
-        goto fn_fail;
-    }
-
-    status = HYD_BSCU_Finalize_io_fds();
-    if (status != HYD_SUCCESS) {
-        HYDU_Error_printf("unable to finalize I/O fds\n");
-        goto fn_fail;
-    }
-
   fn_exit:
     HYDU_FUNC_EXIT();
     return status;

Modified: mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_launch.c	2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/bootstrap/ssh/ssh_launch.c	2009-03-08 23:35:27 UTC (rev 3972)
@@ -29,76 +29,54 @@
 
     HYDU_FUNC_ENTER();
 
-    status = HYD_BSCU_Init_exit_status();
-    if (status != HYD_SUCCESS) {
-        HYDU_Error_printf("bootstrap utils returned error when initializing exit status\n");
-        goto fn_fail;
-    }
-
     status = HYD_BSCU_Set_common_signals(HYD_BSCU_Signal_handler);
     if (status != HYD_SUCCESS) {
         HYDU_Error_printf("signal utils returned error when trying to set signal\n");
         goto fn_fail;
     }
 
-    status = HYD_BSCU_Init_io_fds();
-    if (status != HYD_SUCCESS) {
-        HYDU_Error_printf("bootstrap utils returned error when initializing io fds\n");
-        goto fn_fail;
-    }
-
     process_id = 0;
     for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
         for (partition = proc_params->partition; partition; partition = partition->next) {
-            for (i = 0; i < partition->proc_count; i++) {
-                /* Setup the executable arguments */
-                arg = 0;
-                client_arg[arg++] = MPIU_Strdup("/usr/bin/ssh");
+            if (partition->group_rank) /* Only rank 0 is spawned */
+                continue;
 
-                /* Allow X forwarding only if explicitly requested */
-                if (handle.enablex == 1)
-                    client_arg[arg++] = MPIU_Strdup("-X");
-                else if (handle.enablex == 0)
-                    client_arg[arg++] = MPIU_Strdup("-x");
-                else        /* default mode is disable X */
-                    client_arg[arg++] = MPIU_Strdup("-x");
+            /* Setup the executable arguments */
+            arg = 0;
+            client_arg[arg++] = MPIU_Strdup("/usr/bin/ssh");
 
-                /* ssh does not support any partition names other than host names */
-                client_arg[arg++] = MPIU_Strdup(partition->name);
+            /* Allow X forwarding only if explicitly requested */
+            if (handle.enablex == 1)
+                client_arg[arg++] = MPIU_Strdup("-X");
+            else if (handle.enablex == 0)
+                client_arg[arg++] = MPIU_Strdup("-x");
+            else        /* default mode is disable X */
+                client_arg[arg++] = MPIU_Strdup("-x");
 
-                client_arg[arg++] = MPIU_Strdup("sh");
-                client_arg[arg++] = MPIU_Strdup("-c");
-                client_arg[arg++] = MPIU_Strdup("\"");
-                client_arg[arg++] = NULL;
+            /* ssh does not support any partition names other than host names */
+            client_arg[arg++] = MPIU_Strdup(partition->name);
 
-                HYDU_Append_env(handle.system_env, client_arg, process_id);
-                HYDU_Append_env(proc_params->prop_env, client_arg, process_id);
-                HYDU_Append_wdir(client_arg);
-                HYDU_Append_exec(proc_params->exec, client_arg);
+            for (i = 0; partition->args[i]; i++)
+                client_arg[arg++] = MPIU_Strdup(partition->args[i]);
+            client_arg[arg] = NULL;
 
-                for (arg = 0; client_arg[arg]; arg++);
-                client_arg[arg++] = MPIU_Strdup("\"");
-                client_arg[arg++] = NULL;
+            /* Only process_id 0 is handed a stdin pointer (&handle.in);
+             * every other process is passed NULL. */
+            status = HYDU_Create_process(client_arg, (process_id == 0 ? &handle.in : NULL),
+                                         &partition->out, &partition->err, &partition->pid);
+            if (status != HYD_SUCCESS) {
+                HYDU_Error_printf("bootstrap spawn process returned error\n");
+                goto fn_fail;
+            }
 
-                /* The stdin pointer will be some value for process_id
-                 * 0; for everyone else, it's NULL. */
-                status = HYDU_Create_process(client_arg, (process_id == 0 ? &handle.in : NULL),
-                                             &proc_params->out[i], &proc_params->err[i],
-                                             &proc_params->pid[i]);
-                if (status != HYD_SUCCESS) {
-                    HYDU_Error_printf("bootstrap spawn process returned error\n");
-                    goto fn_fail;
-                }
+            for (arg = 0; client_arg[arg]; arg++)
+                HYDU_FREE(client_arg[arg]);
 
-                for (arg = 0; client_arg[arg]; arg++)
-                    HYDU_FREE(client_arg[arg]);
+            /* For the remaining processes, set the stdin fd to -1 */
+            if (process_id != 0)
+                handle.in = -1;
 
-                /* For the remaining processes, set the stdin fd to -1 */
-                if (process_id != 0)
-                    handle.in = -1;
-
-                process_id++;
-            }
+            process_id++;
         }
     }
 

Modified: mpich2/trunk/src/pm/hydra/bootstrap/utils/Makefile.sm
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/utils/Makefile.sm	2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/bootstrap/utils/Makefile.sm	2009-03-08 23:35:27 UTC (rev 3972)
@@ -7,7 +7,7 @@
 HYDRA_LIB_PATH = ../../lib
 
 libhydra_a_DIR = ${HYDRA_LIB_PATH}
-libhydra_a_SOURCES = bscu_init.c bscu_finalize.c bscu_wait.c bscu_signal.c
+libhydra_a_SOURCES = bscu_wait.c bscu_signal.c
 INCLUDES = -I${abs_srcdir}/../../include \
 	-I${abs_srcdir}/../../../../include \
 	-I../../include \

Modified: mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu.h
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu.h	2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu.h	2009-03-08 23:35:27 UTC (rev 3972)
@@ -11,10 +11,6 @@
 #include "hydra_sig.h"
 #include "bsci.h"
 
-HYD_Status HYD_BSCU_Init_exit_status(void);
-HYD_Status HYD_BSCU_Finalize_exit_status(void);
-HYD_Status HYD_BSCU_Init_io_fds(void);
-HYD_Status HYD_BSCU_Finalize_io_fds(void);
 HYD_Status HYD_BSCU_Wait_for_completion(void);
 HYD_Status HYD_BSCU_Set_common_signals(void (*handler) (int));
 void HYD_BSCU_Signal_handler(int signal);

Deleted: mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_finalize.c
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_finalize.c	2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_finalize.c	2009-03-08 23:35:27 UTC (rev 3972)
@@ -1,51 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- *  (C) 2008 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#include "hydra.h"
-#include "hydra_mem.h"
-#include "bsci.h"
-#include "bscu.h"
-
-HYD_Handle handle;
-
-HYD_Status HYD_BSCU_Finalize_exit_status(void)
-{
-    struct HYD_Proc_params *proc_params;
-    int i;
-    HYD_Status status = HYD_SUCCESS;
-
-    HYDU_FUNC_ENTER();
-
-    proc_params = handle.proc_params;
-    while (proc_params) {
-        HYDU_FREE(proc_params->pid);
-        HYDU_FREE(proc_params->exit_status);
-        HYDU_FREE(proc_params->exit_status_valid);
-        proc_params = proc_params->next;
-    }
-
-    HYDU_FUNC_EXIT();
-    return status;
-}
-
-
-HYD_Status HYD_BSCU_Finalize_io_fds(void)
-{
-    struct HYD_Proc_params *proc_params;
-    HYD_Status status = HYD_SUCCESS;
-
-    HYDU_FUNC_ENTER();
-
-    proc_params = handle.proc_params;
-    while (proc_params) {
-        HYDU_FREE(proc_params->out);
-        HYDU_FREE(proc_params->err);
-        proc_params = proc_params->next;
-    }
-
-    HYDU_FUNC_EXIT();
-    return status;
-}

Deleted: mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_init.c
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_init.c	2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_init.c	2009-03-08 23:35:27 UTC (rev 3972)
@@ -1,66 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- *  (C) 2008 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#include "hydra.h"
-#include "hydra_mem.h"
-#include "bsci.h"
-#include "bscu.h"
-
-HYD_Handle handle;
-
-HYD_Status HYD_BSCU_Init_exit_status(void)
-{
-    struct HYD_Proc_params *proc_params;
-    int i;
-    HYD_Status status = HYD_SUCCESS;
-
-    HYDU_FUNC_ENTER();
-
-    /* Set the exit status of all processes to 1 (> 0 means that the
-     * status is not set yet). Also count the number of processes in
-     * the same loop. */
-    proc_params = handle.proc_params;
-    while (proc_params) {
-        HYDU_MALLOC(proc_params->pid, int *, proc_params->exec_proc_count * sizeof(int), status);
-        HYDU_MALLOC(proc_params->exit_status, int *, proc_params->exec_proc_count * sizeof(int),
-                    status);
-        HYDU_MALLOC(proc_params->exit_status_valid, int *, proc_params->exec_proc_count * sizeof(int),
-                    status);
-        for (i = 0; i < proc_params->exec_proc_count; i++)
-            proc_params->exit_status_valid[i] = 0;
-        proc_params = proc_params->next;
-    }
-
-  fn_exit:
-    HYDU_FUNC_EXIT();
-    return status;
-
-  fn_fail:
-    goto fn_exit;
-}
-
-
-HYD_Status HYD_BSCU_Init_io_fds(void)
-{
-    struct HYD_Proc_params *proc_params;
-    HYD_Status status = HYD_SUCCESS;
-
-    HYDU_FUNC_ENTER();
-
-    proc_params = handle.proc_params;
-    while (proc_params) {
-        HYDU_MALLOC(proc_params->out, int *, proc_params->exec_proc_count * sizeof(int), status);
-        HYDU_MALLOC(proc_params->err, int *, proc_params->exec_proc_count * sizeof(int), status);
-        proc_params = proc_params->next;
-    }
-
-  fn_exit:
-    HYDU_FUNC_EXIT();
-    return status;
-
-  fn_fail:
-    goto fn_exit;
-}

Modified: mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_wait.c
===================================================================
--- mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_wait.c	2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/bootstrap/utils/bscu_wait.c	2009-03-08 23:35:27 UTC (rev 3972)
@@ -21,18 +21,16 @@
 {
     int pid, ret_status, i, not_completed;
     struct HYD_Proc_params *proc_params;
+    struct HYD_Partition_list *partition;
     HYD_Status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
 
     not_completed = 0;
-    proc_params = handle.proc_params;
-    while (proc_params) {
-        for (i = 0; i < proc_params->exec_proc_count; i++)
-            if (proc_params->exit_status_valid[i] == 0)
+    for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next)
+        for (partition = proc_params->partition; partition; partition = partition->next)
+            if (partition->exit_status == -1)
                 not_completed++;
-        proc_params = proc_params->next;
-    }
 
     /* We get here only after the I/O sockets have been closed. If the
      * application did not manually close its stdout and stderr
@@ -43,16 +41,13 @@
         pid = waitpid(-1, &ret_status, WNOHANG);
         if (pid > 0) {
             /* Find the pid and mark it as complete. */
-            proc_params = handle.proc_params;
-            while (proc_params) {
-                for (i = 0; i < proc_params->exec_proc_count; i++) {
-                    if (proc_params->pid[i] == pid) {
-                        proc_params->exit_status[i] = WEXITSTATUS(ret_status);
-                        proc_params->exit_status_valid[i] = 1;
+            for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
+                for (partition = proc_params->partition; partition; partition = partition->next) {
+                    if (partition->pid == pid) {
+                        partition->exit_status = WEXITSTATUS(ret_status);
                         not_completed--;
                     }
                 }
-                proc_params = proc_params->next;
             }
         }
         if (HYD_CSU_Time_left() == 0)

Modified: mpich2/trunk/src/pm/hydra/control/consys/consys_close.c
===================================================================
--- mpich2/trunk/src/pm/hydra/control/consys/consys_close.c	2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/control/consys/consys_close.c	2009-03-08 23:35:27 UTC (rev 3972)
@@ -17,6 +17,7 @@
 {
     int i;
     struct HYD_Proc_params *proc_params;
+    struct HYD_Partition_list *partition;
     HYD_Status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
@@ -31,19 +32,17 @@
     close(fd);
 
     /* Find the FD in the handle and remove it. */
-    proc_params = handle.proc_params;
-    while (proc_params) {
-        for (i = 0; i < proc_params->exec_proc_count; i++) {
-            if (proc_params->out[i] == fd) {
-                proc_params->out[i] = -1;
+    for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
+        for (partition = proc_params->partition; partition; partition = partition->next) {
+            if (partition->out == fd) {
+                partition->out = -1;
                 goto fn_exit;
             }
-            if (proc_params->err[i] == fd) {
-                proc_params->err[i] = -1;
+            if (partition->err == fd) {
+                partition->err = -1;
                 goto fn_exit;
             }
         }
-        proc_params = proc_params->next;
     }
 
   fn_exit:

Modified: mpich2/trunk/src/pm/hydra/control/consys/consys_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/control/consys/consys_launch.c	2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/control/consys/consys_launch.c	2009-03-08 23:35:27 UTC (rev 3972)
@@ -15,6 +15,7 @@
 HYD_Status HYD_CSI_Launch_procs(void)
 {
     struct HYD_Proc_params *proc_params;
+    struct HYD_Partition_list *partition;
     int stdin_fd, flags, count;
     HYD_Status status = HYD_SUCCESS;
 
@@ -26,23 +27,20 @@
         goto fn_fail;
     }
 
-    proc_params = handle.proc_params;
-    while (proc_params) {
-        status = HYD_DMX_Register_fd(proc_params->exec_proc_count, proc_params->out,
-                                     HYD_STDOUT, proc_params->stdout_cb);
-        if (status != HYD_SUCCESS) {
-            HYDU_Error_printf("demux engine returned error when registering fd\n");
-            goto fn_fail;
-        }
+    for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
+        for (partition = proc_params->partition; partition; partition = partition->next) {
+            status = HYD_DMX_Register_fd(1, &partition->out, HYD_STDOUT, proc_params->stdout_cb);
+            if (status != HYD_SUCCESS) {
+                HYDU_Error_printf("demux engine returned error when registering fd\n");
+                goto fn_fail;
+            }
 
-        status = HYD_DMX_Register_fd(proc_params->exec_proc_count, proc_params->err,
-                                     HYD_STDOUT, proc_params->stderr_cb);
-        if (status != HYD_SUCCESS) {
-            HYDU_Error_printf("demux engine returned error when registering fd\n");
-            goto fn_fail;
+            status = HYD_DMX_Register_fd(1, &partition->err, HYD_STDOUT, proc_params->stderr_cb);
+            if (status != HYD_SUCCESS) {
+                HYDU_Error_printf("demux engine returned error when registering fd\n");
+                goto fn_fail;
+            }
         }
-
-        proc_params = proc_params->next;
     }
 
     if (handle.in != -1) {      /* Only process_id 0 */

Modified: mpich2/trunk/src/pm/hydra/control/consys/consys_wait.c
===================================================================
--- mpich2/trunk/src/pm/hydra/control/consys/consys_wait.c	2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/control/consys/consys_wait.c	2009-03-08 23:35:27 UTC (rev 3972)
@@ -16,6 +16,7 @@
 {
     int sockets_open, i;
     struct HYD_Proc_params *proc_params;
+    struct HYD_Partition_list *partition;
     HYD_Status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
@@ -30,19 +31,16 @@
 
         /* Check to see if there's any open read socket left; if there
          * are, we will just wait for more events. */
-        proc_params = handle.proc_params;
         sockets_open = 0;
-        while (proc_params) {
-            for (i = 0; i < proc_params->exec_proc_count; i++) {
-                if (proc_params->out[i] != -1 || proc_params->err[i] != -1) {
+        for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
+            for (partition = proc_params->partition; partition; partition = partition->next) {
+                if (partition->out != -1 || partition->err != -1) {
                     sockets_open++;
                     break;
                 }
             }
             if (sockets_open)
                 break;
-
-            proc_params = proc_params->next;
         }
 
         if (sockets_open && HYD_CSU_Time_left())

Modified: mpich2/trunk/src/pm/hydra/include/hydra.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra.h	2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/include/hydra.h	2009-03-08 23:35:27 UTC (rev 3972)
@@ -96,6 +96,7 @@
 
     char *host_file;
 
+    /* Global environment */
     HYD_Env_t *global_env;
     HYD_Env_t *system_env;
     HYD_Env_t *user_env;
@@ -115,37 +116,56 @@
      * executable and environment. */
     struct HYD_Proc_params {
         int  exec_proc_count;
+        char *exec[HYD_EXEC_ARGS];
+
         struct HYD_Partition_list {
             char  * name;
             int     proc_count;
             char ** mapping; /* Can be core IDs or something else */
+
+            /*
+             * The bootstrap server is expected to start a single
+             * executable on the first possible node and return a
+             * single PID. This executable could be a PM proxy that
+             * will launch the actual application on the rest of the
+             * partition list.
+             *
+             * Possible hacks:
+             *
+             *   1. If the process manager needs more proxies within
+             *      this same list, it can use different group
+             *      IDs. Each group ID will have its own proxy.
+             *
+             *   2. If no proxy is needed, the PM can split this list
+             *      into one element per process. The bootstrap
+             *      server itself does not distinguish a proxy from
+             *      the application executable, so it will not require
+             *      any changes.
+             *
+             *   3. One proxy per physical node means that each
+             *      partition will have a different group ID.
+             */
+            int     group_id; /* Assumed to be in ascending order */
+            int     group_rank; /* Rank within the group */
+            int     pid;
+            int     out;
+            int     err;
+            int     exit_status;
+            char  * args[HYD_EXEC_ARGS];
+
             struct HYD_Partition_list *next;
         } *partition;
 
-        char *exec[HYD_EXEC_ARGS];
+        /* Local environment */
         HYD_Env_t *user_env;
         HYD_Env_prop_t prop;
         HYD_Env_t *prop_env;
 
-        /* These output FDs are filled in by the lower layers */
-        int *out;
-        int *err;
-
         /* Callback functions for the stdout/stderr events. These can
          * be the same. */
          HYD_Status(*stdout_cb) (int fd, HYD_Event_t events);
          HYD_Status(*stderr_cb) (int fd, HYD_Event_t events);
 
-        /* Status > 0 means that it is not set yet. Successful
-         * completion of a process will set the status to 0. An error
-         * will set this to a negative value corresponding to the
-         * error. Depending on the bootstrap server, these values
-         * might correspond to per-process status, or can be a common
-         * value for all processes. */
-        int *pid;
-        int *exit_status;
-        int *exit_status_valid;
-
         struct HYD_Proc_params *next;
     } *proc_params;
 

Modified: mpich2/trunk/src/pm/hydra/include/hydra_launch.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra_launch.h	2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/include/hydra_launch.h	2009-03-08 23:35:27 UTC (rev 3972)
@@ -12,6 +12,7 @@
 HYD_Status HYDU_Append_env(HYD_Env_t * env_list, char **client_arg, int id);
 HYD_Status HYDU_Append_exec(char **exec, char **client_arg);
 HYD_Status HYDU_Append_wdir(char **client_arg);
+HYD_Status HYDU_Allocate_Partition(struct HYD_Partition_list **partition);
 HYD_Status HYDU_Create_process(char **client_arg, int *in, int *out, int *err, int *pid);
 
 #endif /* HYDRA_LAUNCH_H_INCLUDED */

Modified: mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c	2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c	2009-03-08 23:35:27 UTC (rev 3972)
@@ -45,6 +45,7 @@
 int main(int argc, char **argv)
 {
     struct HYD_Proc_params *proc_params;
+    struct HYD_Partition_list *partition;
     int exit_status, i;
     HYD_Status status = HYD_SUCCESS;
 
@@ -109,13 +110,10 @@
     }
 
     /* Check for the exit status for all the processes */
-    proc_params = handle.proc_params;
     exit_status = 0;
-    while (proc_params) {
-        for (i = 0; i < proc_params->exec_proc_count; i++)
-            exit_status |= proc_params->exit_status[i];
-        proc_params = proc_params->next;
-    }
+    for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next)
+        for (partition = proc_params->partition; partition; partition = partition->next)
+            exit_status |= partition->exit_status;
 
     /* Call finalize functions for lower layers to cleanup their resources */
     status = HYD_CSI_Finalize();

Modified: mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c	2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/launcher/mpiexec/utils.c	2009-03-08 23:35:27 UTC (rev 3972)
@@ -49,11 +49,8 @@
     proc_params->user_env = NULL;
     proc_params->prop = HYD_ENV_PROP_UNSET;
     proc_params->prop_env = NULL;
-    proc_params->out = NULL;
-    proc_params->err = NULL;
     proc_params->stdout_cb = NULL;
     proc_params->stderr_cb = NULL;
-    proc_params->exit_status = NULL;
     proc_params->next = NULL;
 
     *params = proc_params;

Modified: mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c	2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/launcher/utils/lchu.c	2009-03-08 23:35:27 UTC (rev 3972)
@@ -32,13 +32,9 @@
     proc_params = handle.proc_params;
     while (proc_params) {
         if (!strcmp(handle.host_file, "HYDRA_USE_LOCALHOST")) {
-            HYDU_MALLOC(proc_params->partition, struct HYD_Partition_list *,
-                        sizeof(struct HYD_Partition_list), status);
-
+            HYDU_Allocate_Partition(&proc_params->partition);
             proc_params->partition->name = MPIU_Strdup("localhost");
             proc_params->partition->proc_count = proc_params->exec_proc_count;
-            proc_params->partition->mapping = NULL;
-            proc_params->partition->next = NULL;
             total_procs = proc_params->exec_proc_count;
         }
         else {
@@ -59,20 +55,18 @@
                 if (num_procs > (proc_params->exec_proc_count - total_procs))
                     num_procs = (proc_params->exec_proc_count - total_procs);
 
-                if (proc_params->partition) {
+                if (!proc_params->partition) {
+                    HYDU_Allocate_Partition(&proc_params->partition);
+                    partition = proc_params->partition;
+                }
+                else {
                     for (partition = proc_params->partition; partition->next;
                          partition = partition->next);
-                    HYDU_MALLOC(partition->next, struct HYD_Partition_list *,
-                                sizeof(struct HYD_Partition_list), status);
+                    HYDU_Allocate_Partition(&partition->next);
                     partition = partition->next;
                 }
-
                 partition->name = MPIU_Strdup(hostname);
-
-                /* FIXME: We don't support mappings yet */
-                partition->mapping = NULL;
                 partition->proc_count = num_procs;
-                partition->next = NULL;
 
                 total_procs += num_procs;
                 if (total_procs == proc_params->exec_proc_count)
@@ -198,15 +192,16 @@
 HYD_Status HYD_LCHU_Free_io(void)
 {
     struct HYD_Proc_params *proc_params;
+    struct HYD_Partition_list *partition;
     HYD_Status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
 
-    proc_params = handle.proc_params;
-    while (proc_params) {
-        HYDU_FREE(proc_params->out);
-        HYDU_FREE(proc_params->err);
-        proc_params = proc_params->next;
+    for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
+        for (partition = proc_params->partition; partition; partition = partition->next) {
+            HYDU_FREE(partition->out);
+            HYDU_FREE(partition->err);
+        }
     }
 
     HYDU_FUNC_EXIT();
@@ -214,24 +209,6 @@
 }
 
 
-HYD_Status HYD_LCHU_Free_exits(void)
-{
-    struct HYD_Proc_params *proc_params;
-    HYD_Status status = HYD_SUCCESS;
-
-    HYDU_FUNC_ENTER();
-
-    proc_params = handle.proc_params;
-    while (proc_params) {
-        HYDU_FREE(proc_params->exit_status);
-        proc_params = proc_params->next;
-    }
-
-    HYDU_FUNC_EXIT();
-    return status;
-}
-
-
 HYD_Status HYD_LCHU_Free_exec(void)
 {
     struct HYD_Proc_params *proc_params;

Modified: mpich2/trunk/src/pm/hydra/launcher/utils/lchu.h
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/utils/lchu.h	2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/launcher/utils/lchu.h	2009-03-08 23:35:27 UTC (rev 3972)
@@ -14,7 +14,6 @@
 HYD_Status HYD_LCHU_Create_env_list(void);
 HYD_Status HYD_LCHU_Free_env_list(void);
 HYD_Status HYD_LCHU_Free_io(void);
-HYD_Status HYD_LCHU_Free_exits(void);
 HYD_Status HYD_LCHU_Free_exec(void);
 HYD_Status HYD_LCHU_Free_proc_params(void);
 

Modified: mpich2/trunk/src/pm/hydra/pm/central/central_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/central/central_launch.c	2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/pm/central/central_launch.c	2009-03-08 23:35:27 UTC (rev 3972)
@@ -32,18 +32,20 @@
  * auto-incrementing variable; the bootstrap server will take care of
  * adding the process ID to the start value.
  *
- * 5. Ask the bootstrap server to launch the processes.
+ * 5. Set up the per-partition launch information (group IDs and
+ * argument lists) and ask the bootstrap server to launch the processes.
  */
 HYD_Status HYD_PMCI_Launch_procs(void)
 {
     char *port_range, *port_str, *sport;
     uint16_t low_port, high_port, port;
-    int one = 1, i;
-    int num_procs;
+    int one = 1, i, arg;
+    int num_procs, process_id, group_id;
     char hostname[MAX_HOSTNAME_LEN];
     struct sockaddr_in sa;
     HYD_Env_t *env;
     struct HYD_Proc_params *proc_params;
+    struct HYD_Partition_list *partition, *run, *next_partition;
     HYD_Status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
@@ -141,6 +143,58 @@
         goto fn_fail;
     }
 
+    /* FIXME: Temporary hack for testing until the proxy is ready to be
+     * used -- we just break the partition list into multiple
+     * segments, one for each process, and call the application
+     * executable directly. */
+    for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
+        group_id = 0;
+        for (partition = proc_params->partition; partition;) {
+            next_partition = partition->next; /* Keep track of the next partition */
+
+            partition->group_id = group_id++;
+            partition->group_rank = 0;
+
+            run = partition;
+            for (process_id = 1; process_id < partition->proc_count; process_id++) {
+                HYDU_Allocate_Partition(&run->next);
+                run = run->next;
+
+                run->name = MPIU_Strdup(partition->name);
+                run->proc_count = 1;
+                run->group_id = group_id++;
+                run->group_rank = 0;
+            }
+
+            partition->proc_count = 1;
+            partition = next_partition;
+        }
+    }
+
+    /* Create the argument list for each proxy */
+    process_id = 0;
+    for (proc_params = handle.proc_params; proc_params; proc_params = proc_params->next) {
+        for (partition = proc_params->partition; partition; partition = partition->next) {
+            /* Setup the executable arguments */
+            arg = 0;
+            partition->args[arg++] = MPIU_Strdup("sh");
+            partition->args[arg++] = MPIU_Strdup("-c");
+            partition->args[arg++] = MPIU_Strdup("\"");
+            partition->args[arg++] = NULL;
+
+            HYDU_Append_env(handle.system_env, partition->args, process_id);
+            HYDU_Append_env(proc_params->prop_env, partition->args, process_id);
+            HYDU_Append_wdir(partition->args);
+            HYDU_Append_exec(proc_params->exec, partition->args);
+
+            for (arg = 0; partition->args[arg]; arg++);
+            partition->args[arg++] = MPIU_Strdup("\"");
+            partition->args[arg++] = NULL;
+
+            process_id++;
+        }
+    }
+
     /* Initialize the bootstrap server and ask it to launch the
      * processes. */
     status = HYD_BSCI_Launch_procs();

Modified: mpich2/trunk/src/pm/hydra/utils/launch/Makefile.sm
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/launch/Makefile.sm	2009-03-08 03:49:50 UTC (rev 3971)
+++ mpich2/trunk/src/pm/hydra/utils/launch/Makefile.sm	2009-03-08 23:35:27 UTC (rev 3972)
@@ -7,7 +7,7 @@
 HYDRA_LIB_PATH = ../../lib
 
 libhydra_a_DIR = ${HYDRA_LIB_PATH}
-libhydra_a_SOURCES = args.c launch.c
+libhydra_a_SOURCES = args.c allocate.c launch.c
 INCLUDES = -I${abs_srcdir}/../../include \
 	-I${abs_srcdir}/../../../../include \
 	-I../../include \

Added: mpich2/trunk/src/pm/hydra/utils/launch/allocate.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/launch/allocate.c	                        (rev 0)
+++ mpich2/trunk/src/pm/hydra/utils/launch/allocate.c	2009-03-08 23:35:27 UTC (rev 3972)
@@ -0,0 +1,40 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *  (C) 2008 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include "hydra.h"
+#include "hydra_mem.h"
+#include "hydra_launch.h"
+
+HYD_Handle handle;
+
+HYD_Status HYDU_Allocate_Partition(struct HYD_Partition_list ** partition)
+{
+    HYD_Status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    HYDU_MALLOC(*partition, struct HYD_Partition_list *, sizeof(struct HYD_Partition_list),
+                status);
+    (*partition)->name = NULL;
+    (*partition)->proc_count = 0;
+    (*partition)->mapping = NULL;
+    (*partition)->group_id = -1;
+    (*partition)->group_rank = -1;
+    (*partition)->pid = -1;
+    (*partition)->out = -1;
+    (*partition)->err = -1;
+    (*partition)->exit_status = -1;
+    (*partition)->args[0] = NULL;
+
+    (*partition)->next = NULL;
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    goto fn_exit;
+}



More information about the mpich2-commits mailing list