[mpich2-commits] r4227 - in mpich2/trunk/src/pm/hydra: launcher/mpiexec pm/pmiserv

balaji at mcs.anl.gov balaji at mcs.anl.gov
Mon Mar 30 19:40:19 CDT 2009


Author: balaji
Date: 2009-03-30 19:40:19 -0500 (Mon, 30 Mar 2009)
New Revision: 4227

Modified:
   mpich2/trunk/src/pm/hydra/launcher/mpiexec/callback.c
   mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle_v1.c
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c
Log:
Bug fixes to the persistent proxy code for the case when the proxy is
booted on more nodes than what is needed by the application.


Modified: mpich2/trunk/src/pm/hydra/launcher/mpiexec/callback.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/mpiexec/callback.c	2009-03-30 22:13:31 UTC (rev 4226)
+++ mpich2/trunk/src/pm/hydra/launcher/mpiexec/callback.c	2009-03-31 00:40:19 UTC (rev 4227)
@@ -25,7 +25,8 @@
     close(fd);
 
     /* Find the FD in the handle and remove it. */
-    for (partition = handle.partition_list; partition; partition = partition->next) {
+    for (partition = handle.partition_list; partition && partition->exec_list;
+         partition = partition->next) {
         if (partition->out == fd) {
             partition->out = -1;
             goto fn_exit;

Modified: mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c
===================================================================
--- mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c	2009-03-30 22:13:31 UTC (rev 4226)
+++ mpich2/trunk/src/pm/hydra/launcher/mpiexec/mpiexec.c	2009-03-31 00:40:19 UTC (rev 4227)
@@ -110,7 +110,8 @@
     }
 
     /* Setup stdout/stderr/stdin handlers */
-    for (partition = handle.partition_list; partition; partition = partition->next) {
+    for (partition = handle.partition_list; partition && partition->exec_list;
+         partition = partition->next) {
         status = HYD_DMX_register_fd(1, &partition->out, HYD_STDOUT, NULL, HYD_LCHI_stdout_cb);
         HYDU_ERR_POP(status, "demux returned error registering fd\n");
 
@@ -138,7 +139,8 @@
 
     /* Check for the exit status for all the processes */
     exit_status = 0;
-    for (partition = handle.partition_list; partition; partition = partition->next)
+    for (partition = handle.partition_list; partition && partition->exec_list;
+         partition = partition->next)
         exit_status |= partition->exit_status;
 
     /* Call finalize functions for lower layers to cleanup their resources */

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle.c	2009-03-30 22:13:31 UTC (rev 4226)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle.c	2009-03-31 00:40:19 UTC (rev 4227)
@@ -108,7 +108,8 @@
 
     /* Find the number of processes in the PG */
     num_procs = 0;
-    for (partition = handle.partition_list; partition; partition = partition->next)
+    for (partition = handle.partition_list; partition && partition->exec_list;
+         partition = partition->next)
         for (exec = partition->exec_list; exec; exec = exec->next)
             num_procs += exec->proc_count;
 

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle_v1.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle_v1.c	2009-03-30 22:13:31 UTC (rev 4226)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_handle_v1.c	2009-03-31 00:40:19 UTC (rev 4227)
@@ -94,7 +94,8 @@
     id = atoi(strtok(NULL, "="));
 
     size = 0;
-    for (partition = handle.partition_list; partition; partition = partition->next)
+    for (partition = handle.partition_list; partition && partition->exec_list;
+         partition = partition->next)
         for (exec = partition->exec_list; exec; exec = exec->next)
             size += exec->proc_count;
 

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c	2009-03-30 22:13:31 UTC (rev 4226)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmi_serv_launch.c	2009-03-31 00:40:19 UTC (rev 4227)
@@ -31,11 +31,9 @@
 
     /* Create the arguments list for each proxy */
     process_id = 0;
-    for (partition = handle.partition_list; partition; partition = partition->next) {
+    for (partition = handle.partition_list; partition && partition->exec_list;
+         partition = partition->next) {
 
-        if (partition->exec_list == NULL)
-            break;
-
         arg = HYDU_strlist_lastidx(partition->proxy_args);
         i = 0;
         path_str[i++] = HYDU_strdup(handle.base_path);
@@ -265,7 +263,9 @@
     HYDU_ERR_POP(status, "bootstrap server initialization failed\n");
 
     first_partition = 1;
-    for (partition = handle.partition_list; partition; partition = partition->next) {
+    for (partition = handle.partition_list; partition && partition->exec_list;
+         partition = partition->next) {
+
         status = HYDU_sock_connect(partition->name, handle.proxy_port, &partition->control_fd);
         HYDU_ERR_POP(status, "unable to connect to proxy\n");
 
@@ -452,7 +452,8 @@
             /* Check to see if there's any open read socket left; if
              * there are, we will just wait for more events. */
             sockets_open = 0;
-            for (partition = handle.partition_list; partition; partition = partition->next) {
+            for (partition = handle.partition_list; partition && partition->exec_list;
+                 partition = partition->next) {
                 if (partition->out != -1 || partition->err != -1) {
                     sockets_open++;
                     break;
@@ -477,7 +478,8 @@
             do {
                 /* Check if the exit status has already arrived */
                 all_procs_exited = 1;
-                for (partition = handle.partition_list; partition; partition = partition->next) {
+                for (partition = handle.partition_list; partition && partition->exec_list;
+                     partition = partition->next) {
                     if (partition->exit_status == -1) {
                         all_procs_exited = 0;
                         break;



More information about the mpich2-commits mailing list