[mpich2-commits] r7736 - mpich2/trunk/src/pm/hydra/pm/pmiserv

balaji at mcs.anl.gov balaji at mcs.anl.gov
Tue Jan 18 13:55:59 CST 2011


Author: balaji
Date: 2011-01-18 13:55:59 -0600 (Tue, 18 Jan 2011)
New Revision: 7736

Modified:
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_cb.c
Log:
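Handle errors while sending data to dead processes more cleanly: the
"!closed" assertions now apply only when auto_cleanup is enabled.
Otherwise the PMI response is dropped, and downstream stdin is closed,
marked HYD_FD_CLOSED, and skipped on later forwarding attempts.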

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_cb.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_cb.c	2011-01-18 19:55:58 UTC (rev 7735)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_cb.c	2011-01-18 19:55:59 UTC (rev 7736)
@@ -393,8 +393,14 @@
 
     status = HYDU_sock_write(hdr.pid, buf, hdr.buflen, &sent, &closed);
     HYDU_ERR_POP(status, "unable to forward PMI response to MPI process\n");
-    HYDU_ASSERT(!closed, status);
 
+    if (HYD_pmcd_pmip.user_global.auto_cleanup) {
+        HYDU_ASSERT(!closed, status);
+    }
+    else {
+        /* Ignore the error and drop the PMI response */
+    }
+
   fn_exit:
     if (pmi_cmd)
         HYDU_FREE(pmi_cmd);
@@ -886,6 +892,9 @@
         int count;
 
         if (hdr.buflen) {
+            if (HYD_pmcd_pmip.downstream.in == HYD_FD_CLOSED)
+                goto fn_exit;
+
             HYDU_MALLOC(buf, char *, hdr.buflen, status);
             HYDU_ERR_POP(status, "unable to allocate memory\n");
 
@@ -897,8 +906,15 @@
             status = HYDU_sock_write(HYD_pmcd_pmip.downstream.in, buf, hdr.buflen, &count,
                                      &closed);
             HYDU_ERR_POP(status, "unable to write to downstream stdin\n");
-            HYDU_ASSERT(!closed, status);
 
+            if (HYD_pmcd_pmip.user_global.auto_cleanup) {
+                HYDU_ASSERT(!closed, status);
+            }
+            else {
+                close(HYD_pmcd_pmip.downstream.in);
+                HYD_pmcd_pmip.downstream.in = HYD_FD_CLOSED;
+            }
+
             HYDU_FREE(buf);
         }
         else {



More information about the mpich2-commits mailing list