[mpich2-commits] r7873 - mpich2/trunk/src/pm/hydra/pm/pmiserv

balaji at mcs.anl.gov balaji at mcs.anl.gov
Mon Jan 31 23:53:14 CST 2011


Author: balaji
Date: 2011-01-31 23:53:14 -0600 (Mon, 31 Jan 2011)
New Revision: 7873

Modified:
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_utils.c
Log:
Fixes to the PMI_process_mapping key to allow for non-contiguous
layouts of nodes. Hydra still launches extra proxies in such cases,
but that part should be orthogonal to PMI.

Reviewed by buntinas.

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_utils.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_utils.c	2011-02-01 05:48:26 UTC (rev 7872)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmiserv_utils.c	2011-02-01 05:53:14 UTC (rev 7873)
@@ -103,10 +103,11 @@
 
 static HYD_status pmi_process_mapping(struct HYD_pg *pg, char **process_mapping_str)
 {
-    int i, node_id;
+    int i, is_equal;
     char *tmp[HYD_NUM_TMP_STRINGS];
-    struct HYD_proxy *proxy;
+    struct HYD_proxy *proxy, *tproxy;
     struct block {
+        int start_idx;
         int num_blocks;
         int block_size;
         struct block *next;
@@ -115,40 +116,102 @@
 
     HYDU_FUNC_ENTER();
 
+    /*
+     * Blocks are of the format: (start node ID, number of blocks,
+     * block size)
+     *
+     *   1. If two contiguous blocks have the same start node ID and
+     *      the same block size, we merge them.
+     *
+     *   2. If two contiguous blocks are contiguous in node ID values,
+     *      and have the same block size, we merge them.
+     */
     blocklist_head = NULL;
     for (proxy = pg->proxy_list; proxy; proxy = proxy->next) {
         if (blocklist_head == NULL) {
-            HYDU_MALLOC(blocklist_head, struct block *, sizeof(struct block), status);
-            blocklist_head->block_size = proxy->node.core_count;
-            blocklist_head->num_blocks = 1;
-            blocklist_head->next = NULL;
-            blocklist_tail = blocklist_head;
+            HYDU_MALLOC(block, struct block *, sizeof(struct block), status);
+            block->start_idx = proxy->proxy_id;
+            block->num_blocks = 1;
+            block->block_size = proxy->node.core_count;
+            block->next = NULL;
+
+            blocklist_tail = blocklist_head = block;
         }
-        else if (blocklist_tail->block_size == proxy->node.core_count) {
+        else if (blocklist_tail->start_idx == proxy->proxy_id &&
+                 blocklist_tail->block_size == proxy->node.core_count) {
             blocklist_tail->num_blocks++;
         }
         else {
-            HYDU_MALLOC(blocklist_tail->next, struct block *, sizeof(struct block), status);
-            blocklist_tail = blocklist_tail->next;
-            blocklist_tail->block_size = proxy->node.core_count;
-            blocklist_tail->num_blocks = 1;
-            blocklist_tail->next = NULL;
+            /* Check if this proxy hostname existed earlier */
+            for (tproxy = pg->proxy_list; tproxy; tproxy = tproxy->next) {
+                if (!strcmp(proxy->node.hostname, tproxy->node.hostname))
+                    break;
+            }
+
+            if (blocklist_tail->start_idx + blocklist_tail->num_blocks == tproxy->proxy_id &&
+                blocklist_tail->block_size == proxy->node.core_count) {
+                blocklist_tail->num_blocks++;
+            }
+            else {
+                HYDU_MALLOC(blocklist_tail->next, struct block *, sizeof(struct block),
+                            status);
+                blocklist_tail = blocklist_tail->next;
+                blocklist_tail->start_idx = tproxy ? tproxy->proxy_id : proxy->proxy_id;
+                blocklist_tail->num_blocks = 1;
+                blocklist_tail->block_size = proxy->node.core_count;
+                blocklist_tail->next = NULL;
+            }
         }
     }
 
+    /* See if there are any extra merging opportunities */
+
+    /* Case 1: If all the blocks are equivalent, just use one block */
+    is_equal = 1;
+    for (block = blocklist_head; block->next; block = block->next) {
+        if (block->start_idx != block->next->start_idx ||
+            block->block_size != block->next->block_size) {
+            is_equal = 0;
+            break;
+        }
+    }
+    if (is_equal) {
+        for (block = blocklist_head; block->next;) {
+            nblock = block->next;
+            block->next = nblock->next;
+            HYDU_FREE(nblock);
+        }
+        blocklist_tail = blocklist_head;
+    }
+
+    /* Case 2: If two contiguous blocks represent the same set of
+     * nodes, merge them */
+    for (block = blocklist_head; block->next;) {
+        blocklist_tail = block;
+        if (block->start_idx == block->next->start_idx &&
+            block->block_size == block->next->block_size) {
+            block->num_blocks += block->next->num_blocks;
+            nblock = block->next;
+            block->next = nblock->next;
+            HYDU_FREE(nblock);
+        }
+        else {
+            block = block->next;
+        }
+    }
+
+    /* Create the mapping out of the blocks */
     i = 0;
     tmp[i++] = HYDU_strdup("(");
     tmp[i++] = HYDU_strdup("vector,");
-    node_id = 0;
     for (block = blocklist_head; block; block = block->next) {
         tmp[i++] = HYDU_strdup("(");
-        tmp[i++] = HYDU_int_to_str(node_id);
+        tmp[i++] = HYDU_int_to_str(block->start_idx);
         tmp[i++] = HYDU_strdup(",");
         tmp[i++] = HYDU_int_to_str(block->num_blocks);
         tmp[i++] = HYDU_strdup(",");
         tmp[i++] = HYDU_int_to_str(block->block_size);
         tmp[i++] = HYDU_strdup(")");
-        node_id += (block->num_blocks * block->block_size);
         if (block->next)
             tmp[i++] = HYDU_strdup(",");
         HYDU_STRLIST_CONSOLIDATE(tmp, i, status);



More information about the mpich2-commits mailing list