[MOAB-dev] r1623 - MOAB/trunk/parallel

tautges at mcs.anl.gov
Wed Feb 27 17:27:32 CST 2008


Author: tautges
Date: 2008-02-27 17:27:32 -0600 (Wed, 27 Feb 2008)
New Revision: 1623

Modified:
   MOAB/trunk/parallel/MBParallelComm.cpp
   MOAB/trunk/parallel/MBParallelComm.hpp
   MOAB/trunk/parallel/ReadParallel.cpp
   MOAB/trunk/parallel/gs.c
   MOAB/trunk/parallel/gs.h
   MOAB/trunk/parallel/sort.h
   MOAB/trunk/parallel/transfer.c
   MOAB/trunk/parallel/tuple_list.c
   MOAB/trunk/parallel/tuple_list.h
Log:
Pass handles around during negotiation of shared vertices, so that every processor knows the remote handles for its shared vertices.  Works on smaller tests, but not yet on larger ones.
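
The idea behind the change: vertices are matched across processors by global ID, and each processor then learns, for every shared skin vertex, which remote processors share it and what handle each of them uses for it. Below is a minimal, serial C++ sketch of that matching, standing in for the gs_data_setup()/crystal-router machinery; the processor count, global IDs, and handle values are made up for illustration.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <map>
#include <utility>
#include <vector>

typedef std::uint64_t Handle;                  // stand-in for MBEntityHandle
struct SkinVert { long gid; Handle handle; };  // one skin vertex on one proc

int main() {
  // Made-up skin-vertex lists for three "processors"; gid 101 is shared by all
  // three, gid 102 by procs 0 and 2, gid 103 touches only proc 1.
  std::vector<std::vector<SkinVert> > proc_verts(3);
  proc_verts[0].push_back({101, 0x1001}); proc_verts[0].push_back({102, 0x1002});
  proc_verts[1].push_back({101, 0x2001}); proc_verts[1].push_back({103, 0x2003});
  proc_verts[2].push_back({101, 0x3001}); proc_verts[2].push_back({102, 0x3002});

  // "Gather": collect every (proc, handle) pair under its global ID.
  std::map<long, std::vector<std::pair<int, Handle> > > by_gid;
  for (int p = 0; p < (int)proc_verts.size(); p++)
    for (std::size_t i = 0; i < proc_verts[p].size(); i++)
      by_gid[proc_verts[p][i].gid].push_back(
          std::make_pair(p, proc_verts[p][i].handle));

  // "Scatter": each proc learns the remote procs sharing each of its vertices
  // and the handle each remote proc uses for that vertex.
  for (int p = 0; p < (int)proc_verts.size(); p++)
    for (std::size_t i = 0; i < proc_verts[p].size(); i++) {
      const std::vector<std::pair<int, Handle> > &sharers =
          by_gid[proc_verts[p][i].gid];
      for (std::size_t s = 0; s < sharers.size(); s++)
        if (sharers[s].first != p)
          std::cout << "proc " << p << ": gid " << proc_verts[p][i].gid
                    << " is handle 0x" << std::hex << sharers[s].second
                    << std::dec << " on proc " << sharers[s].first << "\n";
    }
  return 0;
}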


Modified: MOAB/trunk/parallel/MBParallelComm.cpp
===================================================================
--- MOAB/trunk/parallel/MBParallelComm.cpp	2008-02-27 22:36:43 UTC (rev 1622)
+++ MOAB/trunk/parallel/MBParallelComm.cpp	2008-02-27 23:27:32 UTC (rev 1623)
@@ -17,10 +17,14 @@
 
 #define MAX_SHARING_PROCS 10  
 
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
 const bool debug = false;
 
+#include <math.h>
+
 extern "C" 
 {
+#include "minmax.h"
 #include "gs.h"
 #include "tuple_list.h"
 }
@@ -33,6 +37,9 @@
 
 #define INITIAL_BUFF_SIZE 1024
 
+unsigned char MBParallelComm::PROC_SHARED = 0x1;
+unsigned char MBParallelComm::PROC_OWNER = 0x2;
+
 #define PACK_INT(buff, int_val) {int tmp_val = int_val; PACK_INTS(buff, &tmp_val, 1);}
 
 #define PACK_INTS(buff, int_val, num) {memcpy(buff, int_val, num*sizeof(int)); buff += num*sizeof(int);}
@@ -69,8 +76,15 @@
           dynamic_cast<MBCore*>(mbImpl)->get_error_handler()->set_last_error(a);\
           return result;}
 
+#define RRA(a) if (MB_SUCCESS != result) {\
+      std::string tmp_str; mbImpl->get_last_error(tmp_str);\
+      tmp_str.append("\n"); tmp_str.append(a);\
+      dynamic_cast<MBCore*>(mbImpl)->get_error_handler()->set_last_error(tmp_str.c_str()); \
+      return result;}
+
 MBParallelComm::MBParallelComm(MBInterface *impl, MPI_Comm comm) 
-    : mbImpl(impl), procConfig(comm)
+    : mbImpl(impl), procConfig(comm), sharedpTag(0), sharedpsTag(0),
+      sharedhTag(0), sharedhsTag(0), pstatusTag(0)
 {
   myBuffer.resize(INITIAL_BUFF_SIZE);
 
@@ -81,7 +95,8 @@
 MBParallelComm::MBParallelComm(MBInterface *impl,
                                std::vector<unsigned char> &tmp_buff, 
                                MPI_Comm comm) 
-    : mbImpl(impl), procConfig(comm)
+    : mbImpl(impl), procConfig(comm), sharedpTag(0), sharedpsTag(0),
+      sharedhTag(0), sharedhsTag(0), pstatusTag(0)
 {
   myBuffer.swap(tmp_buff);
 }
@@ -100,7 +115,7 @@
   for (int dim = 0; dim <= dimension; dim++) {
     if (dim == 0 || !largest_dim_only || dim == dimension) {
       result = mbImpl->get_entities_by_dimension(this_set, dim, entities[dim]); 
-      RR("Failed to get vertices in assign_global_ids.");
+      RRA("Failed to get vertices in assign_global_ids.");
     }
 
       // need to filter out non-locally-owned entities!!!
@@ -150,7 +165,7 @@
       num_elements[i++] = total_elems[dim]++;
     
     result = mbImpl->tag_set_data(gid_tag, entities[dim], &num_elements[0]); 
-    RR("Failed to set global id tag in assign_global_ids.");
+    RRA("Failed to set global id tag in assign_global_ids.");
   }
   
   return MB_SUCCESS;
@@ -184,7 +199,7 @@
     
     result = pack_buffer(entities, adjacencies, tags, true, 
                          whole_range, buff_size); 
-    RR("Failed to compute buffer size in communicate_entities.");
+    RRA("Failed to compute buffer size in communicate_entities.");
 
       // if the message is large, send a first message to tell how large
     if (INITIAL_BUFF_SIZE < buff_size) {
@@ -192,7 +207,7 @@
       MPI_Request send_req;
       int success = MPI_Isend(&tmp_buff_size, sizeof(int), MPI_UNSIGNED_CHAR, to_proc, 
                               0, procConfig.proc_comm(), &send_req);
-      if (!success) return MB_FAILURE;
+      if (success != MPI_SUCCESS) return MB_FAILURE;
     }
     
       // allocate space in the buffer
@@ -202,7 +217,7 @@
     int actual_buff_size;
     result = pack_buffer(entities, adjacencies, tags, false, 
                          whole_range, actual_buff_size); 
-    RR("Failed to pack buffer in communicate_entities.");
+    RRA("Failed to pack buffer in communicate_entities.");
     
       // send it
     MPI_Request send_req;
@@ -232,7 +247,7 @@
     
       // unpack the buffer
     result = unpack_buffer(entities); 
-    RR("Failed to unpack buffer in communicate_entities.");
+    RRA("Failed to unpack buffer in communicate_entities.");
   }
   
   return result;
@@ -265,7 +280,7 @@
 
   if ((int)procConfig.proc_rank() == from_proc) {
     result = pack_buffer( entities, adjacencies, tags, true, whole_range, buff_size ); 
-    RR("Failed to compute buffer size in broadcast_entities.");
+    RRA("Failed to compute buffer size in broadcast_entities.");
   }
 
   success = MPI_Bcast( &buff_size, 1, MPI_INT, from_proc, procConfig.proc_comm() );
@@ -281,7 +296,7 @@
     int actual_buffer_size;
     result = pack_buffer( entities, adjacencies, tags, false, 
                           whole_range, actual_buffer_size );
-    RR("Failed to pack buffer in broadcast_entities.");
+    RRA("Failed to pack buffer in broadcast_entities.");
   }
 
   success = MPI_Bcast( &myBuffer[0], buff_size, MPI_UNSIGNED_CHAR, from_proc, procConfig.proc_comm() );
@@ -290,7 +305,7 @@
   
   if ((int)procConfig.proc_rank() != from_proc) {
     result = unpack_buffer( entities );
-    RR("Failed to unpack buffer in broadcast_entities.");
+    RRA("Failed to unpack buffer in broadcast_entities.");
   }
 
   return MB_SUCCESS;
@@ -315,19 +330,19 @@
     // entities
   result = pack_entities(entities, rit, whole_range, buff_ptr, 
                          buff_size, just_count); 
-  RR("Packing entities failed.");
+  RRA("Packing entities failed.");
   
     // sets
   int tmp_size;
   result = pack_sets(entities, rit, whole_range, buff_ptr, tmp_size, just_count); 
-  RR("Packing sets failed.");
+  RRA("Packing sets failed.");
   buff_size += tmp_size;
   
     // adjacencies
   if (adjacencies) {
     result = pack_adjacencies(entities, rit, whole_range, buff_ptr, 
                               tmp_size, just_count);
-    RR("Packing adjs failed.");
+    RRA("Packing adjs failed.");
     buff_size += tmp_size;
   }
     
@@ -335,7 +350,7 @@
   if (tags) {
     result = pack_tags(entities, rit, whole_range, buff_ptr, 
                        tmp_size, just_count);
-    RR("Packing tags failed.");
+    RRA("Packing tags failed.");
     buff_size += tmp_size;
   }
 
@@ -348,11 +363,11 @@
   
   unsigned char *buff_ptr = &myBuffer[0];
   MBErrorCode result = unpack_entities(buff_ptr, entities);
-  RR("Unpacking entities failed.");
+  RRA("Unpacking entities failed.");
   result = unpack_sets(buff_ptr, entities);
-  RR("Unpacking sets failed.");
+  RRA("Unpacking sets failed.");
   result = unpack_tags(buff_ptr, entities);
-  RR("Unpacking tags failed.");
+  RRA("Unpacking tags failed.");
   
   return MB_SUCCESS;
 }
@@ -522,7 +537,7 @@
       //else {
         for (MBRange::const_iterator rit = allr_it->begin(); rit != allr_it->end(); rit++) {
           result = mbImpl->get_connectivity(*rit, connect, num_connect);
-          RR("Failed to get connectivity.");
+          RRA("Failed to get connectivity.");
           assert(num_connect == *nv_it);
           PACK_EH(buff_ptr, &connect[0], num_connect);
         }
@@ -580,7 +595,7 @@
         RR("Failed to allocate node arrays.");
 
         if (actual_start != (*pit).first)
-          return MB_FAILURE;
+          RR("Warning: actual vertex handle different from requested handle in unpack_entities.");
 
         entities.insert((*pit).first, (*pit).second);
         
@@ -616,6 +631,8 @@
                                        start_id, start_proc, actual_start,
                                        connect);
         RR("Failed to allocate element arrays.");
+        if (actual_start != (*pit).first) 
+          RR("Warning: actual entity handle different from requested handle in unpack_entities.");
 
           // copy connect arrays
         UNPACK_EH(buff_ptr, connect, (num_elems*verts_per_entity));
@@ -656,7 +673,7 @@
           // range-based set; count the subranges
         setRanges.push_back(MBRange());
         result = mbImpl->get_entities_by_handle(*start_rit, *setRanges.rbegin());
-        RR("Failed to get set entities.");
+        RRA("Failed to get set entities.");
         count += 2 * sizeof(MBEntityHandle) * num_subranges(*setRanges.rbegin()) + sizeof(int);
       }
       else if (options & MESHSET_ORDERED) {
@@ -1059,6 +1076,7 @@
   MBRange skin_ents[4];
   MBErrorCode result;
   std::vector<int> gid_data;
+  std::vector<MBEntityHandle> handle_vec;
 
   if (!proc_ents.empty()) {
       // find the skin entities
@@ -1074,7 +1092,7 @@
       skin_dim = upper_dim-1;
       result = skinner.find_skin(proc_ents, skin_ents[skin_dim],
                                  skin_ents[skin_dim], true);
-      RR("Failed to find skin.");
+      RRA("Failed to find skin.");
       if (debug) std::cerr << "Found skin, now resolving." << std::endl;
     }
     else {
@@ -1105,13 +1123,13 @@
     else if (MB_ALREADY_ALLOCATED != result) {
         // just created it, so we need global ids
       result = assign_global_ids(0, upper_dim);
-      RR("Failed assigning global ids.");
+      RRA("Failed assigning global ids.");
     }
 
       // store index in temp tag; reuse gid_data 
-    gid_data.resize(skin_ents[0].size());
+    gid_data.resize(2*skin_ents[0].size());
     int idx = 0;
-    for (MBRange::iterator rit = skin_ents[0].begin(); 
+    for (rit = skin_ents[0].begin(); 
          rit != skin_ents[0].end(); rit++) 
       gid_data[idx] = idx, idx++;
     MBTag idx_tag;
@@ -1124,10 +1142,16 @@
       // get gids for skin verts in a vector, to pass to gs
     result = mbImpl->tag_get_data(gid_tag, skin_ents[0], &gid_data[0]);
     RR("Couldn't get gid tag for skin vertices.");
+
+      // put handles in vector for passing to gs setup
+    std::copy(skin_ents[0].begin(), skin_ents[0].end(), 
+              std::back_inserter(handle_vec));
+
   }
   else {
       // need to have at least one position so we can get a ptr to it
     gid_data.resize(1);
+    handle_vec.resize(1);
   }
   
     // get a crystal router
@@ -1146,7 +1170,18 @@
   }
   
     // call gather-scatter to get shared ids & procs
-  gs_data *gsd = gs_data_setup(skin_ents[0].size(), (const ulong_*)&gid_data[0], 1, cd);
+  gs_data *gsd;
+  if (sizeof(int) != sizeof(long)) {
+    std::vector<long> lgid_data(gid_data.size());
+    std::copy(gid_data.begin(), gid_data.end(), lgid_data.begin());
+    gsd = gs_data_setup(skin_ents[0].size(), &lgid_data[0], (ulong_*)&handle_vec[0], 2, 
+                        1, 1, cd);
+  }
+  else {
+    gsd = gs_data_setup(skin_ents[0].size(), (long*)&gid_data[0], 
+                        (ulong_*)&handle_vec[0], 2, 1, 1, cd);
+  }
+  
   if (NULL == gsd) {
     result = MB_FAILURE;
     RR("Couldn't create gs data.");
@@ -1156,63 +1191,181 @@
   if (proc_ents.empty()) return MB_SUCCESS;
   
     // get shared proc tags
-  int def_vals[2] = {-10*procConfig.proc_size(), -10*procConfig.proc_size()};
-  MBTag sharedp_tag, sharedps_tag;
-  result = mbImpl->tag_create(PARALLEL_SHARED_PROC_TAG_NAME, 2*sizeof(int), 
-                              MB_TAG_DENSE,
-                              MB_TYPE_INTEGER, sharedp_tag, &def_vals, true);
-  if (MB_SUCCESS != result && MB_ALREADY_ALLOCATED != result) {
-    RR("Couldn't create shared_proc tag.");
-  }
+  MBTag sharedp_tag, sharedps_tag, sharedh_tag, sharedhs_tag, pstatus_tag;
+  result = get_shared_proc_tags(sharedp_tag, sharedps_tag, 
+                                sharedh_tag, sharedhs_tag, pstatus_tag);
+  RRA(" ");
   
-  result = mbImpl->tag_create(PARALLEL_SHARED_PROCS_TAG_NAME, 
-                              MAX_SHARING_PROCS*sizeof(int), 
-                              MB_TAG_SPARSE,
-                              MB_TYPE_INTEGER, sharedps_tag, NULL, true);
-  if (MB_SUCCESS != result && MB_ALREADY_ALLOCATED != result) {
-    RR("Couldn't create shared_procs tag.");
-  }
-  
     // load shared vertices into a tuple, then sort by index
   tuple_list shared_verts;
-  tuple_list_init_max(&shared_verts, 0, 2, 0, 
-                      skin_ents[0].size()*MAX_SHARING_PROCS);
-  int i = 0;
-  unsigned int j = 0;
+  tuple_list_init_max(&shared_verts, 2, 0, 1, 0, 
+                      skin_ents[0].size()*(MAX_SHARING_PROCS+1));
+  unsigned int i = 0, j = 0;
   for (unsigned int p = 0; p < gsd->nlinfo->np; p++) 
-    for (unsigned int np = 0; np < gsd->nlinfo->nshared[p]; np++) 
-      shared_verts.vl[i++] = gsd->nlinfo->sh_ind[j++],
-        shared_verts.vl[i++] = gsd->nlinfo->target[p],
-        shared_verts.n++;
-  std::vector<int> sort_buffer(skin_ents[0].size()*MAX_SHARING_PROCS);
+    for (unsigned int np = 0; np < gsd->nlinfo->nshared[p]; np++) {
+      shared_verts.vi[i++] = gsd->nlinfo->sh_ind[j];
+      shared_verts.vi[i++] = gsd->nlinfo->target[p];
+      shared_verts.vul[j] = gsd->nlinfo->ulabels[j];
+      j++;
+      shared_verts.n++;
+    }
+  
+  int max_size = skin_ents[0].size()*(MAX_SHARING_PROCS+1);
+  std::vector<int> sort_buffer(max_size);
   tuple_list_sort(&shared_verts, 0,(buffer*)&sort_buffer[0]);
 
-    // set sharing procs tags on skin vertices
-  int maxp = -10*procConfig.proc_size();
-  int sharing_procs[MAX_SHARING_PROCS] = {maxp};
-  j = 0;
-  while (j < 2*shared_verts.n) {
-      // count & accumulate sharing procs
-    int nump = 0, this_idx = shared_verts.vl[j];
-    while (shared_verts.vl[j] == this_idx)
-      j++, sharing_procs[nump++] = shared_verts.vl[j++];
+    // set sharing procs and handles tags on skin vertices
+  int maxp = -1;
+  std::vector<int> sharing_procs(MAX_SHARING_PROCS);
+  std::fill(sharing_procs.begin(), sharing_procs.end(), maxp);
+  j = 0; i = 0;
 
-    sharing_procs[nump++] = procConfig.proc_rank();
-    MBEntityHandle this_ent = skin_ents[0][this_idx];
-    if (2 == nump)
-      result = mbImpl->tag_set_data(sharedp_tag, &this_ent, 1,
-                                    sharing_procs);
-    else
-      result = mbImpl->tag_set_data(sharedps_tag, &this_ent, 1,
-                                    sharing_procs);
-    RR("Failed setting shared_procs tag on skin vertices.");
+    // get vertices shared by 1 or n procs
+  std::map<std::vector<int>, MBRange> proc_nranges[MAX_SHARING_PROCS];
+  std::map<int, MBRange> proc_ranges;
+  result = tag_shared_verts(shared_verts, skin_ents,
+                            proc_ranges, proc_nranges);
+  RRA(" ");
+  
+    // get entities shared by 1 or n procs
+  result = tag_shared_ents(shared_dim, shared_verts, skin_ents,
+                           proc_ranges, proc_nranges);
+  RRA(" ");
+  
+    // create the sets for each interface
+  result = create_interface_sets(proc_ranges, proc_nranges);
+  RRA(" ");
+  
+  if (0 == shared_dim) return MB_SUCCESS;
 
-      // reset sharing proc(s) tags
-    std::fill(sharing_procs, sharing_procs+nump, maxp);
+/*  
+    // now send handles for shared non-vertex ents
+    // pack tuple_list;  tuple: dest proc, gid, handle
+  tuple_list_free(&shared_verts);
+  tuple_list_init_max(&shared_verts, 1, 2, 0, 
+                      skin_ents[0].size()*MAX_SHARING_PROCS);
+  MBRange::iterator rit;
+
+  for (dim = shared_dim; dim > 0; dim--) {
+    
+      // pack tuples for 1proc interface ents
+  result = mbImpl->tag_get_data(sharedp_tag, shared_verts_1proc,
+                                &sort_buffer[0]);
+  result = mbImpl->tag_get_data(gid_tag, shared_verts_1proc,
+                                &gid_data[0]);
+  for (rit = shared_verts_1proc.begin(), i = 0; rit != shared_verts_1proc.end();
+       rit++, i++) {
+      // skip if this vertex isn't owned by this proc
+    if (sort_buffer[2*i] != proc_rk) continue;
+    
+    shared_verts.vl[j++] = sort_buffer[2*i+1];
+    shared_verts.vl[j++] = gid_data[i];
+    shared_verts.vl[j++] = *rit;
+    shared_verts.n++;
   }
   
+    // pack tuples for nproc interface vertices
+  result = mbImpl->tag_get_data(sharedps_tag, shared_verts_nproc,
+                                &sort_buffer[0]);
+  result = mbImpl->tag_get_data(gid_tag, shared_verts_nproc,
+                                &gid_data[0]);
+  for (rit = shared_verts_nproc.begin(), i = 0; rit != shared_verts_nproc.end();
+       rit++, i++) {
+      // skip if this vertex isn't owned by this proc
+    if (sort_buffer[MAX_SHARING_PROCS*i] != proc_rk) continue;
+    unsigned int k = 1;
+    while (sort_buffer[MAX_SHARING_PROCS*i+k] != -1) {
+      shared_verts.vl[j++] = sort_buffer[MAX_SHARING_PROCS*i+k];
+      shared_verts.vl[j++] = gid_data[i];
+      shared_verts.vl[j++] = *rit;
+      shared_verts.n++;
+    }
+  }
+  
+    // exchange tuples
+  gs_transfer(1, &shared_verts, 0, cd);
+  
+    // process arriving tuples: match gid to local handle, 
+    // set pgid to remote handle
+    */
+
+    // done
+  return result;
+}
+
+MBErrorCode MBParallelComm::create_interface_sets(std::map<int, MBRange> &proc_ranges,
+                                                  std::map<std::vector<int>, MBRange> *proc_nranges) 
+{
+  MBTag sharedp_tag, sharedps_tag, sharedh_tag, sharedhs_tag, pstatus_tag;
+  MBErrorCode result = get_shared_proc_tags(sharedp_tag, sharedps_tag, 
+                                            sharedh_tag, sharedhs_tag,
+                                            pstatus_tag);
+  RRA(" ");
+
+  for (int j = 0; j < MAX_SHARING_PROCS; j++) {
+    for (std::map<std::vector<int>,MBRange>::iterator mit = proc_nranges[j].begin();
+       mit != proc_nranges[j].end(); mit++) {
+        // create the set
+      MBEntityHandle new_set;
+      result = mbImpl->create_meshset(MESHSET_SET, new_set); 
+      RR("Failed to create interface set.");
+        // add entities
+      result = mbImpl->add_entities(new_set, (*mit).second); 
+      RR("Failed to add entities to interface set.");
+        // tag it with the proc ranks
+      result = mbImpl->tag_set_data(sharedps_tag, &new_set, 1,
+                                    &((*mit).first)[0]); 
+      RR("Failed to tag interface set with procs.");
+
+        // get the owning proc, then set the pstatus tag
+      int min_proc = procConfig.proc_size();
+      for (int i = 0; i < j+1; i++) min_proc = MIN(min_proc, ((*mit).first)[i]);
+      unsigned char pstatus = PSTATUS_SHARED;
+      if (min_proc == (int) procConfig.proc_rank()) pstatus |= PSTATUS_NOT_OWNED;
+      result = mbImpl->tag_set_data(pstatus_tag, &new_set, 1, &pstatus); 
+      RR("Failed to tag interface set with pstatus.");
+    }
+  }
+  for (std::map<int,MBRange>::iterator mit = proc_ranges.begin();
+       mit != proc_ranges.end(); mit++) {
+      // create the set
+    MBEntityHandle new_set;
+    result = mbImpl->create_meshset(MESHSET_SET, new_set);
+    RR("Failed to create interface set.");
+      // add entities
+    result = mbImpl->add_entities(new_set, (*mit).second);
+    RR("Failed to add entities to interface set.");
+      // tag it with the proc ranks
+    int sharing_proc = (*mit).first;
+    result = mbImpl->tag_set_data(sharedp_tag, &new_set, 1,
+                                  &sharing_proc);
+    RR("Failed to tag interface set with procs.");
+
+      // get the owning proc, then set the pstatus tag
+    unsigned char pstatus = PSTATUS_SHARED;
+    if (sharing_proc > (int) procConfig.proc_rank()) pstatus |= PSTATUS_NOT_OWNED;
+    result = mbImpl->tag_set_data(pstatus_tag, &new_set, 1, &pstatus); 
+    RR("Failed to tag interface set with pstatus.");
+  }
+
+  return MB_SUCCESS;
+}
+
+MBErrorCode MBParallelComm::tag_shared_ents(int shared_dim,
+                                            tuple_list &shared_verts,
+                                            MBRange *skin_ents,
+                                            std::map<int, MBRange> &proc_ranges,
+                                            std::map<std::vector<int>, MBRange> *proc_nranges) 
+{
     // set sharing procs tags on other skin ents
+  MBTag sharedp_tag, sharedps_tag, sharedh_tag, sharedhs_tag, pstatus_tag;
+  MBErrorCode result = get_shared_proc_tags(sharedp_tag, sharedps_tag, 
+                                            sharedh_tag, sharedhs_tag, pstatus_tag);
+  RRA(" ");
   const MBEntityHandle *connect; int num_connect;
+  std::vector<int> sharing_procs(MAX_SHARING_PROCS);
+  std::fill(sharing_procs.begin(), sharing_procs.end(), -1);
+
   for (int d = shared_dim; d > 0; d--) {
     for (MBRange::iterator rit = skin_ents[d].begin();
          rit != skin_ents[d].end(); rit++) {
@@ -1222,15 +1375,15 @@
       MBRange sp_range, vp_range;
       for (int nc = 0; nc < num_connect; nc++) {
           // get sharing procs
-        result = mbImpl->tag_get_data(sharedp_tag, &(*rit), 1, sharing_procs);
+        result = mbImpl->tag_get_data(sharedp_tag, &(*rit), 1, &sharing_procs[0]);
         RR("Couldn't get sharedp_tag on skin vertices in entity.");
-        if (sharing_procs[0] == maxp) {
-          result = mbImpl->tag_get_data(sharedps_tag, &(*rit), 1, sharing_procs);
+        if (sharing_procs[0] == -1) {
+          result = mbImpl->tag_get_data(sharedps_tag, &(*rit), 1, &sharing_procs[0]);
           RR("Couldn't get sharedps_tag on skin vertices in entity.");
         }
           // build range of sharing procs for this vertex
         unsigned int p = 0; vp_range.clear();
-        while (sharing_procs[p] != maxp && p < MAX_SHARING_PROCS)
+        while (p < MAX_SHARING_PROCS && sharing_procs[p] != -1)
           vp_range.insert(sharing_procs[p]), p++;
         assert(p < MAX_SHARING_PROCS);
           // intersect with range for this skin ent
@@ -1242,27 +1395,153 @@
       assert(!sp_range.empty());
       MBRange::iterator rit2;
         // set tag for this ent
-      for (j = 0, rit2 = sp_range.begin(); 
-           rit2 != sp_range.end(); rit2++, j++)
-        sharing_procs[j] = *rit;
-      if (2 >= j)
+      int j = 0;
+      for (rit2 = sp_range.begin(); rit2 != sp_range.end(); rit2++)
+        if (*rit2 != procConfig.proc_rank()) {
+          sharing_procs[j++] = *rit2;
+        }
+          
+
+      if (2 > j) {
         result = mbImpl->tag_set_data(sharedp_tag, &(*rit), 1,
-                                      sharing_procs);
-      else
+                                      &sharing_procs[0]);
+        proc_ranges[sharing_procs[0]].insert(*rit);
+      }
+      else {
         result = mbImpl->tag_set_data(sharedps_tag, &(*rit), 1,
-                                      sharing_procs);
-
+                                      &sharing_procs[0]);
+        proc_nranges[j-1][sharing_procs].insert(*rit);
+      }
+      
       RR("Failed to set sharedp(s)_tag on non-vertex skin entity.");
       
         // reset sharing proc(s) tags
-      std::fill(sharing_procs, sharing_procs+j, maxp);
+      std::fill(sharing_procs.begin(), sharing_procs.end(), -1);
     }
   }
 
-    // done
-  return result;
+  return MB_SUCCESS;
 }
 
+MBErrorCode MBParallelComm::tag_shared_verts(tuple_list &shared_verts,
+                                             MBRange *skin_ents,
+                                             std::map<int, MBRange> &proc_ranges,
+                                             std::map<std::vector<int>, MBRange> *proc_nranges) 
+{
+  MBTag sharedp_tag, sharedps_tag, sharedh_tag, sharedhs_tag, pstatus_tag;
+  MBErrorCode result = get_shared_proc_tags(sharedp_tag, sharedps_tag, 
+                                            sharedh_tag, sharedhs_tag, pstatus_tag);
+  RRA(" ");
+  
+  unsigned int j = 0, i = 0;
+  std::vector<int> sharing_procs(MAX_SHARING_PROCS);
+  MBEntityHandle sharing_handles[MAX_SHARING_PROCS];
+  int maxp = -1;
+  std::fill(sharing_procs.begin(), sharing_procs.end(), maxp);
+  std::fill(sharing_handles, sharing_handles+MAX_SHARING_PROCS, 0);
+  
+  while (j < 2*shared_verts.n) {
+      // count & accumulate sharing procs
+    unsigned int nump = 0;
+    int this_idx = shared_verts.vi[j];
+    MBEntityHandle this_ent = skin_ents[0][this_idx];
+    while (j < 2*shared_verts.n && shared_verts.vi[j] == this_idx) {
+      j++;
+      sharing_procs[nump] = shared_verts.vi[j++];
+      sharing_handles[nump++] = shared_verts.vul[i++];
+    }
+
+    if (1 == nump) {
+      proc_ranges[sharing_procs[0]].insert(this_ent);
+      result = mbImpl->tag_set_data(sharedp_tag, &this_ent, 1,
+                                    &sharing_procs[0]);
+      result = mbImpl->tag_set_data(sharedh_tag, &this_ent, 1,
+                                    sharing_handles);
+    }
+    else {
+      proc_nranges[nump-1][sharing_procs].insert(this_ent);
+      result = mbImpl->tag_set_data(sharedps_tag, &this_ent, 1,
+                                    &sharing_procs[0]);
+      result = mbImpl->tag_set_data(sharedhs_tag, &this_ent, 1,
+                                    sharing_handles);
+    }
+    RR("Failed setting shared_procs tag on skin vertices.");
+
+      // reset sharing proc(s) tags
+    std::fill(sharing_procs.begin(), sharing_procs.end(), maxp);
+    std::fill(sharing_handles, sharing_handles+nump, 0);
+  }
+
+  return MB_SUCCESS;
+}
+  
+MBErrorCode MBParallelComm::get_shared_proc_tags(MBTag &sharedp_tag,
+                                                 MBTag &sharedps_tag,
+                                                 MBTag &sharedh_tag,
+                                                 MBTag &sharedhs_tag,
+                                                 MBTag &pstatus_tag) 
+{
+  int def_val = -1;
+  MBErrorCode result;
+  
+  if (!sharedpTag) {
+    result = mbImpl->tag_create(PARALLEL_SHARED_PROC_TAG_NAME, sizeof(int), 
+                                MB_TAG_DENSE,
+                                MB_TYPE_INTEGER, sharedpTag, &def_val, true);
+    if (MB_SUCCESS != result && MB_ALREADY_ALLOCATED != result) {
+      RR("Couldn't create shared_proc tag.");
+    }
+  }
+  sharedp_tag = sharedpTag;
+  
+  if (!sharedpsTag) {
+    result = mbImpl->tag_create(PARALLEL_SHARED_PROCS_TAG_NAME, 
+                                MAX_SHARING_PROCS*sizeof(int), 
+                                MB_TAG_SPARSE,
+                                MB_TYPE_INTEGER, sharedpsTag, NULL, true);
+    if (MB_SUCCESS != result && MB_ALREADY_ALLOCATED != result) {
+      RR("Couldn't create shared_procs tag.");
+    }
+  }
+  sharedps_tag = sharedpsTag;
+  
+  def_val = 0;
+  if (!sharedhTag) {
+    result = mbImpl->tag_create(PARALLEL_SHARED_HANDLE_TAG_NAME, 
+                                sizeof(MBEntityHandle), 
+                                MB_TAG_DENSE,
+                                MB_TYPE_INTEGER, sharedhTag, &def_val, true);
+    if (MB_SUCCESS != result && MB_ALREADY_ALLOCATED != result) {
+      RR("Couldn't create shared_handle tag.");
+    }
+  }
+  sharedh_tag = sharedhTag;
+  
+  if (!sharedhsTag) {
+    result = mbImpl->tag_create(PARALLEL_SHARED_HANDLES_TAG_NAME, 
+                                MAX_SHARING_PROCS*sizeof(MBEntityHandle), 
+                                MB_TAG_SPARSE,
+                                MB_TYPE_INTEGER, sharedhsTag, NULL, true);
+    if (MB_SUCCESS != result && MB_ALREADY_ALLOCATED != result) {
+      RR("Couldn't create shared_handles tag.");
+    }
+  }
+  sharedhs_tag = sharedhsTag;
+  
+  if (!pstatusTag) {
+    result = mbImpl->tag_create(PARALLEL_STATUS_TAG_NAME, 
+                                1,
+                                MB_TAG_SPARSE,
+                                MB_TYPE_OPAQUE, pstatusTag, NULL, true);
+    if (MB_SUCCESS != result && MB_ALREADY_ALLOCATED != result) {
+      RR("Couldn't create shared_handles tag.");
+    }
+  }
+  pstatus_tag = pstatusTag;
+  
+  return MB_SUCCESS;
+}
+
 MBErrorCode MBParallelComm::get_shared_entities(int dim,
                                                 MBRange &shared_ents) 
 {
@@ -1335,12 +1614,98 @@
       // just created it, so we need global ids
     result = assign_global_ids(this_set, dimension, start_id, largest_dim_only,
                                parallel);
-    RR("Failed assigning global ids.");
+    RRA("Failed assigning global ids.");
   }
 
   return MB_SUCCESS;
 }
+/*
+MBErrorCode MBParallelComm::exchange_ghost_cells(int to_dim, int bridge_dim) 
+{
+  MBTag sharedp_tag, sharedps_tag, sharedh_tag, sharedhs_tag, pstatus_tag;
+  MBErrorCode result = get_shared_proc_tags(sharedp_tag, sharedps_tag, 
+                       sharedh_tag, sharedhs_tag, pstatus_tag);
+  RR(" ");
 
+  MBRange proc_sets, procs_sets;
+  result = get_iface_sets(proc_sets, procs_sets);
+  RR("Failed to get interface sets.");
+  std::vector<int> other_procs(iface_sets.size()), other_procs2(MAX_SHARING_PROCS);
+  result = mbImpl->tag_get_data(sharedp_tag, iface_sets, &other_procs[0]);
+  RR("Failed to get shared_proc tag for interface sets.");
+  
+    // map stores proc communicated with and ghost entities/new vertices on that proc
+  struct 
+  {
+    MBRange ghost_ents;
+    MBRange newverts;
+    MPI_Request mpireq;
+  } proc_struct;
+  
+  std::map<int,proc_struct> pstructs;
+
+    // get vertices shared with each proc
+  MBRange::iterator ifsit, rit;
+  std::vector<int>::iterator vit, vit2;
+  MeshTopoUtils mtu(mbImpl);
+  std::vector<MBEntityHandle> tmp_handles;
+  MBRange iface_ents;
+  for (ifsit = proc_sets.begin(), vit = other_procs.begin(); 
+       ifsit != proc_sets.end(); ifsit++, vit++) {
+    iface_ents.clear();
+    result = mbImpl->get_entities_by_type(*ifsit, MBVERTEX, iface_ents);
+    RR("Failed to get vertices in iface set.");
+    pstructs[*vit].newverts.merge(iface_ents);
+  }
+  for (ifsit = procs_sets.begin(); ifsit != procs_sets.end(); ifsit++) {
+    iface_ents.clear();
+    result = mbImpl->get_entities_by_type(*ifsit, MBVERTEX, iface_ents);
+    RR("Failed to get vertices in iface set.");
+    result = mbImpl->tag_get_data(sharedps_tag, &(*ifset), 1, &other_procs2[0]);
+    RR("Failed to get sharedps_tag for iface set.");
+    int i = 0;
+    while (i < MAX_SHARING_PROCS && other_procs2[i] >= 0)
+      pstructs[other_procs2[i]].newverts.merge(iface_ents);
+  }
+  
+      // get 1st order adjs of to_dim; only work for single-proc interfaces for now
+  for (ifsit = proc_sets.begin(), vit = other_procs.begin(); 
+       ifsit != proc_sets.end(); ifsit++, vit++) {
+      // get the entities on the interface
+    iface_ents.clear();
+    result = mbImpl->get_entities_by_handle(*ifsit, iface_ents);
+    RR("Failed to get entities in iface set.");
+      // get 1st order adjs of to_dim
+    MBRange &ghostents = &pstructs[*vit].ghostents;
+    MBRange &newverts = &pstructs[*vit].newverts;
+    result = mbImpl->get_adjacencies(iface_ents, to_dim, true, ghostents);
+    RR("Failed to get 1st order adjs from interface.");
+
+    if (-1 != bridge_dim) {
+        // get bridge adjs; reuse iface_ents
+      iface_ents.clear();
+      result = mtu.get_bridge_adjacencies(ghostents, bridge_dim, to_dim, iface_ents);
+      RR("Failed to get bridge adjs from interface.");
+      ghostents.merge(iface_ents);
+    }
+    
+      // get vertices, and filter out ones shared already; reuse iface_ents
+    iface_ents.clear();
+    result = mbImpl->get_adjacencies(ghostents, 0, false, iface_ents, MBInterface::UNION);
+    RR("Failed to get vertex adjs to ghost entities.");
+    newverts = iface_ents.subtract(newverts);
+
+      // Isend # new vertices
+    int num_verts = newverts.size();
+    int success = MPI_Isend(&num_verts, 1, MPI_INTEGER, *vit,
+                            0, procConfig.proc_comm(), &(pstructs[*vit].mpireq));
+    if (!success) return MB_FAILURE;
+  }
+  
+*/    
+      
+    
+
 #ifdef TEST_PARALLELCOMM
 
 #include <iostream>
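
tag_shared_verts() depends on the tuple list being sorted by local skin-vertex index, so that each vertex's sharing processors and remote handles form one consecutive run. Below is a standalone C++ sketch of that grouping loop, with plain structs in place of the C tuple_list and the MOAB tag writes reduced to a print statement.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// One entry of the shared_verts tuple list: vi holds (index, proc), vul holds
// the remote handle.
struct SharedTuple { int index; int proc; std::uint64_t remote_handle; };

int main() {
  std::vector<SharedTuple> tuples = {
    {0, 1, 0x2001}, {2, 1, 0x2005}, {0, 2, 0x3001}, {2, 3, 0x4005}};

  // Sort by local vertex index, as tuple_list_sort(&shared_verts, 0, ...) does.
  std::sort(tuples.begin(), tuples.end(),
            [](const SharedTuple &a, const SharedTuple &b) { return a.index < b.index; });

  std::size_t j = 0;
  while (j < tuples.size()) {
    int this_idx = tuples[j].index;
    std::vector<int> sharing_procs;
    std::vector<std::uint64_t> sharing_handles;
    // Consecutive tuples with the same index belong to the same vertex.
    while (j < tuples.size() && tuples[j].index == this_idx) {
      sharing_procs.push_back(tuples[j].proc);
      sharing_handles.push_back(tuples[j].remote_handle);
      ++j;
    }
    // One sharer -> the single-valued tags; several -> the "...s" tags.
    std::cout << "vertex index " << this_idx << " shared with "
              << sharing_procs.size() << " proc(s)\n";
  }
  return 0;
}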

Modified: MOAB/trunk/parallel/MBParallelComm.hpp
===================================================================
--- MOAB/trunk/parallel/MBParallelComm.hpp	2008-02-27 22:36:43 UTC (rev 1622)
+++ MOAB/trunk/parallel/MBParallelComm.hpp	2008-02-27 23:27:32 UTC (rev 1623)
@@ -28,6 +28,14 @@
 #include "MBForward.hpp"
 #include "MBRange.hpp"
 #include "MBProcConfig.hpp"
+#include <map>
+#include "math.h"
+extern "C" 
+{
+#include "minmax.h"
+#include "sort.h"
+#include "tuple_list.h"
+}
 
 class TagServer;
 class SequenceManager;
@@ -45,6 +53,8 @@
                  std::vector<unsigned char> &tmp_buff,
                  MPI_Comm comm = MPI_COMM_WORLD);
 
+  static unsigned char PROC_SHARED, PROC_OWNER;
+  
     //! assign a global id space, for largest-dimension or all entities (and
     //! in either case for vertices too)
   MBErrorCode assign_global_ids(MBEntityHandle this_set,
@@ -136,6 +146,12 @@
     //! Get proc config for this communication object
   const MBProcConfig &proc_config() const {return procConfig;}
   
+    //! return the tags used to indicate shared procs and handles
+  MBErrorCode get_shared_proc_tags(MBTag &sharedp_tag,
+                                   MBTag &sharedps_tag,
+                                   MBTag &sharedh_tag,
+                                   MBTag &sharedhs_tag,
+                                   MBTag &pstatus_tag);
       
 private:
 
@@ -181,7 +197,20 @@
   MBErrorCode unpack_tags(unsigned char *&buff_ptr,
                           MBRange &entities);
   
+  MBErrorCode tag_shared_verts(tuple_list &shared_verts,
+                               MBRange *skin_ents,
+                               std::map<int, MBRange> &proc_ranges,
+                               std::map<std::vector<int>, MBRange> *proc_nranges);
+  
+  MBErrorCode tag_shared_ents(int shared_dim,
+                              tuple_list &shared_verts,
+                              MBRange *skin_ents,
+                              std::map<int, MBRange> &proc_ranges,
+                              std::map<std::vector<int>, MBRange> *proc_nranges);
 
+  MBErrorCode create_interface_sets(std::map<int, MBRange> &proc_ranges,
+                                    std::map<std::vector<int>, MBRange> *proc_nranges);
+  
     //! MB interface associated with this writer
   MBInterface *mbImpl;
 
@@ -226,6 +255,19 @@
   
     //! numbers of parents/children for transferred sets
   std::vector<int> setPcs;
+
+    //! tags used to save sharing procs and handles
+  MBTag sharedpTag, sharedpsTag, sharedhTag, sharedhsTag, pstatusTag;
+
+    //! interface sets, one set per unique combination of procs
+  MBRange ifaceSets;
+  
+    //! ghost sets (sets of ghost entities), one set per unique combination of procs
+  MBRange ghostSets;
+  
+    //! ghosted sets (sets of ghosted entities), one set per unique combination of procs
+  MBRange ghostedSets;
+  
 };
 
 #endif
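
get_shared_proc_tags() introduces lazily created, cached tag handles: each member starts at zero, the tag is created on first request, and an already-allocated result is treated as success. Below is a standalone C++ sketch of that pattern; TagRegistry is a hypothetical stand-in for MOAB's tag server, not its real API.

#include <map>
#include <string>

enum Err { OK, ALREADY_ALLOCATED, FAIL };

// Hypothetical stand-in for the tag server: creates a named tag once and
// reports ALREADY_ALLOCATED on subsequent creation attempts.
struct TagRegistry {
  std::map<std::string, int> tags;
  int next_handle;
  TagRegistry() : next_handle(1) {}
  Err create(const std::string &name, int &handle) {
    std::map<std::string, int>::iterator it = tags.find(name);
    if (it != tags.end()) { handle = it->second; return ALREADY_ALLOCATED; }
    handle = tags[name] = next_handle++;
    return OK;
  }
};

// Mirrors the sharedpTag caching in get_shared_proc_tags(): 0 means "not yet
// created"; creation happens on first use; "already exists" is not an error.
struct ParallelCommSketch {
  TagRegistry *reg;
  int sharedpTag;
  explicit ParallelCommSketch(TagRegistry *r) : reg(r), sharedpTag(0) {}
  Err get_sharedp_tag(int &tag) {
    if (!sharedpTag) {
      Err e = reg->create("PARALLEL_SHARED_PROC", sharedpTag);
      if (OK != e && ALREADY_ALLOCATED != e) return e;  // real errors propagate
    }
    tag = sharedpTag;
    return OK;
  }
};

int main() {
  TagRegistry reg;
  ParallelCommSketch pc(&reg);
  int tag = 0;
  // The second call reuses the cached handle without touching the registry.
  return (OK == pc.get_sharedp_tag(tag) && OK == pc.get_sharedp_tag(tag)) ? 0 : 1;
}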

Modified: MOAB/trunk/parallel/ReadParallel.cpp
===================================================================
--- MOAB/trunk/parallel/ReadParallel.cpp	2008-02-27 22:36:43 UTC (rev 1622)
+++ MOAB/trunk/parallel/ReadParallel.cpp	2008-02-27 23:27:32 UTC (rev 1623)
@@ -39,10 +39,10 @@
     // Get parallel settings
   int parallel_mode;
   const char* parallel_opts[] = { "NONE", "BCAST", "BCAST_DELETE", 
-                                  "READ_DELETE", "SCATTER", 
+                                  "READ_DELETE", "READ_PARALLEL", 
                                   "FORMAT", 0 };
   enum ParallelOpts {POPT_NONE=0, POPT_BCAST, POPT_BCAST_DELETE, 
-                     POPT_READ_DELETE, POPT_SCATTER,
+                     POPT_READ_DELETE, POPT_READ_PARALLEL,
                      POPT_FORMAT, POPT_LAST};
       
   MBErrorCode result = opts.match_option( "PARALLEL", parallel_opts, 
@@ -121,8 +121,8 @@
     case POPT_FORMAT:
       merror->set_last_error( "Access to format-specific parallel read not implemented.\n");
       return MB_NOT_IMPLEMENTED;
-    case POPT_SCATTER:
-      merror->set_last_error( "Partitioning for PARALLEL=SCATTER not supported yet.\n");
+    case POPT_READ_PARALLEL:
+      merror->set_last_error( "Partitioning for PARALLEL=READ_PARALLEL not supported yet.\n");
       return MB_NOT_IMPLEMENTED;
     default:
       return MB_FAILURE;
@@ -368,7 +368,7 @@
       // cut them in half if we're on one proc
     if (proc_sz == 1 && num_partsets == num_sets) num_sets /= 2;
     
-    for (int i = 0; i < num_sets; i++) 
+    for (unsigned int i = 0; i < num_sets; i++) 
       tmp_sets.insert(partition_sets[i*proc_sz + proc_rk]);
 
     partition_sets.swap(tmp_sets);

Modified: MOAB/trunk/parallel/gs.c
===================================================================
--- MOAB/trunk/parallel/gs.c	2008-02-27 22:36:43 UTC (rev 1622)
+++ MOAB/trunk/parallel/gs.c	2008-02-27 23:27:32 UTC (rev 1623)
@@ -77,6 +77,7 @@
 #include "errmem.h"     
 #include "types.h"
 #include "minmax.h"
+#include "sort.h"
 #include "tuple_list.h"
 #ifdef USE_MPI
 #  include "crystal.h"  
@@ -87,6 +88,8 @@
   uint *target;      /* int target[np]: array of processor ids to comm w/ */
   uint *nshared;     /* nshared[i] = number of points shared w/ target[i] */
   uint *sh_ind;      /* list of shared point indices                      */
+  slong *slabels;    /* list of signed long labels (not including gid)    */
+  ulong *ulabels;    /* list of unsigned long labels                      */
   MPI_Request *reqs; /* pre-allocated for MPI calls                       */
   real *buf;         /* pre-allocated buffer to receive data              */
   uint maxv;         /* maximum vector size                               */
@@ -186,13 +189,18 @@
 
 #ifdef USE_MPI
 
-static nonlocal_info *nlinfo_alloc(uint np, uint count, uint maxv)
+static nonlocal_info *nlinfo_alloc(uint np, uint count, uint nlabels,
+                                   uint nulabels, uint maxv)
 {
   nonlocal_info *info = tmalloc(nonlocal_info,1);
   info->np = np;
   info->target = tmalloc(uint,2*np+count);
   info->nshared = info->target + np;
   info->sh_ind = info->nshared + np;
+  if (1 < nlabels)
+    info->slabels = tmalloc(slong, (nlabels-1)*count);
+  else info->slabels = NULL;
+  info->ulabels = tmalloc(ulong, nulabels*count);
   info->reqs = tmalloc(MPI_Request,2*np);
   info->buf = tmalloc(real,2*count*maxv);
   info->maxv = maxv;
@@ -204,6 +212,9 @@
   free(info->buf);
   free(info->reqs);
   free(info->target);
+  if (info->slabels)
+    free(info->slabels);
+  free(info->ulabels);
   free(info);
 }
 
@@ -390,17 +401,14 @@
    Setup
   --------------------------------------------------------------------------*/
 
-gs_data *gs_data_setup(uint n, const ulong *label,
-                       uint maxv, crystal_data *crystal)
+gs_data *gs_data_setup(uint n, const long *label, const ulong *ulabel,
+                       uint maxv, const unsigned int nlabels, const unsigned int nulabels,
+                       crystal_data *crystal)
 {
   gs_data *data=tmalloc(gs_data,1);
   tuple_list nonzero, primary;
-  const int nz_index=0, nz_size=1, nz_label=0;
-  const int pr_nzindex=0, pr_index=1, pr_count=2, pr_size=3, pr_label=0;
 #ifdef USE_MPI
   tuple_list shared;
-  const int pr_proc=0;
-  const int sh_dproc=0, sh_proc2=1, sh_index=2, sh_size=3, sh_label=0;
 #else
   buffer buf;
 #endif
@@ -411,69 +419,79 @@
 #endif
 
   /* construct list of nonzeros: (index ^, label) */
-  tuple_list_init_max(&nonzero,nz_size,1,0,n);
+  tuple_list_init_max(&nonzero,1,nlabels,nulabels,0,n);
   {
-    uint i; sint *nzi = nonzero.vi; slong *nzl = nonzero.vl;
+    uint i; sint *nzi = nonzero.vi; slong *nzl = nonzero.vl; ulong *nzul = nonzero.vul;
     for(i=0;i<n;++i)
-      if(label[i]!=0) 
-        nzi[nz_index]=i,
-        nzl[nz_label]=label[i],
-        nzi+=nz_size, ++nzl, nonzero.n++;
+      if(label[i]!=0) {
+        nzi[0]=i;
+        unsigned int j;
+        for (j = 0; j < nlabels; j++)
+          nzl[j]=label[nlabels*i+j];
+        for (j = 0; j < nulabels; j++)
+          nzul[j]=ulabel[nulabels*i+j];
+        nzi++, nzl+= nlabels, nzul+=nulabels, nonzero.n++;
+      }
   }
 
   /* sort nonzeros by label: (index ^2, label ^1) */
 #ifndef USE_MPI
-  tuple_list_sort(&nonzero,nz_size+nz_label,&buf);
+  tuple_list_sort(&nonzero,1,&buf);
 #else
-  tuple_list_sort(&nonzero,nz_size+nz_label,&crystal->all->buf);
+  tuple_list_sort(&nonzero,1,&crystal->all->buf);
 #endif
 
   /* build list of unique labels w/ lowest associated index:
      (index in nonzero ^, primary (lowest) index in label, count, label) */
-  tuple_list_init_max(&primary,pr_size,1,0,nonzero.n);
+  tuple_list_init_max(&primary,3,nlabels,nulabels,0,nonzero.n);
   {
     uint i;
     sint  *nzi=nonzero.vi, *pi=primary.vi;
     slong *nzl=nonzero.vl, *pl=primary.vl;
+    ulong *nzul=nonzero.vul, *pul=primary.vul;
     sint last=-1;
-    for(i=0;i<nonzero.n;++i,nzi+=nz_size,++nzl) {
-      if(nzl[nz_label]==last) {
-        ++pi[-pr_size+pr_count];
+    for(i=0;i<nonzero.n;++i,nzi+=1,nzl+=nlabels,nzul+=nulabels) {
+      if(nzl[0]==last) {
+        ++pi[-1];
         continue;
       }
-      last=nzl[nz_label];
-      pi[pr_nzindex]=i;
-      pi[pr_index]=nzi[nz_index];
-      pl[pr_label]=nzl[nz_label];
-      pi[pr_count]=1;
-      pi+=pr_size, ++pl; primary.n++;
+      last=nzl[0];
+      pi[0]=i;
+      pi[1]=nzi[0];
+      unsigned int j;
+      for (j = 0; j < nlabels; j++)
+        pl[j]=nzl[j];
+      for (j = 0; j < nulabels; j++)
+        pul[j]=nzul[j];
+      pi[2]=1;
+      pi+=3, pl+=nlabels; pul+=nulabels; primary.n++;
     }
   }
 
   /* calculate size of local condense map */
   {
     uint i, count=1; sint *pi=primary.vi;
-    for(i=primary.n;i;--i,pi+=pr_size)
-      if(pi[pr_count]>1) count+=pi[pr_count]+1;
+    for(i=primary.n;i;--i,pi+=3)
+      if(pi[2]>1) count+=pi[2]+1;
     data->local_cm = tmalloc(sint,count);
   }
 
   /* sort unique labels by primary index:
      (nonzero index ^2, primary index ^1, count, label ^2) */
 #ifndef USE_MPI
-  tuple_list_sort(&primary,pr_index,&buf);
+  tuple_list_sort(&primary,1,&buf);
   buffer_free(&buf);
 #else
-  tuple_list_sort(&primary,pr_index,&crystal->all->buf);
+  tuple_list_sort(&primary,1,&crystal->all->buf);
 #endif
   
   /* construct local condense map */
   {
     uint i, n; sint *pi=primary.vi;
     sint *cm = data->local_cm;
-    for(i=primary.n;i;--i,pi+=pr_size) if((n=pi[pr_count])>1) {
-      uint j; sint *nzi=nonzero.vi+nz_size*pi[pr_nzindex];
-      for(j=n;j;--j,nzi+=nz_size) *cm++ = nzi[nz_index];
+    for(i=primary.n;i;--i,pi+=3) if((n=pi[2])>1) {
+      uint j; sint *nzi=nonzero.vi+1*pi[0];
+      for(j=n;j;--j,nzi+=1) *cm++ = nzi[0];
       *cm++ = -1;
     }
     *cm++ = -1;
@@ -486,64 +504,85 @@
   /* assign work proc by label modulo np */
   {
     uint i; sint *pi=primary.vi; slong *pl=primary.vl;
-    for(i=primary.n;i;--i,pi+=pr_size,++pl)
-      pi[pr_proc]=pl[pr_label]%crystal->num;
+    for(i=primary.n;i;--i,pi+=3,pl+=nlabels)
+      pi[0]=pl[0]%crystal->num;
   }
-  gs_transfer(1,&primary,pr_proc,crystal); /* transfer to work procs */
+  gs_transfer(1,&primary,0,crystal); /* transfer to work procs */
   /* primary: (source proc, index on src, useless, label) */
   /* sort by label */
-  tuple_list_sort(&primary,pr_size+pr_label,&crystal->all->buf);
+  tuple_list_sort(&primary,3,&crystal->all->buf);
   /* add sentinel to primary list */
   if(primary.n==primary.max) tuple_list_grow(&primary);
   primary.vl[primary.n] = -1;
   /* construct shared list: (proc1, proc2, index1, label) */
-  tuple_list_init_max(&shared,sh_size,1,0,primary.n);
+  tuple_list_init_max(&shared,3,nlabels,nulabels,0,primary.n);
   {
     sint *pi1=primary.vi, *si=shared.vi;
     slong lbl, *pl1=primary.vl, *sl=shared.vl;
-    for(;(lbl=pl1[pr_label])!=-1;pi1+=pr_size,++pl1) {
-      sint *pi2=pi1+pr_size; slong *pl2=pl1+1;
-      for(;pl2[pr_label]==lbl;pi2+=pr_size,++pl2) {
+    ulong *pul1=primary.vul, *sul=shared.vul;
+    for(;(lbl=pl1[0])!=-1;pi1+=3,pl1+=nlabels,pul1+=nulabels) {
+      sint *pi2=pi1+3; slong *pl2=pl1+nlabels; ulong *pul2=pul1+nulabels;
+      for(;pl2[0]==lbl;pi2+=3,pl2+=nlabels,pul2+=nulabels) {
         if(shared.n+2>shared.max)
           tuple_list_grow(&shared),
-          si=shared.vi+shared.n*sh_size, sl=shared.vl+shared.n;
-        si[sh_dproc] = pi1[pr_proc];
-        si[sh_proc2] = pi2[pr_proc];
-        si[sh_index] = pi1[pr_index];
-        sl[sh_label] = lbl;
-        si+=sh_size, ++sl, shared.n++;
-        si[sh_dproc] = pi2[pr_proc];
-        si[sh_proc2] = pi1[pr_proc];
-        si[sh_index] = pi2[pr_index];
-        sl[sh_label] = lbl;
-        si+=sh_size, ++sl, shared.n++;
+          si=shared.vi+shared.n*3, sl=shared.vl+shared.n*nlabels, 
+              sul=shared.vul+shared.n*nulabels;
+        si[0] = pi1[0];
+        si[1] = pi2[0];
+        si[2] = pi1[1];
+        unsigned int j;
+        for (j = 0; j < nlabels; j++)
+          sl[j] = pl2[j];
+        for (j = 0; j < nulabels; j++)
+          sul[j] = pul2[j];
+        si+=3, sl+=nlabels, sul+=nulabels, shared.n++;
+        si[0] = pi2[0];
+        si[1] = pi1[0];
+        si[2] = pi2[1];
+        for (j = 0; j < nlabels; j++)
+          sl[j] = pl1[j];
+        for (j = 0; j < nulabels; j++)
+          sul[j] = pul1[j];
+        si+=3, sl+=nlabels, sul+=nulabels, shared.n++;
       }
     }
   }
   tuple_list_free(&primary);
-  gs_transfer(1,&shared,sh_dproc,crystal); /* transfer to dest procs */
+  gs_transfer(1,&shared,0,crystal); /* transfer to dest procs */
   /* shared list: (useless, proc2, index, label) */
   /* sort by label */
-  tuple_list_sort(&shared,sh_size+sh_label,&crystal->all->buf);
+  tuple_list_sort(&shared,3,&crystal->all->buf);
   /* sort by partner proc */
-  tuple_list_sort(&shared,sh_proc2,&crystal->all->buf);
+  tuple_list_sort(&shared,1,&crystal->all->buf);
   /* count partner procs */
   {
     uint i, count=0; sint proc=-1,*si=shared.vi;
-    for(i=shared.n;i;--i,si+=sh_size)
-      if(si[sh_proc2]!=proc) ++count, proc=si[sh_proc2];
-    data->nlinfo = nlinfo_alloc(count,shared.n,maxv);
+    for(i=shared.n;i;--i,si+=3)
+      if(si[1]!=proc) ++count, proc=si[1];
+    data->nlinfo = nlinfo_alloc(count,shared.n,
+                                nlabels, nulabels, maxv);
   }
   /* construct non-local info */
   {
     uint i; sint proc=-1,*si=shared.vi;
+    slong *sl = shared.vl;
+    ulong *ul = shared.vul;
     uint *target  = data->nlinfo->target;
     uint *nshared = data->nlinfo->nshared;
     uint *sh_ind  = data->nlinfo->sh_ind;
-    for(i=shared.n;i;--i,si+=sh_size) {
-      if(si[sh_proc2]!=proc)
-        proc=si[sh_proc2], *target++ = proc, *nshared++ = 0;
-      ++nshared[-1], *sh_ind++=si[sh_index];
+    slong *slabels = data->nlinfo->slabels;
+    ulong *ulabels = data->nlinfo->ulabels;
+    uint j;
+    for(i=shared.n;i;--i,si+=3) {
+      if(si[1]!=proc)
+        proc=si[1], *target++ = proc, *nshared++ = 0;
+      ++nshared[-1], *sh_ind++=si[2];
+        // don't store 1st slabel (the gid); advance past it, then copy the rest
+      sl++;
+      for (j = 0; j < nlabels-1; j++)
+        *slabels++ = *sl++;
+      for (j = 0; j < nulabels; j++)
+        *ulabels++ = *ul++;
     }
   }
   tuple_list_free(&shared);
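
The heart of gs_data_setup() is a rendezvous: every processor ships its (label, local index) records to a work processor chosen as label % P, and the work processor, seeing all owners of a label, emits one shared record per owner pair and routes each back to its destination. Below is a standalone C++ sketch of that routing idea, with a std::map standing in for the two crystal-router transfers.

#include <iostream>
#include <map>
#include <vector>

struct Record { int src_proc; int local_index; long label; };

int main() {
  const int P = 3;
  // Made-up records: label 101 lives on all three procs, 102 on procs 0 and 2.
  std::vector<Record> records = {
    {0, 5, 101}, {1, 2, 101}, {2, 7, 101}, {0, 6, 102}, {2, 8, 102}};

  // "Transfer to work procs": bucket each record by label % P.
  std::map<int, std::vector<Record> > on_work_proc;
  for (const Record &r : records)
    on_work_proc[(int)(r.label % P)].push_back(r);

  // Each work proc groups its records by label and pairs up the owners.
  for (const std::pair<const int, std::vector<Record> > &wp : on_work_proc) {
    std::map<long, std::vector<Record> > by_label;
    for (const Record &r : wp.second) by_label[r.label].push_back(r);
    for (const std::pair<const long, std::vector<Record> > &lb : by_label)
      for (const Record &a : lb.second)
        for (const Record &b : lb.second)
          if (a.src_proc != b.src_proc)
            std::cout << "to proc " << a.src_proc << ": index " << a.local_index
                      << " (label " << lb.first << ") is also on proc "
                      << b.src_proc << "\n";
  }
  return 0;
}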

Modified: MOAB/trunk/parallel/gs.h
===================================================================
--- MOAB/trunk/parallel/gs.h	2008-02-27 22:36:43 UTC (rev 1622)
+++ MOAB/trunk/parallel/gs.h	2008-02-27 23:27:32 UTC (rev 1623)
@@ -18,6 +18,8 @@
   uint *target;      /* int target[np]: array of processor ids to comm w/ */
   uint *nshared;     /* nshared[i] = number of points shared w/ target[i] */
   uint *sh_ind;      /* list of shared point indices                      */
+  slong *slabels;    /* list of signed long labels (not including gid)    */
+  ulong *ulabels;    /* list of unsigned long labels                      */
   MPI_Request *reqs; /* pre-allocated for MPI calls                       */
   real *buf;         /* pre-allocated buffer to receive data              */
   uint maxv;         /* maximum vector size                               */
@@ -32,8 +34,10 @@
 #endif
 } gs_data;
 
-gs_data *gs_data_setup(uint n, const ulong *label,
-                       uint maxv, crystal_data *crystal);
+gs_data *gs_data_setup(uint n, const long *label, const ulong *ulabel,
+                       uint maxv, const unsigned int nlabels,
+                       const unsigned int nulabels,
+                       crystal_data *crystal);
 
 #ifndef MPI
 #  undef crystal_data

Modified: MOAB/trunk/parallel/sort.h
===================================================================
--- MOAB/trunk/parallel/sort.h	2008-02-27 22:36:43 UTC (rev 1622)
+++ MOAB/trunk/parallel/sort.h	2008-02-27 23:27:32 UTC (rev 1623)
@@ -37,8 +37,7 @@
 
   ----------------------------------------------------------------------------*/
 
-#define sort jl_sort
-void sort(const uint *A, uint n, uint stride, uint *out, uint *work);
+void jl_sort(const uint *A, uint n, uint stride, uint *out, uint *work);
 
 typedef struct { uint v; uint i; } sort_data;
 void index_sort(const uint *A, uint n, uint stride,
@@ -50,7 +49,7 @@
   void index_sort_long(const ulong *A, uint n, uint stride,
                        uint *idx, sort_data_long *work);
 #else
-#  define sort_long       sort
+#  define sort_long       jl_sort
 #  define sort_data_long  sort_data
 #  define index_sort_long index_sort
 #endif

Modified: MOAB/trunk/parallel/transfer.c
===================================================================
--- MOAB/trunk/parallel/transfer.c	2008-02-27 22:36:43 UTC (rev 1622)
+++ MOAB/trunk/parallel/transfer.c	2008-02-27 23:27:32 UTC (rev 1623)
@@ -35,10 +35,10 @@
 void gs_transfer(int dynamic, tuple_list *tl,
                  unsigned pf, crystal_data *crystal)
 {
-  const unsigned mi=tl->mi,ml=tl->ml,mr=tl->mr;
-  const unsigned tsize = (mi-1) + ml*UINT_PER_LONG + mr*UINT_PER_REAL;
+  const unsigned mi=tl->mi,ml=tl->ml,mul=tl->mul,mr=tl->mr;
+  const unsigned tsize = (mi-1) + ml*UINT_PER_LONG + mul*UINT_PER_LONG + mr*UINT_PER_REAL;
   sint p, lp = -1;
-  sint *ri; slong *rl; real *rr;
+  sint *ri; slong *rl; ulong *rul; real *rr;
   uint i, j, *buf, *len=0, *buf_end;
 
   /* sort to group by target proc */
@@ -47,7 +47,7 @@
   /* pack into buffer for crystal router */
   buffer_reserve(&crystal->all->buf,(tl->n*(3+tsize))*sizeof(uint));
   crystal->all->n=0, buf = crystal->all->buf.ptr;
-  ri=tl->vi,rl=tl->vl,rr=tl->vr;
+  ri=tl->vi,rl=tl->vl,rul=tl->vul,rr=tl->vr;
   for(i=tl->n;i;--i) {
     p = ri[pf];
     if(p!=lp) {
@@ -60,6 +60,8 @@
     for(j=0;j<mi;++j,++ri) if(j!=pf) *buf++ = *ri;
     for(j=ml;j;--j,++rl)
       memcpy(buf,rl,sizeof(slong)), buf+=UINT_PER_LONG;
+    for(j=mul;j;--j,++rul)
+      memcpy(buf,rul,sizeof(ulong)), buf+=UINT_PER_LONG;
     for(j=mr;j;--j,++rr)
       memcpy(buf,rr,sizeof(real )), buf+=UINT_PER_REAL;
     *len += tsize, crystal->all->n += tsize;
@@ -70,7 +72,7 @@
   /* unpack */
   buf = crystal->all->buf.ptr, buf_end = buf + crystal->all->n;
   tl->n = 0;
-  ri=tl->vi,rl=tl->vl,rr=tl->vr;
+  ri=tl->vi,rl=tl->vl,rul=tl->vul,rr=tl->vr;
   while(buf != buf_end) {
     sint p, len;
     buf++;        /* target ( == this proc ) */
@@ -80,11 +82,13 @@
       if(tl->n==tl->max) {
         if(!dynamic) { tl->n = tl->max + 1; return; }
         tuple_list_grow(tl);
-        ri = tl->vi + mi*tl->n, rl = tl->vl + ml*tl->n, rr = tl->vr + mr*tl->n;
+        ri = tl->vi + mi*tl->n, rl = tl->vl + ml*tl->n;
+        rul = tl->vul + mul*tl->n, rr = tl->vr + mr*tl->n;
       }
       ++tl->n;
       for(j=0;j<mi;++j) if(j!=pf) *ri++ = *buf++; else *ri++ = p;
       for(j=ml;j;--j) memcpy(rl++,buf,sizeof(slong)), buf+=UINT_PER_LONG;
+      for(j=mul;j;--j) memcpy(rul++,buf,sizeof(ulong)), buf+=UINT_PER_LONG;
       for(j=mr;j;--j) memcpy(rr++,buf,sizeof(real )), buf+=UINT_PER_REAL;
       len-=tsize;
     }
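
With the new unsigned-long field, gs_transfer() packs each tuple as (mi-1) ints (the routing int is consumed), plus ml signed longs, mul unsigned longs, and mr reals, all counted in uint words. Below is a tiny standalone sketch of that size computation; UINT_PER_LONG and UINT_PER_REAL are assumed to be 2 here, as on a typical 32-bit-uint / 64-bit-long build.

#include <iostream>

int main() {
  // Assumed word sizes: 32-bit uint, 64-bit slong/ulong/real.
  const unsigned UINT_PER_LONG = 2, UINT_PER_REAL = 2;
  // The shared_verts layout: two ints (index, proc), no longs or reals, one
  // unsigned long (the remote handle).
  unsigned mi = 2, ml = 0, mul = 1, mr = 0;
  unsigned tsize = (mi - 1) + ml * UINT_PER_LONG + mul * UINT_PER_LONG
                 + mr * UINT_PER_REAL;
  std::cout << "uint words per packed tuple: " << tsize << "\n";  // prints 3
  return 0;
}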

Modified: MOAB/trunk/parallel/tuple_list.c
===================================================================
--- MOAB/trunk/parallel/tuple_list.c	2008-02-27 22:36:43 UTC (rev 1622)
+++ MOAB/trunk/parallel/tuple_list.c	2008-02-27 23:27:32 UTC (rev 1623)
@@ -9,16 +9,17 @@
 #include "sort.h"
 
 typedef struct {
-  unsigned mi,ml,mr;
+  unsigned mi,ml,mul,mr;
   uint n, max;
-  sint *vi; slong *vl; real *vr;
+  sint *vi; slong *vl; ulong *vul; real *vr;
 } tuple_list;
 
 void tuple_list_permute(tuple_list *tl, uint *perm, void *work)
 {
-  const unsigned mi=tl->mi, ml=tl->ml, mr=tl->mr;
+  const unsigned mi=tl->mi, ml=tl->ml, mul=tl->mul, mr=tl->mr;
   const unsigned int_size  = mi*sizeof(sint),
                  long_size = ml*sizeof(slong),
+                 ulong_size = mul*sizeof(ulong),
                  real_size = mr*sizeof(real);
   if(mi) {
     uint *p=perm, *pe=p+tl->n; char *sorted=work;
@@ -30,6 +31,11 @@
     while(p!=pe) memcpy(sorted,&tl->vl[ml*(*p++)],long_size),sorted+=long_size;
     memcpy(tl->vl,work,long_size*tl->n);
   }
+  if(mul) {
+    uint *p=perm, *pe=p+tl->n; char *sorted=work;
+    while(p!=pe) memcpy(sorted,&tl->vul[mul*(*p++)],ulong_size),sorted+=ulong_size;
+    memcpy(tl->vul,work,ulong_size*tl->n);
+  }
   if(mr) {
     uint *p=perm, *pe=p+tl->n; char *sorted=work;
     while(p!=pe) memcpy(sorted,&tl->vr[mr*(*p++)],real_size),sorted+=real_size;
@@ -39,11 +45,13 @@
 
 void tuple_list_sort(tuple_list *tl, unsigned key, buffer *buf)
 {
-  const unsigned mi=tl->mi, ml=tl->ml, mr=tl->mr;
+  const unsigned mi=tl->mi, ml=tl->ml, mul=tl->mul, mr=tl->mr;
   const unsigned int_size =  mi*sizeof(sint);
   const unsigned long_size = ml*sizeof(slong);
+  const unsigned ulong_size = mul*sizeof(ulong);
   const unsigned real_size = mr*sizeof(real);
-  const unsigned width = umax_3(int_size,long_size,real_size);
+  const unsigned width = umax_2(umax_2(int_size,long_size),
+                                umax_2(ulong_size,real_size));
   const unsigned data_size = key>=mi ? sizeof(sort_data_long):sizeof(sort_data);
   uint work_min=tl->n * umax_2(2*data_size,sizeof(sint)+width);
   uint *work;
@@ -51,8 +59,11 @@
   work = buf->ptr;
   if(key<mi)
     index_sort     ((uint *)&tl->vi[key   ],tl->n,mi, work, (void*)work);
-  else
+  else if (key < mi+ml)
     index_sort_long((ulong*)&tl->vl[key-mi],tl->n,ml, work, (void*)work);
+  else 
+    index_sort_long((ulong*)&tl->vul[key-mi-ml],tl->n,mul, work, (void*)work);
+
   tuple_list_permute(tl,work,work+tl->n);
 }
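
tuple_list_sort() now dispatches its flat key index across three keyed arrays: keys in [0, mi) select an int field, [mi, mi+ml) a signed-long field, and [mi+ml, mi+ml+mul) an unsigned-long field. Below is a small standalone C++ sketch of that mapping, using the shared_verts layout from MBParallelComm as the example.

#include <iostream>
#include <string>

// Maps tuple_list_sort()'s flat key onto the keyed arrays: vi, then vl, then vul.
std::string key_array(unsigned key, unsigned mi, unsigned ml, unsigned mul) {
  if (key < mi)            return "vi[" + std::to_string(key) + "]";
  if (key < mi + ml)       return "vl[" + std::to_string(key - mi) + "]";
  if (key < mi + ml + mul) return "vul[" + std::to_string(key - mi - ml) + "]";
  return "out of range";
}

int main() {
  // shared_verts layout: mi=2 (index, proc), ml=0, mul=1 (remote handle).
  std::cout << key_array(0, 2, 0, 1) << "\n";  // vi[0]  -- sort by local index
  std::cout << key_array(2, 2, 0, 1) << "\n";  // vul[0] -- sort by remote handle
  return 0;
}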
 

Modified: MOAB/trunk/parallel/tuple_list.h
===================================================================
--- MOAB/trunk/parallel/tuple_list.h	2008-02-27 22:36:43 UTC (rev 1622)
+++ MOAB/trunk/parallel/tuple_list.h	2008-02-27 23:27:32 UTC (rev 1623)
@@ -16,14 +16,14 @@
 #define TUPLE_LIST_H
 
 /* requires "errmem.h" and "types.h" */
-#if !defined(ERRMEM_H) || !defined(TYPES_H)
-#warning "tuple_list.h" requires "errmem.h" and "types.h"
+#if !defined(ERRMEM_H) || !defined(TYPES_H) || !defined(MINMAX_H) || !defined(SORT_H)
+#warning "tuple_list.h" requires "errmem.h" and "types.h" and  "minmax.h" and "sort.h"
 #endif
 
 typedef struct {
-  unsigned mi,ml,mr;
+  unsigned mi,ml,mul,mr;
   uint n, max;
-  sint *vi; slong *vl; real *vr;
+  sint *vi; slong *vl; ulong *vul; real *vr;
 } tuple_list;
 
 /* storage layed out as: vi[max][mi], vl[max][ml], vr[max][mr]
@@ -31,17 +31,19 @@
    only the first n tuples are in use */
 
 static void tuple_list_init_max(tuple_list *tl,
-  unsigned mi, unsigned ml, unsigned mr, uint max)
+                                unsigned mi, unsigned ml, unsigned mul, 
+                                unsigned mr, uint max)
 {
   tl->n=0; tl->max=max;
-  tl->mi=mi,tl->ml=ml,tl->mr=mr;
+  tl->mi=mi,tl->ml=ml,tl->mul=mul,tl->mr=mr;
   tl->vi=tmalloc(sint, max*mi);
   tl->vl=tmalloc(slong,max*ml);
+  tl->vul=tmalloc(ulong,max*mul);
   tl->vr=tmalloc(real, max*mr);
 }
 
 static void tuple_list_free(tuple_list *tl) {
-  free(tl->vi), free(tl->vl), free(tl->vr);
+  free(tl->vi), free(tl->vl), free(tl->vul), free(tl->vr);
 }
 
 static void tuple_list_resize(tuple_list *tl, uint max)
@@ -49,6 +51,7 @@
   tl->max = max;
   tl->vi=trealloc(sint, tl->vi,tl->max*tl->mi);
   tl->vl=trealloc(slong,tl->vl,tl->max*tl->ml);
+  tl->vul=trealloc(ulong,tl->vul,tl->max*tl->mul);
   tl->vr=trealloc(real, tl->vr,tl->max*tl->mr);
 }
 
@@ -58,8 +61,6 @@
 }
 
 void tuple_list_permute(tuple_list *tl, uint *perm, void *work);
-/* sort tuples by the field specified by key<mi+ml;
-   entries in vi[:][key] (or vl[:][key-mi]) assumed nonnegative */
 void tuple_list_sort(tuple_list *tl, unsigned key, buffer *buf);
 
 #endif
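
The revised tuple_list carries four parallel arrays, laid out as vi[max][mi], vl[max][ml], vul[max][mul], and vr[max][mr], with only the first n tuples in use. Below is a standalone C++ sketch of that layout and of the resize behavior, using std::vector in place of tmalloc/trealloc.

#include <cstddef>
#include <cstdint>
#include <vector>

struct TupleListSketch {
  unsigned mi, ml, mul, mr;        // ints / signed longs / unsigned longs / reals per tuple
  std::size_t n, max;              // tuples in use / tuples allocated
  std::vector<int> vi;
  std::vector<std::int64_t> vl;
  std::vector<std::uint64_t> vul;  // the new unsigned-long field (remote handles)
  std::vector<double> vr;

  TupleListSketch(unsigned mi_, unsigned ml_, unsigned mul_, unsigned mr_,
                  std::size_t max_)
      : mi(mi_), ml(ml_), mul(mul_), mr(mr_), n(0), max(max_),
        vi(max_ * mi_), vl(max_ * ml_), vul(max_ * mul_), vr(max_ * mr_) {}

  // Analogous to tuple_list_resize(): grow all four arrays in step.
  void resize(std::size_t new_max) {
    max = new_max;
    vi.resize(max * mi); vl.resize(max * ml);
    vul.resize(max * mul); vr.resize(max * mr);
  }
};

int main() {
  // Mirrors tuple_list_init_max(&shared_verts, 2, 0, 1, 0, max): two ints
  // (index, proc) and one unsigned long (remote handle) per tuple.
  TupleListSketch shared_verts(2, 0, 1, 0, 16);
  shared_verts.vi[0] = 0; shared_verts.vi[1] = 1;
  shared_verts.vul[0] = 0x2001;
  shared_verts.n = 1;
  shared_verts.resize(32);
  return 0;
}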



