[MOAB-dev] r3028 - MOAB/trunk/parallel
tautges at mcs.anl.gov
Fri Jul 17 22:02:44 CDT 2009
Author: tautges
Date: 2009-07-17 22:02:44 -0500 (Fri, 17 Jul 2009)
New Revision: 3028
Modified:
MOAB/trunk/parallel/MBParallelComm.cpp
MOAB/trunk/parallel/MBParallelComm.hpp
Log:
Boy, was this one difficult to find. Note to self: when filling the end of
a partially-used array, don't use the total size as the offset. That is,
std::fill(ps+num_exist, ps+MAX_SHARING_PROCS, -1)
is REALLY BAD for an array ps[MAX_SHARING_PROCS] when num_exist > 0.
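
A minimal sketch of the hazard the note describes (the names are stand-ins,
not code from this commit, and the out-of-bounds case assumes the pointer
has already been advanced past the num_exist used entries):

  #include <algorithm>

  enum { MAX_SHARING_PROCS = 64 };   // stand-in value for illustration

  void pad_unused_tail(int *ps, int num_exist)
  {
    // ps points at the start of an int[MAX_SHARING_PROCS] whose first
    // num_exist slots are in use; filling from the first unused slot
    // to the end of the array stays in bounds:
    std::fill(ps + num_exist, ps + MAX_SHARING_PROCS, -1);

    // Once a pointer has been advanced past the used portion, though,
    // the remaining room is MAX_SHARING_PROCS - num_exist, not
    // MAX_SHARING_PROCS; reusing the total size overruns the array:
    int *tail = ps + num_exist;
    // std::fill(tail, tail + MAX_SHARING_PROCS, -1);          // num_exist ints too far
    std::fill(tail, tail + MAX_SHARING_PROCS - num_exist, -1); // stays in bounds
  }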
Added some more assert checks and a local check that makes sure the intersection
of procs sharing vertices of an entity is a superset of those sharing the
entity.
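
A hedged sketch of that local check, reduced to plain std::set operations
(vertex_procs_cover_entity is a hypothetical name; the actual check is
check_local_shared() in the diff below):

  #include <algorithm>
  #include <iterator>
  #include <set>
  #include <vector>

  // Every proc sharing an entity must also share each of its vertices,
  // so the intersection of the vertices' sharing-proc sets must be a
  // superset of the entity's sharing-proc set.
  bool vertex_procs_cover_entity(const std::set<int> &ent_procs,
                                 const std::vector<std::set<int> > &vert_procs)
  {
    if (vert_procs.empty()) return ent_procs.empty();
    std::set<int> isect(vert_procs[0]);
    for (size_t i = 1; i < vert_procs.size() && !isect.empty(); ++i) {
      std::set<int> tmp;
      std::set_intersection(isect.begin(), isect.end(),
                            vert_procs[i].begin(), vert_procs[i].end(),
                            std::inserter(tmp, tmp.begin()));
      isect.swap(tmp);
    }
    // std::includes(A, B): true iff sorted range B is contained in A
    return std::includes(isect.begin(), isect.end(),
                         ent_procs.begin(), ent_procs.end());
  }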
Modified: MOAB/trunk/parallel/MBParallelComm.cpp
===================================================================
--- MOAB/trunk/parallel/MBParallelComm.cpp 2009-07-17 22:29:35 UTC (rev 3027)
+++ MOAB/trunk/parallel/MBParallelComm.cpp 2009-07-18 03:02:44 UTC (rev 3028)
@@ -424,6 +424,12 @@
// need to resize to final size, then resubmit irecv pointing to the
// offset buffer position
recv_buff.resize(*((int*)&recv_buff[0]));
+
+#ifdef DEBUG_COMM
+ std::cout << "Posting Irecv from " << mpi_status.MPI_SOURCE
+ << " for 2nd message." << std::endl;
+#endif
+
int success = MPI_Irecv(&recv_buff[INITIAL_BUFF_SIZE], recv_buff.size()-INITIAL_BUFF_SIZE,
MPI_UNSIGNED_CHAR, mpi_status.MPI_SOURCE,
mesg_tag_expected+1, procConfig.proc_comm(), &recv_req);
@@ -1210,6 +1216,7 @@
// save place where remote handle info starts, then scan forward to ents
for (i = 0; i < num_ents; i++) {
UNPACK_INT(buff_ptr, j);
+ assert(j >= 0 && "Should be non-negative # proc/handles.");
buff_ptr += j * (sizeof(int)+sizeof(MBEntityHandle));
}
}
@@ -1232,7 +1239,7 @@
UNPACK_INT(buff_ptr, num_ents2);
// unpack the nodes per entity
- if (MBVERTEX != this_type) {
+ if (MBVERTEX != this_type && num_ents2) {
UNPACK_INT(buff_ptr, verts_per_entity);
}
@@ -1406,6 +1413,10 @@
MBErrorCode MBParallelComm::print_buffer(unsigned char *buff_ptr)
{
+ int total_size;
+ UNPACK_INT(buff_ptr, total_size);
+ std::cout << total_size << " entities..." << std::endl;
+
// 1. # entities = E
int num_ents;
int i, j, k;
@@ -1445,7 +1456,7 @@
UNPACK_INT(buff_ptr, num_ents2);
// unpack the nodes per entity
- if (MBVERTEX != this_type) {
+ if (MBVERTEX != this_type && num_ents2) {
UNPACK_INT(buff_ptr, verts_per_entity);
}
@@ -1664,8 +1675,8 @@
// set sharing tags
if (num_exist > 2) {
- std::fill(tag_ps+num_exist, tag_ps+MAX_SHARING_PROCS, -1);
- std::fill(tag_hs+num_exist, tag_hs+MAX_SHARING_PROCS, 0);
+ std::fill(tag_ps+num_exist, tag_ps+MAX_SHARING_PROCS-num_exist, -1);
+ std::fill(tag_hs+num_exist, tag_hs+MAX_SHARING_PROCS-num_exist, 0);
result = mbImpl->tag_set_data(sharedps_tag(), &new_h, 1, tag_ps);
RRA("Couldn't set sharedps tag.");
result = mbImpl->tag_set_data(sharedhs_tag(), &new_h, 1, tag_hs);
@@ -3005,7 +3016,7 @@
// pad tag data out to MAX_SHARING_PROCS with -1
assert( mit->first.size() <= MAX_SHARING_PROCS );
std::copy( mit->first.begin(), mit->first.end(), proc_ids );
- std::fill( proc_ids + mit->first.size(), proc_ids + MAX_SHARING_PROCS, -1 );
+ std::fill( proc_ids + mit->first.size(), proc_ids + MAX_SHARING_PROCS - mit->first.size(), -1 );
result = mbImpl->tag_set_data(sharedps_tag, &new_set, 1, proc_ids );
}
RRA("Failed to tag interface set with procs.");
@@ -3509,6 +3520,11 @@
MPE_Log_event(GHOST_START, procConfig.proc_rank(), "Starting ghost exchange.");
#endif
+#ifdef DEBUG_COMM
+ std::cout << "Entering exchange_ghost_cells with num_layers = "
+ << num_layers << std::endl;
+#endif
+
// if we're only finding out about existing ents, we have to be storing
// remote handles too
assert(num_layers > 0 || store_remote_handles);
@@ -3534,6 +3550,10 @@
std::fill(sendReqs, sendReqs+2*buffProcs.size(), MPI_REQUEST_NULL);
for (ind = 0, proc_it = buffProcs.begin();
proc_it != buffProcs.end(); proc_it++, ind++) {
+#ifdef DEBUG_COMM
+ std::cout << "Posting Irecv from " << buffProcs[ind]
+ << " for ghost entities." << std::endl;
+#endif
success = MPI_Irecv(&ghostRBuffs[ind][0], ghostRBuffs[ind].size(),
MPI_UNSIGNED_CHAR, buffProcs[ind],
MB_MESG_ENTS, procConfig.proc_comm(),
@@ -3653,9 +3673,26 @@
#endif
#ifdef DEBUG_MPE
- MPE_Log_event(IFACE_END, procConfig.proc_rank(), "Ending interface exchange.");
+ MPE_Log_event(IFACE_END, procConfig.proc_rank(), "Ending interface exchange.");
#endif
+#ifdef DEBUG_COMM
+ std::cout << "Exiting exchange_ghost_cells" << std::endl;
+#endif
+ //===========================================
+ // wait if requested
+ //===========================================
+ if (wait_all) {
+#ifdef DEBUG_COMM
+ success = MPI_Barrier(procConfig.proc_comm());
+#else
+ success = MPI_Waitall(buffProcs.size(), &recv_reqs[0], &status[0]);
+#endif
+ if (MPI_SUCCESS != success) {
+ result = MB_FAILURE;
+ RRA("Failed in waitall in ghost exchange.");
+ }
+ }
return MB_SUCCESS;
}
@@ -3665,6 +3702,10 @@
for (ind = 0, proc_it = buffProcs.begin();
proc_it != buffProcs.end(); proc_it++, ind++) {
// skip if iface layer and lower-rank proc
+#ifdef DEBUG_COMM
+ std::cout << "Posting Irecv from " << buffProcs[ind]
+ << " for remote handles." << std::endl;
+#endif
success = MPI_Irecv(&ghostRBuffs[ind][0], ghostRBuffs[ind].size(),
MPI_UNSIGNED_CHAR, buffProcs[ind],
MB_MESG_REMOTE_HANDLES, procConfig.proc_comm(),
@@ -3751,7 +3792,25 @@
result = check_all_shared_handles();
RRA("Failed check on all shared handles.");
#endif
+#ifdef DEBUG_COMM
+ std::cout << "Exiting exchange_ghost_cells" << std::endl;
+#endif
+ //===========================================
+ // wait if requested
+ //===========================================
+ if (wait_all) {
+#ifdef DEBUG_COMM
+ success = MPI_Barrier(procConfig.proc_comm());
+#else
+ success = MPI_Waitall(buffProcs.size(), &recv_reqs[0], &status[0]);
+#endif
+ if (MPI_SUCCESS != success) {
+ result = MB_FAILURE;
+ RRA("Failed in waitall in ghost exchange.");
+ }
+ }
+
return MB_SUCCESS;
}
@@ -3821,7 +3880,7 @@
RRA("");
}
else {
- unsigned int j = (ps[0] == procConfig.proc_rank() ? 1 : 0);
+ unsigned int j = (ps[0] == (int)procConfig.proc_rank() ? 1 : 0);
assert(-1 != ps[j]);
result = mbImpl->tag_set_data(sharedp_tag(), &ent, 1, ps+j);
RRA("");
@@ -4355,7 +4414,8 @@
// ok, now wait
MPI_Status status[MAX_SHARING_PROCS];
- success = MPI_Waitall(2*buffProcs.size(), &sendReqs[0], status);
+ success = MPI_Barrier(procConfig.proc_comm());
+// success = MPI_Waitall(2*buffProcs.size(), &sendReqs[0], status);
if (MPI_SUCCESS != success) {
result = MB_FAILURE;
RRA("Failure in waitall in tag exchange.");
@@ -5352,8 +5412,11 @@
// get all shared ent data from other procs
std::vector<std::vector<SharedEntityData> > shents(buffProcs.size()),
send_data(buffProcs.size());
- MBErrorCode result;
+ MBErrorCode result = check_local_shared();
+ if (MB_SUCCESS != result)
+ return result;
+
result = pack_shared_handles(send_data);
if (MB_SUCCESS != result)
return result;
@@ -5367,6 +5430,84 @@
return check_my_shared_handles(shents);
}
+MBErrorCode MBParallelComm::check_local_shared()
+{
+ // do some local checks on shared entities to make sure things look
+ // consistent
+
+ // check that non-vertex shared entities are shared by same procs as all
+ // their vertices
+ std::pair<MBRange::const_iterator,MBRange::const_iterator> vert_it =
+ sharedEnts.equal_range(MBVERTEX);
+ std::vector<MBEntityHandle> dum_connect;
+ const MBEntityHandle *connect;
+ int num_connect;
+ int tmp_procs[MAX_SHARING_PROCS];
+ MBEntityHandle tmp_hs[MAX_SHARING_PROCS];
+ std::set<int> tmp_set, vset;
+ int num_ps;
+ MBErrorCode result;
+ unsigned char pstat;
+ MBRange bad_ents;
+ for (MBRange::const_iterator rit = sharedEnts.begin(); rit != sharedEnts.end(); rit++) {
+
+ // get sharing procs for this ent
+ result = get_sharing_data(*rit, tmp_procs, tmp_hs, pstat, num_ps);
+ if (MB_SUCCESS != result || num_ps < (int)vset.size()) {
+ bad_ents.insert(*rit);
+ continue;
+ }
+
+ // entity must be shared
+ if (!(pstat & PSTATUS_SHARED) ||
+ // if entity is not owned this must not be first proc
+ (pstat & PSTATUS_NOT_OWNED && tmp_procs[0] == (int)procConfig.proc_rank()) ||
+ // if entity is owned and multishared, this must be first proc
+ (!(pstat & PSTATUS_NOT_OWNED) && pstat & PSTATUS_MULTISHARED &&
+ (tmp_procs[0] != (int)procConfig.proc_rank() || tmp_hs[0] != *rit))) {
+ bad_ents.insert(*rit);
+ continue;
+ }
+
+ if (mbImpl->type_from_handle(*rit) == MBVERTEX) continue;
+
+ // copy element's procs to vset and save size
+ int orig_ps = num_ps; vset.clear();
+ std::copy(tmp_procs, tmp_procs+num_ps, std::inserter(vset, vset.begin()));
+
+ // get vertices for this ent and intersection of sharing procs
+ result = mbImpl->get_connectivity(*rit, connect, num_connect, false, &dum_connect);
+ if (MB_SUCCESS != result) {bad_ents.insert(*rit); continue;}
+
+ for (int i = 0; i < num_connect; i++) {
+ result = get_sharing_data(connect[i], tmp_procs, NULL, pstat, num_ps);
+ if (MB_SUCCESS != result) {bad_ents.insert(*rit); continue;}
+ if (!num_ps) {vset.clear(); break;}
+ std::sort(tmp_procs, tmp_procs+num_ps);
+ tmp_set.clear();
+ std::set_intersection(tmp_procs, tmp_procs+num_ps,
+ vset.begin(), vset.end(), std::inserter(tmp_set, tmp_set.end()));
+ vset.swap(tmp_set);
+ if (vset.empty()) break;
+ }
+
+ // intersect them; should be the same size as orig_ps
+ tmp_set.clear();
+ std::set_intersection(tmp_procs, tmp_procs+num_ps,
+ vset.begin(), vset.end(), std::inserter(tmp_set, tmp_set.end()));
+ if (orig_ps != (int)tmp_set.size()) bad_ents.insert(*rit);
+ }
+
+ if (!bad_ents.empty()) {
+ list_entities(bad_ents);
+ return MB_FAILURE;
+ }
+
+ // to do: check interface sets
+
+ return MB_SUCCESS;
+}
+
MBErrorCode MBParallelComm::check_all_shared_handles(MBParallelComm **pcs,
int num_pcs)
{
Modified: MOAB/trunk/parallel/MBParallelComm.hpp
===================================================================
--- MOAB/trunk/parallel/MBParallelComm.hpp 2009-07-17 22:29:35 UTC (rev 3027)
+++ MOAB/trunk/parallel/MBParallelComm.hpp 2009-07-18 03:02:44 UTC (rev 3028)
@@ -541,6 +541,11 @@
MBErrorCode pack_shared_handles(
std::vector<std::vector<SharedEntityData> > &send_data);
+ // check consistency of sharedEnts against their tags and their
+ // vertices' tags
+ MBErrorCode check_local_shared();
+
+ // check contents of communicated shared entity data against tags
MBErrorCode check_my_shared_handles(
std::vector<std::vector<SharedEntityData> > &shents,
const char *prefix = NULL);