[MOAB-dev] parallel write

Lukasz Kaczmarczyk likask at civil.gla.ac.uk
Fri Jul 2 10:01:10 CDT 2010


Thanks for the answer; it looks like MPI-IO does not work well with the NFS filesystem.
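
A quick way to check whether MPI-IO itself is the culprit on the NFS mount, independently of MOAB and HDF5, is a minimal stand-alone write test. The sketch below is only an illustration, and the file path is just a placeholder for a file on the NFS filesystem:

  #include <mpi.h>

  /* each rank writes its rank id at its own offset; if this fails on the
     NFS mount, the problem is in the MPI-IO (ROMIO) layer rather than in
     MOAB or HDF5 */
  int test_mpiio_write(MPI_Comm comm, const char *path) {
    int rank, ierr, value;
    MPI_File fh;
    MPI_Comm_rank(comm, &rank);
    ierr = MPI_File_open(comm, (char *)path,
                         MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    if (MPI_SUCCESS != ierr) return ierr;
    value = rank;
    ierr = MPI_File_write_at(fh, (MPI_Offset)(rank * sizeof(int)), &value, 1,
                             MPI_INT, MPI_STATUS_IGNORE);
    MPI_File_close(&fh);
    return ierr;
  }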

As a workaround, I write sequentially and gather the meshset information on the root node. This will not work for extremely large meshes, but it is fine for me. I think you should consider adding such an option to the MOAB implementation. This is how I do it now; it is far from perfect, but it works:

  mbImpl->create_meshset(MESHSET_SET,outset);
  // gather the 3d entities of ElemTrefftzMeshSet onto the root processor
  gather_PostProcTrefftzMeshSet_entities(ElemTrefftzMeshSet,outset,3);
  // only the root processor writes the gathered set (sequential write)
  if((int)mbpc->proc_config().proc_rank() == 0)
    result = mbImpl->write_file(file,0,0,&outset,1,tag_list,2); EE(mbImpl) RR
  mbImpl->clear_meshset(&outset,1);
  mbImpl->delete_entities(&outset,1);
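
One small refinement that might be worth adding here (it is not part of the code above): since only the root calls write_file, the other processors never learn whether the write succeeded. Broadcasting a success flag from the root lets every rank react to a failure together; a minimal sketch, using the same mbpc and result variables:

  int write_ok = 1;
  if ((int)mbpc->proc_config().proc_rank() == 0)
    write_ok = (MB_SUCCESS == result) ? 1 : 0;   // outcome of the root write
  MPI_Bcast(&write_ok, 1, MPI_INT, 0, mbpc->proc_config().proc_comm());
  if (!write_ok) {
    // handle the failed write collectively (report, retry, abort, ...)
  }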

void FETrefftzShell::gather_PostProcTrefftzMeshSet_entities(MBEntityHandle inset,MBEntityHandle outset,int dim) {
  MBRange entities_to_gather;
  mbImpl->get_entities_by_dimension(inset,dim,entities_to_gather); // use the requested dimension rather than a hard-coded 3
  if ((int)mbpc->proc_config().proc_rank() == 0) {
    mbImpl->add_entities(outset,entities_to_gather);
  }
  int buff_size = 0, success; // initialise so ranks that neither send nor receive skip the transfer loop
  MPI_Status status;
  MBParallelComm::Buffer buff(INITIAL_BUFF_SIZE);
  buff.reset_ptr(sizeof(int));
  for( unsigned int rr = 1; rr<mbpc->proc_config().proc_size(); rr++) {
    if ((int)mbpc->proc_config().proc_rank() == rr) {
      result = add_verts(entities_to_gather);
      RRA("Failed to add adj vertices.");
      buff.reset_ptr(sizeof(int));
      result = mbpc->pack_buffer( entities_to_gather, false, true, 
                          false, -1, &buff); 
      RRA("Failed to compute buffer size in broadcast_entities.");
      buff.set_stored_size();
      buff_size = buff.buff_ptr - buff.mem_ptr;

      success = MPI_Ssend(&buff_size, 1, MPI_INT, 0, 100+rr, mbpc->proc_config().proc_comm() );
      if (MPI_SUCCESS != success) {
	result = MB_FAILURE;
	RRA("MPI_Send of buffer size failed.");
      }
    } else if( (int)mbpc->proc_config().proc_rank() == 0 ) {
      success = MPI_Recv(&buff_size, 1, MPI_INT, rr, 100+rr, mbpc->proc_config().proc_comm(), &status);
      if (MPI_SUCCESS != success) {
	result = MB_FAILURE;
	RRA("MPI_Recive of buffer size failed.");
      }
    }
    if (!buff_size) continue; //no data
    buff.reserve(buff_size); // make sure the buffer on the root is large enough to receive into
    size_t offset = 0;
    // ship the packed buffer to the root in chunks of at most MAX_BCAST_SIZE bytes
    while (buff_size) {
      int size = std::min( buff_size, MAX_BCAST_SIZE );
      if( (int)mbpc->proc_config().proc_rank() == rr ) {
        success = MPI_Ssend(buff.mem_ptr+offset, size, MPI_UNSIGNED_CHAR, 0, 1000+rr, mbpc->proc_config().proc_comm() );
        if (MPI_SUCCESS != success) {
          result = MB_FAILURE;
          RRA("MPI_Ssend of buffer failed.");
        }
      } else if( (int)mbpc->proc_config().proc_rank() == 0 ) {
        success = MPI_Recv(buff.mem_ptr+offset, size, MPI_UNSIGNED_CHAR, rr, 1000+rr, mbpc->proc_config().proc_comm(), &status );
        if (MPI_SUCCESS != success) {
          result = MB_FAILURE;
          RRA("MPI_Recv of buffer failed.");
        }
      }
      offset += size;
      buff_size -= size;
    }
    MBRange entities_received;
    if ((int)mbpc->proc_config().proc_rank() == 0) {
      // unpack on the root and add the received entities to the output set
      std::vector<std::vector<EntityHandle> > dum1a, dum1b;
      std::vector<std::vector<int> > dum1p;
      std::vector<EntityHandle> dum2;
      std::vector<unsigned int> dum3;
      buff.reset_ptr(sizeof(int));
      result = mbpc->unpack_buffer(buff.buff_ptr, false, rr, -1, 
                           dum1a, dum1b, dum1p, dum2, dum2, dum3, entities_received);
      RRA("Failed to unpack buffer in broadcast_entities.");
      mbImpl->add_entities(outset,entities_received);
    }
  }
  return;
}
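
As an aside, the explicit size handshake and the chunking above could probably be avoided on the receiving side by probing for the incoming message length first, assuming the packed buffer fits into a single MPI message. A rough sketch in plain MPI, independent of the MBParallelComm::Buffer class (the helper name is made up):

  #include <mpi.h>
  #include <vector>

  /* receive a raw byte buffer from rank src without a separate size message:
     probe for the message, size the receive buffer from its length, then
     receive it in one call */
  std::vector<unsigned char> recv_packed_buffer(int src, int tag, MPI_Comm comm) {
    MPI_Status status;
    int nbytes = 0;
    MPI_Probe(src, tag, comm, &status);
    MPI_Get_count(&status, MPI_UNSIGNED_CHAR, &nbytes);
    std::vector<unsigned char> buf(nbytes);
    MPI_Recv(buf.empty() ? NULL : &buf[0], nbytes, MPI_UNSIGNED_CHAR,
             src, tag, comm, MPI_STATUS_IGNORE);
    return buf;
  }

The sender would then issue a single MPI_Ssend of buff.mem_ptr with length buff_size, and the root could copy the received bytes back into a Buffer before calling unpack_buffer as above.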




On 2 Jul 2010, at 15:06, Jason Kraftcheck wrote:

> Lukasz Kaczmarczyk wrote:
>> Hello,
>> 
>> I am doing a parallel write to the MOAB file format and I get a random error. Do you have an idea what the reason for this problem is, or how I can get around it? Is there an easy way to gather the mesh/meshset on one processor and then do the writing?
>> 
>> 
> 
> I have not seen that particular error before.  As the library reports it as
> "Internal error (too specific to document in detail)" and "Some MPI function
> failed", my first guess would be that there is some problem with the HDF5
> library or the underlying MPI-IO implementation.
> 
> - jason


