[MOAB-dev] commit/MOAB: danwu: Updated NC writer and its unit test. Parallel write should work now for HOMME.

commits-noreply at bitbucket.org commits-noreply at bitbucket.org
Thu Apr 24 15:48:01 CDT 2014


1 new commit in MOAB:

https://bitbucket.org/fathomteam/moab/commits/9136633af5e1/
Changeset:   9136633af5e1
Branch:      ncwriter
User:        danwu
Date:        2014-04-24 22:47:43
Summary:     Updated NC writer and its unit test. Parallel write should work now for HOMME.

Affected #:  3 files

diff --git a/src/io/NCWriteHOMME.cpp b/src/io/NCWriteHOMME.cpp
index 45ccff0..36ea5c7 100644
--- a/src/io/NCWriteHOMME.cpp
+++ b/src/io/NCWriteHOMME.cpp
@@ -6,6 +6,7 @@
 
 #include "NCWriteHOMME.hpp"
 #include "moab/WriteUtilIface.hpp"
+#include "MBTagConventions.hpp"
 
 #define ERRORR(rval, str) \
   if (MB_SUCCESS != rval) { _writeNC->mWriteIface->report_error("%s", str); return rval; }
@@ -51,13 +52,36 @@ ErrorCode NCWriteHOMME::collect_mesh_info()
   ERRORR(rval, "Trouble getting local vertices in current file set.");
   assert(!local_verts.empty());
 
-  std::vector<int> gids(local_verts.size());
-  rval = mbImpl->tag_get_data(mGlobalIdTag, local_verts, &gids[0]);
+#ifdef USE_MPI
+  bool& isParallel = _writeNC->isParallel;
+  if (isParallel) {
+    ParallelComm*& myPcomm = _writeNC->myPcomm;
+    int rank = myPcomm->proc_config().proc_rank();
+    int procs = myPcomm->proc_config().proc_size();
+    if (procs > 1) {
+      rval = myPcomm->filter_pstatus(local_verts, PSTATUS_NOT_OWNED, PSTATUS_NOT, -1, &localVertsOwned);
+      ERRORR(rval, "Trouble getting owned vertices in set.");
+      // Assume that PARALLEL_RESOLVE_SHARED_ENTS option is set
+      // We should avoid writing in parallel with overlapped data
+      if (rank > 0)
+        assert("PARALLEL_RESOLVE_SHARED_ENTS option is set" && localVertsOwned.size() < local_verts.size());
+    }
+    else
+      localVertsOwned = local_verts;
+  }
+  else
+    localVertsOwned = local_verts; // Not running in parallel, but still with MPI
+#else
+  localVertsOwned = local_verts;
+#endif
+
+  std::vector<int> gids(localVertsOwned.size());
+  rval = mbImpl->tag_get_data(mGlobalIdTag, localVertsOwned, &gids[0]);
   ERRORR(rval, "Trouble getting global IDs on local vertices.");
 
   // Restore localGidVerts
-  std::copy(gids.rbegin(), gids.rend(), range_inserter(localGidVerts));
-  nLocalVertices = localGidVerts.size();
+  std::copy(gids.rbegin(), gids.rend(), range_inserter(localGidVertsOwned));
+  nLocalVerticesOwned = localGidVertsOwned.size();
 
   return MB_SUCCESS;
 }
@@ -100,8 +124,8 @@ ErrorCode NCWriteHOMME::collect_variable_data(std::vector<std::string>& var_name
           // Vertices
           // Start from the first localGidVerts
           // Actually, this will be reset later for writing
-          currentVarData.writeStarts[2] = localGidVerts[0] - 1;
-          currentVarData.writeCounts[2] = nLocalVertices;
+          currentVarData.writeStarts[2] = localGidVertsOwned[0] - 1;
+          currentVarData.writeCounts[2] = nLocalVerticesOwned;
           break;
         default:
           ERRORR(MB_FAILURE, "Unexpected entity location type for HOMME non-set variable.");
@@ -124,8 +148,6 @@ ErrorCode NCWriteHOMME::write_values(std::vector<std::string>& var_names)
   std::set<std::string>& dummyVarNames = _writeNC->dummyVarNames;
   std::map<std::string, WriteNC::VarData>& varInfo = _writeNC->varInfo;
 
-  ErrorCode rval;
-
   // Start with coordinates
   for (std::set<std::string>::iterator setIt = usedCoordinates.begin();
       setIt != usedCoordinates.end(); ++setIt) {
@@ -172,12 +194,9 @@ ErrorCode NCWriteHOMME::write_values(std::vector<std::string>& var_names)
     int numTimeSteps = (int)variableData.varTags.size();
     if (variableData.has_tsteps) {
       // Get entities of this variable
-      Range ents;
       switch (variableData.entLoc) {
         case WriteNC::ENTLOCVERT:
           // Vertices
-          rval = mbImpl->get_entities_by_dimension(_fileSet, 0, ents);
-          ERRORR(rval, "Can't get entities for vertices.");
           break;
         default:
           ERRORR(MB_FAILURE, "Unexpected entity location type for HOMME non-set variable.");
@@ -189,16 +208,16 @@ ErrorCode NCWriteHOMME::write_values(std::vector<std::string>& var_names)
       // FIXME: Should use tstep_nums (from writing options) later
       for (int j = 0; j < numTimeSteps; j++) {
         // We will write one time step, and count will be one; start will be different
-        // We will write values directly from tag_iterate, but we should also transpose for level
-        // so that means deep copy for transpose
+        // Use tag_get_data instead of tag_iterate to get values, as localVertsOwned
+        // might not be contiguous. We should also transpose for level so that means
+        // deep copy for transpose
         variableData.writeStarts[0] = j; // This is time, again
-        int count;
-        void* dataptr;
-        rval = mbImpl->tag_iterate(variableData.varTags[j], ents.begin(), ents.end(), count, dataptr);
-        assert(count == (int)ents.size());
+        std::vector<double> tag_data(nLocalVerticesOwned * variableData.numLev);
+        ErrorCode rval = mbImpl->tag_get_data(variableData.varTags[j], localVertsOwned, &tag_data[0]);
+        ERRORR(rval, "Trouble getting tag data on owned vertices.");
 
 #ifdef PNETCDF_FILE
-        size_t nb_writes = localGidVerts.psize();
+        size_t nb_writes = localGidVertsOwned.psize();
         std::vector<int> requests(nb_writes), statuss(nb_writes);
         size_t idxReq = 0;
 #endif
@@ -207,14 +226,14 @@ ErrorCode NCWriteHOMME::write_values(std::vector<std::string>& var_names)
         int success = 0;
         switch (variableData.varDataType) {
           case NC_DOUBLE: {
-            std::vector<double> tmpdoubledata(nLocalVertices * variableData.numLev);
+            std::vector<double> tmpdoubledata(nLocalVerticesOwned * variableData.numLev);
             // Transpose (ncol, lev) back to (lev, ncol)
-            jik_to_kji(nLocalVertices, 1, variableData.numLev, &tmpdoubledata[0], (double*)(dataptr));
+            jik_to_kji(nLocalVerticesOwned, 1, variableData.numLev, &tmpdoubledata[0], &tag_data[0]);
 
             size_t indexInDoubleArray = 0;
             size_t ic = 0;
-            for (Range::pair_iterator pair_iter = localGidVerts.pair_begin();
-                pair_iter != localGidVerts.pair_end(); ++pair_iter, ic++) {
+            for (Range::pair_iterator pair_iter = localGidVertsOwned.pair_begin();
+                pair_iter != localGidVertsOwned.pair_end(); ++pair_iter, ic++) {
               EntityHandle starth = pair_iter->first;
               EntityHandle endh = pair_iter->second;
               variableData.writeStarts[2] = (NCDF_SIZE)(starth - 1);
@@ -236,7 +255,7 @@ ErrorCode NCWriteHOMME::write_values(std::vector<std::string>& var_names)
               // next subrange
               indexInDoubleArray += (endh - starth + 1) * variableData.numLev;
             }
-            assert(ic == localGidVerts.psize());
+            assert(ic == localGidVertsOwned.psize());
 #ifdef PNETCDF_FILE
             success = ncmpi_wait_all(_fileId, requests.size(), &requests[0], &statuss[0]);
             ERRORS(success, "Failed on wait_all.");

diff --git a/src/io/NCWriteHelper.hpp b/src/io/NCWriteHelper.hpp
index 6678bb5..b05fe12 100644
--- a/src/io/NCWriteHelper.hpp
+++ b/src/io/NCWriteHelper.hpp
@@ -100,21 +100,24 @@ class UcdNCWriteHelper : public NCWriteHelper
 public:
   UcdNCWriteHelper(WriteNC* writeNC, int fileId, const FileOptions& opts, EntityHandle fileSet)
 : NCWriteHelper(writeNC, fileId, opts, fileSet),
-  nLocalCells(0), nLocalEdges(0), nLocalVertices(0),
+  nLocalCellsOwned(0), nLocalEdgesOwned(0), nLocalVerticesOwned(0),
   cDim(-1), eDim(-1), vDim(-1) {}
   virtual ~UcdNCWriteHelper() {}
 
 protected:
-  //! Dimensions of my local part of grid
-  int nLocalCells;
-  int nLocalEdges;
-  int nLocalVertices;
+  //! Dimensions of my local owned part of grid
+  int nLocalCellsOwned;
+  int nLocalEdgesOwned;
+  int nLocalVerticesOwned;
 
   //! Dimension numbers for nCells, nEdges and nVertices
   int cDim, eDim, vDim;
 
-  //! Local global ID for cells, edges and vertices
-  Range localGidCells, localGidEdges, localGidVerts;
+  //! Local owned cells, edges and vertices
+  Range localCellsOwned, localEdgesOwned, localVertsOwned;
+
+  //! Local global ID for owned cells, edges and vertices
+  Range localGidCellsOwned, localGidEdgesOwned, localGidVertsOwned;
 };
 
 } // namespace moab

diff --git a/test/io/write_nc.cpp b/test/io/write_nc.cpp
index 827b102..8f87063 100644
--- a/test/io/write_nc.cpp
+++ b/test/io/write_nc.cpp
@@ -106,6 +106,7 @@ void test_eul_read_write_T()
     rval = mb.write_file("test_par_eul_T.nc", 0, write_opts.c_str(), &set, 1);
   else
     rval = mb.write_file("test_eul_T.nc", 0, write_opts.c_str(), &set, 1);
+  CHECK_ERR(rval);
 }
 
 // Check non-set variable T on some quads
@@ -256,6 +257,7 @@ void test_fv_read_write_T()
     rval = mb.write_file("test_par_fv_T.nc", 0, write_opts.c_str(), &set, 1);
   else
     rval = mb.write_file("test_fv_T.nc", 0, write_opts.c_str(), &set, 1);
+  CHECK_ERR(rval);
 }
 
 // Check non-set variable T on some quads
@@ -358,10 +360,6 @@ void test_homme_read_write_T()
     return;
 #endif
 
-  // Only test serial case for the time being
-  if (procs > 1)
-    return;
-
   Core moab;
   Interface& mb = moab;
 
@@ -374,12 +372,21 @@ void test_homme_read_write_T()
 
   // Load non-set variable T, set variable lat, set variable lon, and the mesh
   read_opts += ";DEBUG_IO=0;VARIABLE=T,lat,lon";
+  if (procs > 1)
+    read_opts += ";PARALLEL_RESOLVE_SHARED_ENTS";
   rval = mb.load_file(example_homme, &set, read_opts.c_str());
   CHECK_ERR(rval);
 
   // Write variables T, lat and lon
   std::string write_opts = ";;VARIABLE=T,lat,lon;DEBUG_IO=0;";
-  rval = mb.write_file("test_homme_T.nc", 0, write_opts.c_str(), &set, 1);
+#ifdef USE_MPI
+  // Use parallel options
+  write_opts += std::string(";PARALLEL=WRITE_PART");
+#endif
+  if (procs > 1)
+    rval = mb.write_file("test_par_homme_T.nc", 0, write_opts.c_str(), &set, 1);
+  else
+    rval = mb.write_file("test_homme_T.nc", 0, write_opts.c_str(), &set, 1);
   CHECK_ERR(rval);
 }
 
@@ -387,8 +394,10 @@ void test_homme_read_write_T()
 // Also check set variables lat and lon
 void test_homme_check_T()
 {
+  int rank = 0;
   int procs = 1;
 #ifdef USE_MPI
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Comm_size(MPI_COMM_WORLD, &procs);
 #endif
 
@@ -398,10 +407,6 @@ void test_homme_check_T()
     return;
 #endif
 
-  // Only test serial case for the time being
-  if (procs > 1)
-    return;
-
   Core moab;
   Interface& mb = moab;
 
@@ -416,9 +421,14 @@ void test_homme_check_T()
   read_opts += ";VARIABLE=T,lat,lon";
   read_opts += ";CONN=";
   read_opts += example_homme_mapping;
-  rval = mb.load_file("test_homme_T.nc", &set, read_opts.c_str());
+  if (procs > 1)
+    rval = mb.load_file("test_par_homme_T.nc", &set, read_opts.c_str());
+  else
+    rval = mb.load_file("test_homme_T.nc", &set, read_opts.c_str());
   CHECK_ERR(rval);
 
+  double eps = 1e-10;
+
   if (1 == procs) {
     // Get tag lat
     Tag lat_tag;
@@ -432,7 +442,6 @@ void test_homme_check_T()
     CHECK_ERR(rval);
     CHECK_EQUAL(3458, var_len);
     double* lat_val = (double*)var_data;
-    double eps = 1e-10;
     CHECK_REAL_EQUAL(-35.2643896827547, lat_val[0], eps);
     CHECK_REAL_EQUAL(23.8854752772335, lat_val[1728], eps);
     CHECK_REAL_EQUAL(29.8493120043874, lat_val[1729], eps);
@@ -453,33 +462,49 @@ void test_homme_check_T()
     CHECK_REAL_EQUAL(202.5, lon_val[1728], eps);
     CHECK_REAL_EQUAL(194.359423525313, lon_val[1729], eps);
     CHECK_REAL_EQUAL(135, lon_val[3457], eps);
+  }
 
-    // Get tag T0
-    Tag Ttag0;
-    rval = mb.tag_get_handle("T0", 26, MB_TYPE_DOUBLE, Ttag0);
-    CHECK_ERR(rval);
+  // Get tag T0
+  Tag Ttag0;
+  rval = mb.tag_get_handle("T0", 26, MB_TYPE_DOUBLE, Ttag0);
+  CHECK_ERR(rval);
 
-    // Get vertices
-    Range verts;
-    rval = mb.get_entities_by_type(0, MBVERTEX, verts);
-    CHECK_ERR(rval);
-    CHECK_EQUAL((size_t)3458, verts.size());
+  // Get vertices
+  Range verts;
+  rval = mb.get_entities_by_type(0, MBVERTEX, verts);
+  CHECK_ERR(rval);
 
-    // Get all values of tag T0
-    int count;
-    void* Tbuf;
-    rval = mb.tag_iterate(Ttag0, verts.begin(), verts.end(), count, Tbuf);
-    CHECK_ERR(rval);
-    CHECK_EQUAL((size_t)count, verts.size());
+  // Get all values of tag T0
+  int count;
+  void* Tbuf;
+  rval = mb.tag_iterate(Ttag0, verts.begin(), verts.end(), count, Tbuf);
+  CHECK_ERR(rval);
+  CHECK_EQUAL((size_t)count, verts.size());
 
-    // Check some values of tag T0 on first level
-    eps = 0.0001;
-    double* data = (double*) Tbuf;
+  double* data = (double*) Tbuf;
+  eps = 0.0001;
+
+  if (1 == procs) {
+    CHECK_EQUAL((size_t)3458, verts.size());
     CHECK_REAL_EQUAL(233.1136, data[0 * 26], eps); // First vert
     CHECK_REAL_EQUAL(236.1505, data[1728 * 26], eps); // Median vert
     CHECK_REAL_EQUAL(235.7722, data[1729 * 26], eps); // Median vert
     CHECK_REAL_EQUAL(234.0416, data[3457 * 26], eps); // Last vert
   }
+  else if (2 == procs) {
+    if (0 == rank) {
+      CHECK_EQUAL((size_t)1825, verts.size());
+      CHECK_REAL_EQUAL(233.1136, data[0 * 26], eps); // First vert
+      CHECK_REAL_EQUAL(237.1977, data[912 * 26], eps); // Median vert
+      CHECK_REAL_EQUAL(234.9711, data[1824 * 26], eps); // Last vert
+    }
+    else if (1 == rank) {
+      CHECK_EQUAL((size_t)1825, verts.size());
+      CHECK_REAL_EQUAL(233.1136, data[0 * 26], eps); // First vert
+      CHECK_REAL_EQUAL(231.0446, data[912 * 26], eps); // Median vert
+      CHECK_REAL_EQUAL(234.0416, data[1824 * 26], eps); // Last vert
+    }
+  }
 }
 
 void get_eul_read_options(std::string& opts)

Repository URL: https://bitbucket.org/fathomteam/moab/

--

This is a commit notification from bitbucket.org. You are receiving
this because the commit notification service is enabled for the
recipient address of this email.


More information about the moab-dev mailing list