[MOAB-dev] commit/MOAB: danwu: Updated NC writer and its unit test. Parallel write should work now for structured mesh (e.g. CAM_EUL).

commits-noreply at bitbucket.org commits-noreply at bitbucket.org
Mon Apr 21 12:05:13 CDT 2014


1 new commit in MOAB:

https://bitbucket.org/fathomteam/moab/commits/0da3e5524647/
Changeset:   0da3e5524647
Branch:      ncwriter
User:        danwu
Date:        2014-04-21 19:04:58
Summary:     Updated NC writer and its unit test. Parallel write should work now for structured mesh (e.g. CAM_EUL).

Affected #:  3 files

diff --git a/src/io/NCWriteHelper.cpp b/src/io/NCWriteHelper.cpp
index 8e50903..aa69d0f 100644
--- a/src/io/NCWriteHelper.cpp
+++ b/src/io/NCWriteHelper.cpp
@@ -457,42 +457,15 @@ ErrorCode ScdNCWriteHelper::write_values(std::vector<std::string>& var_names)
   std::map<std::string, WriteNC::VarData>& varInfo = _writeNC->varInfo;
 
   ErrorCode rval;
-
-  // Start with coordinates
-  for (std::set<std::string>::iterator setIt = usedCoordinates.begin();
-      setIt != usedCoordinates.end(); ++setIt) {
-    const std::string& coordName = *setIt;
-
-    // Skip dummy coordinate variables (if any)
-    if (dummyVarNames.find(coordName) != dummyVarNames.end())
-      continue;
-
-    std::map<std::string, WriteNC::VarData>::iterator vit = varInfo.find(coordName);
-    if (vit == varInfo.end())
-      ERRORR(MB_FAILURE, "Can't find one coordinate variable.");
-
-    WriteNC::VarData& varCoordData = vit->second;
-
-    int success = 0;
-    switch (varCoordData.varDataType) {
-      case NC_DOUBLE:
-        success = NCFUNCAP(_vara_double)(_fileId, varCoordData.varId, &varCoordData.writeStarts[0],
-                  &varCoordData.writeCounts[0], (double*)(varCoordData.memoryHogs[0]));
-        ERRORS(success, "Failed to write double data.");
-        break;
-      case NC_INT:
-        success = NCFUNCAP(_vara_int)(_fileId, varCoordData.varId, &varCoordData.writeStarts[0],
-                  &varCoordData.writeCounts[0], (int*)(varCoordData.memoryHogs[0]));
-        ERRORS(success, "Failed to write int data.");
-        break;
-      default:
-        success = 1;
-        break;
-    }
- }
+  int success;
 
   // Now look at requested var_names; if they have time, we will have a list, and write one at a time
-  // if not, just write regularly
+  // If not, just write regularly
+  // For parallel write, we assume that the data ranges do not overlap across processors (this is true
+  // for Euler and FV variables on quads)
+  // While overlapped writing might still work, we should better not take that risk
+  // Use collective I/O mode put (synchronous write) for the time being, we can try nonblocking put
+  // (request aggregation) later
   for (size_t i = 0; i < var_names.size(); i++) {
     std::map<std::string, WriteNC::VarData>::iterator vit = varInfo.find(var_names[i]);
     if (vit == varInfo.end())
@@ -536,7 +509,6 @@ ErrorCode ScdNCWriteHelper::write_values(std::vector<std::string>& var_names)
         assert(count == (int)ents.size());
 
         // Now write from memory directly
-        int success = 0;
         switch (variableData.varDataType) {
           case NC_DOUBLE: {
             std::vector<double> tmpdoubledata(ni*nj*nk);
@@ -554,7 +526,6 @@ ErrorCode ScdNCWriteHelper::write_values(std::vector<std::string>& var_names)
       }
     }
     else {
-      int success = 0;
       switch (variableData.varDataType) {
         case NC_DOUBLE:
           success = NCFUNCAP(_vara_double)(_fileId, variableData.varId, &variableData.writeStarts[0],
@@ -567,6 +538,62 @@ ErrorCode ScdNCWriteHelper::write_values(std::vector<std::string>& var_names)
     }
   }
 
+  // Write coordinates used by requested var_names
+  // Use independent I/O mode put, since this write is only for the root processor
+  // CAUTION: if the NetCDF ID is from a previous call to ncmpi_create rather than ncmpi_open,
+  // all processors need to call ncmpi_begin_indep_data(). If only the root processor does so,
+  // ncmpi_begin_indep_data() will be blocked forever, :(
+
+  // Enter independent I/O mode
+  success = NCFUNC(begin_indep_data)(_fileId);
+  ERRORS(success, "Failed to begin independent I/O mode.");
+
+  int rank = 0;
+#ifdef USE_MPI
+  bool& isParallel = _writeNC->isParallel;
+  if (isParallel) {
+    ParallelComm*& myPcomm = _writeNC->myPcomm;
+    rank = myPcomm->proc_config().proc_rank();
+  }
+#endif
+  if (0 == rank) {
+    for (std::set<std::string>::iterator setIt = usedCoordinates.begin();
+        setIt != usedCoordinates.end(); ++setIt) {
+      const std::string& coordName = *setIt;
+
+      // Skip dummy coordinate variables (if any)
+      if (dummyVarNames.find(coordName) != dummyVarNames.end())
+        continue;
+
+      std::map<std::string, WriteNC::VarData>::iterator vit = varInfo.find(coordName);
+      if (vit == varInfo.end())
+        ERRORR(MB_FAILURE, "Can't find one coordinate variable.");
+
+      WriteNC::VarData& varCoordData = vit->second;
+
+      switch (varCoordData.varDataType) {
+        case NC_DOUBLE:
+          // Independent I/O mode put
+          success = NCFUNCP(_vara_double)(_fileId, varCoordData.varId, &varCoordData.writeStarts[0],
+                    &varCoordData.writeCounts[0], (double*)(varCoordData.memoryHogs[0]));
+          ERRORS(success, "Failed to write double data.");
+          break;
+        case NC_INT:
+          // Independent I/O mode put
+          success = NCFUNCP(_vara_int)(_fileId, varCoordData.varId, &varCoordData.writeStarts[0],
+                    &varCoordData.writeCounts[0], (int*)(varCoordData.memoryHogs[0]));
+          ERRORS(success, "Failed to write int data.");
+          break;
+        default:
+          ERRORR(MB_FAILURE, "Not implemented yet.");
+      }
+    }
+  }
+
+  // End independent I/O mode
+  success = NCFUNC(end_indep_data)(_fileId);
+  ERRORS(success, "Failed to end independent I/O mode.");
+
   return MB_SUCCESS;
 }
 

diff --git a/src/io/WriteNC.hpp b/src/io/WriteNC.hpp
index f9af683..1c5d78e 100644
--- a/src/io/WriteNC.hpp
+++ b/src/io/WriteNC.hpp
@@ -41,7 +41,10 @@
 //! Collective I/O mode put
 #define NCFUNCAP(func) ncmpi_put ## func ## _all
 
-//! Nonblocking put (request aggregation), used so far only for ucd mesh
+//! Independent I/O mode put
+#define NCFUNCP(func) ncmpi_put ## func
+
+//! Nonblocking put (request aggregation)
 #define NCFUNCREQP(func) ncmpi_iput ## func
 
 #define NCDF_SIZE MPI_Offset
@@ -50,6 +53,7 @@
 #include "netcdf.h"
 #define NCFUNC(func) nc_ ## func
 #define NCFUNCAP(func) nc_put ## func
+#define NCFUNCP(func) nc_put ## func
 #define NCDF_SIZE size_t
 #define NCDF_DIFF ptrdiff_t
 #endif

diff --git a/test/io/write_nc.cpp b/test/io/write_nc.cpp
index 30b3f15..6f2c9af 100644
--- a/test/io/write_nc.cpp
+++ b/test/io/write_nc.cpp
@@ -27,8 +27,8 @@ void test_eul_check_T();
 void test_homme_read_write_T();
 void test_homme_check_T();
 
-void get_eul_options(std::string& opts);
-void get_homme_options(std::string& opts);
+void get_eul_read_options(std::string& opts);
+void get_homme_read_options(std::string& opts);
 
 int main(int argc, char* argv[])
 {
@@ -60,118 +60,170 @@ int main(int argc, char* argv[])
 // In test_eul_check_T(), we need to load the output file with mesh
 void test_eul_read_write_T()
 {
+  int procs = 1;
+#ifdef USE_MPI
+  MPI_Comm_size(MPI_COMM_WORLD, &procs);
+#endif
+
   Core moab;
   Interface& mb = moab;
 
-  std::string orig;
-  get_eul_options(orig);
+  std::string read_opts;
+  get_eul_read_options(read_opts);
 
   EntityHandle set;
   ErrorCode rval = mb.create_meshset(MESHSET_SET, set);
   CHECK_ERR(rval);
 
   // Load non-set variable T, set variable gw, and the mesh
-  std::string opts = orig + std::string(";DEBUG_IO=0;VARIABLE=T,gw");
-  rval = mb.load_file(example_eul, &set, opts.c_str());
+  read_opts += ";DEBUG_IO=0;VARIABLE=T,gw";
+  rval = mb.load_file(example_eul, &set, read_opts.c_str());
   CHECK_ERR(rval);
 
   // Write variables T and gw
-  std::string writeopts;
-  writeopts = std::string(";;VARIABLE=T,gw;DEBUG_IO=0;");
-  rval = mb.write_file("test_eul_T.nc", 0, writeopts.c_str(), &set, 1);
-  CHECK_ERR(rval);
+  std::string write_opts;
+  write_opts = std::string(";;VARIABLE=T,gw;DEBUG_IO=0");
+#ifdef USE_MPI
+  // Use parallel options
+  write_opts += std::string(";PARALLEL=WRITE_PART");
+#endif
+  if (procs > 1)
+    rval = mb.write_file("test_par_eul_T.nc", 0, write_opts.c_str(), &set, 1);
+  else
+    rval = mb.write_file("test_eul_T.nc", 0, write_opts.c_str(), &set, 1);
 }
 
 // Check non-set variable T on some quads
 // Also check set variable gw
 void test_eul_check_T()
 {
+  int rank = 0;
+  int procs = 1;
+#ifdef USE_MPI
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &procs);
+#endif
+
   Core moab;
   Interface& mb = moab;
 
-  std::string opts;
-  get_eul_options(opts);
+  std::string read_opts;
+  get_eul_read_options(read_opts);
 
   EntityHandle set;
   ErrorCode rval = mb.create_meshset(MESHSET_SET, set);
   CHECK_ERR(rval);
 
   // Load non-set variable T, set variable gw, and the mesh
-  opts += std::string(";VARIABLE=T,gw");
-  rval = mb.load_file("test_eul_T.nc", &set, opts.c_str());
+  read_opts += ";VARIABLE=T,gw";
+  if (procs > 1)
+    rval = mb.load_file("test_par_eul_T.nc", &set, read_opts.c_str());
+  else
+    rval = mb.load_file("test_eul_T.nc", &set, read_opts.c_str());
   CHECK_ERR(rval);
 
-  int procs = 1;
-#ifdef USE_MPI
-  ParallelComm* pcomm = ParallelComm::get_pcomm(&mb, 0);
-  procs = pcomm->proc_config().proc_size();
-#endif
+  double eps = 1e-10;
 
-  // Only test serial case for the time being
-  if (1 == procs) {
+  // Only check tag gw values on the root processor
+  if (0 == rank) {
     // Get tag gw
     Tag gw_tag;
     rval = mb.tag_get_handle("gw", 0, MB_TYPE_OPAQUE, gw_tag, MB_TAG_SPARSE | MB_TAG_VARLEN);
     CHECK_ERR(rval);
 
-    // Check some values of tag gw
     const void* var_data;
     int var_len = 0;
     rval = mb.tag_get_by_ptr(gw_tag, &set, 1, &var_data, &var_len);
     CHECK_ERR(rval);
     CHECK_EQUAL(48, var_len);
     double* gw_val = (double*)var_data;
-    double eps = 1e-10;
     CHECK_REAL_EQUAL(0.00315334605230584, gw_val[0], eps);
     CHECK_REAL_EQUAL(0.0647376968126839, gw_val[23], eps);
     CHECK_REAL_EQUAL(0.0647376968126839, gw_val[24], eps);
     CHECK_REAL_EQUAL(0.00315334605230584, gw_val[47], eps);
+  }
 
-    // Get tag T0
-    Tag Ttag0;
-    rval = mb.tag_get_handle("T0", 26, MB_TYPE_DOUBLE, Ttag0);
-    CHECK_ERR(rval);
+  // Get tag T0
+  Tag Ttag0;
+  rval = mb.tag_get_handle("T0", 26, MB_TYPE_DOUBLE, Ttag0);
+  CHECK_ERR(rval);
 
-    // Check some values of tag T0 on first level
-    double val[4 * 26];
+  eps = 0.0001;
+  double val[8 * 26];
+
+  if (1 == procs) {
     Range global_quads;
-    rval = mb.get_entities_by_type(set, MBQUAD, global_quads);
+    rval = mb.get_entities_by_type(0, MBQUAD, global_quads);
     CHECK_ERR(rval);
     CHECK_EQUAL((size_t)4608, global_quads.size());
-    EntityHandle gloabl_quad_ents[] = {global_quads[0], global_quads[2303], global_quads[2304], global_quads[4607]};
-    rval = mb.tag_get_data(Ttag0, &gloabl_quad_ents[0], 4, val);
-    CHECK_ERR(rval);
-    eps = 0.0001;
+
+    EntityHandle gloabl_quad_ents[] = {global_quads[0], global_quads[2255], global_quads[2304], global_quads[4559],
+                                       global_quads[48], global_quads[2303], global_quads[2352], global_quads[4607]};
+    rval = mb.tag_get_data(Ttag0, &gloabl_quad_ents[0], 8, val);
+
     CHECK_REAL_EQUAL(252.8529, val[0 * 26], eps); // First global quad
-    CHECK_REAL_EQUAL(232.6670, val[1 * 26], eps); // 2304th global quad
+    CHECK_REAL_EQUAL(234.8390, val[1 * 26], eps); // 2256th global quad
     CHECK_REAL_EQUAL(232.6458, val[2 * 26], eps); // 2305th global quad
-    CHECK_REAL_EQUAL(200.6828, val[3 * 26], eps); // Last global quad
+    CHECK_REAL_EQUAL(205.3905, val[3 * 26], eps); // 4560th global quad
+    CHECK_REAL_EQUAL(252.7116, val[4 * 26], eps); // 49th global quad
+    CHECK_REAL_EQUAL(232.6670, val[5 * 26], eps); // 2304th global quad
+    CHECK_REAL_EQUAL(234.6922, val[6 * 26], eps); // 2353th global quad
+    CHECK_REAL_EQUAL(200.6828, val[7 * 26], eps); // Last global quad
+  }
+  else if (2 == procs) {
+    Range local_quads;
+    rval = mb.get_entities_by_type(0, MBQUAD, local_quads);
+    CHECK_ERR(rval);
+    CHECK_EQUAL((size_t)2304, local_quads.size());
+
+    EntityHandle local_quad_ents[] = {local_quads[0], local_quads[1151], local_quads[1152], local_quads[2303]};
+    rval = mb.tag_get_data(Ttag0, &local_quad_ents[0], 4, val);
+
+    if (0 == rank) {
+      CHECK_REAL_EQUAL(252.8529, val[0 * 26], eps); // First local quad, first global quad
+      CHECK_REAL_EQUAL(234.8390, val[1 * 26], eps); // Median local quad, 2256th global quad
+      CHECK_REAL_EQUAL(232.6458, val[2 * 26], eps); // Median local quad, 2305th global quad
+      CHECK_REAL_EQUAL(205.3905, val[3 * 26], eps); // Last local quad, 4560th global quad
+    }
+    else if (1 == rank) {
+      CHECK_REAL_EQUAL(252.7116, val[0 * 26], eps); // First local quad, 49th global quad
+      CHECK_REAL_EQUAL(232.6670, val[1 * 26], eps); // Median local quad, 2304th global quad
+      CHECK_REAL_EQUAL(234.6922, val[2 * 26], eps); // Median local quad, 2353th global quad
+      CHECK_REAL_EQUAL(200.6828, val[3 * 26], eps); // Last local quad, last global quad
+    }
   }
 }
 
-// We also read and write set variables lat and lon, which are are required to create the mesh
+// We also read and write set variables lat and lon, which are required to create the mesh
 // In test_homme_check_T(), we need to load the output file with mesh
 void test_homme_read_write_T()
 {
+  int procs = 1;
+#ifdef USE_MPI
+  MPI_Comm_size(MPI_COMM_WORLD, &procs);
+#endif
+  // Only test serial case for the time being
+  if (procs > 1)
+    return;
+
   Core moab;
   Interface& mb = moab;
 
-  std::string orig;
-  get_homme_options(orig);
+  std::string read_opts;
+  get_homme_read_options(read_opts);
 
   EntityHandle set;
   ErrorCode rval = mb.create_meshset(MESHSET_SET, set);
   CHECK_ERR(rval);
 
   // Load non-set variable T, set variable lat, set variable lon, and the mesh
-  std::string opts = orig + std::string(";DEBUG_IO=0;VARIABLE=T,lat,lon");
-  rval = mb.load_file(example_homme, &set, opts.c_str());
+  read_opts += ";DEBUG_IO=0;VARIABLE=T,lat,lon";
+  rval = mb.load_file(example_homme, &set, read_opts.c_str());
   CHECK_ERR(rval);
 
   // Write variables T, lat and lon
-  std::string writeopts;
-  writeopts = std::string(";;VARIABLE=T,lat,lon;DEBUG_IO=0;");
-  rval = mb.write_file("test_homme_T.nc", 0, writeopts.c_str(), &set, 1);
+  std::string write_opts = ";;VARIABLE=T,lat,lon;DEBUG_IO=0;";
+  rval = mb.write_file("test_homme_T.nc", 0, write_opts.c_str(), &set, 1);
   CHECK_ERR(rval);
 }
 
@@ -179,30 +231,31 @@ void test_homme_read_write_T()
 // Also check set variables lat and lon
 void test_homme_check_T()
 {
+  int procs = 1;
+#ifdef USE_MPI
+  MPI_Comm_size(MPI_COMM_WORLD, &procs);
+#endif
+  // Only test serial case for the time being
+  if (procs > 1)
+    return;
+
   Core moab;
   Interface& mb = moab;
 
-  std::string opts;
-  get_homme_options(opts);
+  std::string read_opts;
+  get_homme_read_options(read_opts);
 
   EntityHandle set;
   ErrorCode rval = mb.create_meshset(MESHSET_SET, set);
   CHECK_ERR(rval);
 
   // Load non-set variable T, set variable lat, set variable lon, and the mesh
-  opts += std::string(";VARIABLE=T,lat,lon");
-  opts += ";CONN=";
-  opts += example_homme_mapping;
-  rval = mb.load_file("test_homme_T.nc", &set, opts.c_str());
+  read_opts += ";VARIABLE=T,lat,lon";
+  read_opts += ";CONN=";
+  read_opts += example_homme_mapping;
+  rval = mb.load_file("test_homme_T.nc", &set, read_opts.c_str());
   CHECK_ERR(rval);
 
-  int procs = 1;
-#ifdef USE_MPI
-  ParallelComm* pcomm = ParallelComm::get_pcomm(&mb, 0);
-  procs = pcomm->proc_config().proc_size();
-#endif
-
-  // Only test serial case for the time being
   if (1 == procs) {
     // Get tag lat
     Tag lat_tag;
@@ -266,22 +319,22 @@ void test_homme_check_T()
   }
 }
 
-void get_eul_options(std::string& opts)
+void get_eul_read_options(std::string& opts)
 {
 #ifdef USE_MPI
   // Use parallel options
-  opts = std::string(";;PARALLEL=READ_PART;PARTITION_METHOD=SQIJ");
+  opts = ";;PARALLEL=READ_PART;PARTITION_METHOD=SQIJ";
 #else
-  opts = std::string(";;");
+  opts = ";;";
 #endif
 }
 
-void get_homme_options(std::string& opts)
+void get_homme_read_options(std::string& opts)
 {
 #ifdef USE_MPI
   // Use parallel options
-  opts = std::string(";;PARALLEL=READ_PART;PARTITION_METHOD=TRIVIAL");
+  opts = ";;PARALLEL=READ_PART;PARTITION_METHOD=TRIVIAL";
 #else
-  opts = std::string(";;");
+  opts = ";;";
 #endif
 }

Repository URL: https://bitbucket.org/fathomteam/moab/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.


More information about the moab-dev mailing list