[petsc-users] DM error? same code working on local but not on cray system

Zin Lin zinlin.zinlin at gmail.com
Wed Feb 20 22:16:35 CST 2019


Hi,
I have a finite-difference code based on petsc-3.6.4 that works perfectly fine
on a local server (using about 40 CPUs).

However, the exact same code and the exact same example on a Cray system,
compiled with petsc-3.7.6, give me the following error:

[0]PETSC ERROR: --------------------- Error Message
--------------------------------------------------------------
[0]PETSC ERROR: Argument out of range
[0]PETSC ERROR: Partition in y direction is too fine! 0 1
[0]PETSC ERROR: See http://www.mcs.anl.gov/petsc/documentation/faq.html for
trouble shooting.
[0]PETSC ERROR: Petsc Release Version 3.7.6, Apr, 24, 2017
[0]PETSC ERROR: ./farfieldopt_exec on a x86_64 named nid02531 by zinlin Wed
Feb 20 21:59:21 2019
[0]PETSC ERROR: Configure options --known-has-attribute-aligned=1
--known-mpi-int64_t=0 --known-bits-per-byte=8 --known-sdot-returns-double=0
--known-snrm2-returns-double=0 --known-level1-dcache-assoc=4
--known-level1-dcache-linesize=64 --known-level1-dcache-size=16384
--known-memcmp-ok=1 --known-mpi-c-double-complex=1
--known-mpi-long-double=0 --known-mpi-shared-libraries=0
--known-sizeof-MPI_Comm=4 --known-sizeof-MPI_Fint=4 --known-sizeof-char=1
--known-sizeof-double=8 --known-sizeof-float=4 --known-sizeof-int=4
--known-sizeof-long-long=8 --known-sizeof-long=8 --known-sizeof-short=2
--known-sizeof-size_t=8 --known-sizeof-void-p=8 --with-ar=ar --with-batch=1
--with-cc=cc --with-clib-autodetect=0 --with-cxx=CC
--with-cxxlib-autodetect=0 --with-debugging=0 --with-dependencies=0
--with-fc=ftn --with-fortran-datatypes=0 --with-fortran-interfaces=0
--with-fortranlib-autodetect=0 --with-ranlib=ranlib
--with-scalar-type=complex --with-shared-ld=ar --with-etags=0
--with-dependencies=0 --with-x=0 --with-ssl=0 --with-shared-libraries=0
--with-dependencies=0 --with-mpi-lib="[]" --with-mpi-include="[]"
--with-blas-lapack-lib="-L/opt/cray/libsci/17.06.1.1/CRAY/8.6/x86_64/lib
-lsci_cray_mp" --with-superlu_dist=1
--with-superlu_dist-include=/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/include
--with-superlu_dist-lib="-L/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/lib
-lsuperlu_dist-64" --with-parmetis=1
--with-parmetis-include=/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/include
--with-parmetis-lib="-L/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/lib
-lparmetis-64" --with-metis=1
--with-metis-include=/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/include
--with-metis-lib="-L/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/lib
-lmetis-64" --with-ptscotch=1
--with-ptscotch-include=/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/include
--with-ptscotch-lib="-L/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/lib
-lptscotch-64 -lscotch-64 -lptscotcherr-64 -lscotcherr-64"
--with-scalapack=1 --with-scalapack-include=/opt/cray/libsci/
17.06.1.1/CRAY/8.6/x86_64/include --with-scalapack-lib="-L/opt/cray/libsci/
17.06.1.1/CRAY/8.6/x86_64/lib -lsci_cray_mpi_mp -lsci_cray_mp"
--with-hdf5=1 --with-hdf5-include=/opt/cray/hdf5-parallel/
1.10.0.3/CRAY/8.6/include --with-hdf5-lib="-L/opt/cray/hdf5-parallel/
1.10.0.3/CRAY/8.6/lib -lhdf5_parallel -lz -ldl" --CFLAGS="-hnopattern -O2
-hpic" --CPPFLAGS="-hnomessage=11709 " --CXXFLAGS="-hnopattern -O2   -hpic"
--FFLAGS="-hnopattern -O2  -F -em -hnocaf  -hpic" --LIBS=
--CXX_LINKER_FLAGS= --PETSC_ARCH=x86_64 --prefix=/opt/cray/pe/petsc/
3.7.6.0/complex/CRAY64/8.6/x86_64 --with-64-bit-indices
[0]PETSC ERROR: #1 DMSetUp_DA_3D() line 298 in src/dm/impls/da/da3.c
[0]PETSC ERROR: #2 DMSetUp_DA() line 27 in src/dm/impls/da/dareg.c
[0]PETSC ERROR: #3 DMSetUp() line 744 in src/dm/interface/dm.c
[0]PETSC ERROR: #4 DMDACreate3d() line 1446 in src/dm/impls/da/da3.c
[0]PETSC ERROR: #5 setParDataGrid() line 176 in
/p/home/zinlin/Dev_3DOpt/library/initialize.c
[0]PETSC ERROR: --------------------- Error Message
--------------------------------------------------------------
[0]PETSC ERROR: Argument out of range
[0]PETSC ERROR: key 1099516865361 is greater than largest key allowed 751
[0]PETSC ERROR: See http://www.mcs.anl.gov/petsc/documentation/faq.html for
trouble shooting.
[0]PETSC ERROR: Petsc Release Version 3.7.6, Apr, 24, 2017
[0]PETSC ERROR: ./farfieldopt_exec on a x86_64 named nid02531 by zinlin Wed
Feb 20 21:59:21 2019
[0]PETSC ERROR: Configure options --known-has-attribute-aligned=1
--known-mpi-int64_t=0 --known-bits-per-byte=8 --known-sdot-returns-double=0
--known-snrm2-returns-double=0 --known-level1-dcache-assoc=4
--known-level1-dcache-linesize=64 --known-level1-dcache-size=16384
--known-memcmp-ok=1 --known-mpi-c-double-complex=1
--known-mpi-long-double=0 --known-mpi-shared-libraries=0
--known-sizeof-MPI_Comm=4 --known-sizeof-MPI_Fint=4 --known-sizeof-char=1
--known-sizeof-double=8 --known-sizeof-float=4 --known-sizeof-int=4
--known-sizeof-long-long=8 --known-sizeof-long=8 --known-sizeof-short=2
--known-sizeof-size_t=8 --known-sizeof-void-p=8 --with-ar=ar --with-batch=1
--with-cc=cc --with-clib-autodetect=0 --with-cxx=CC
--with-cxxlib-autodetect=0 --with-debugging=0 --with-dependencies=0
--with-fc=ftn --with-fortran-datatypes=0 --with-fortran-interfaces=0
--with-fortranlib-autodetect=0 --with-ranlib=ranlib
--with-scalar-type=complex --with-shared-ld=ar --with-etags=0
--with-dependencies=0 --with-x=0 --with-ssl=0 --with-shared-libraries=0
--with-dependencies=0 --with-mpi-lib="[]" --with-mpi-include="[]"
--with-blas-lapack-lib="-L/opt/cray/libsci/17.06.1.1/CRAY/8.6/x86_64/lib
-lsci_cray_mp" --with-superlu_dist=1
--with-superlu_dist-include=/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/include
--with-superlu_dist-lib="-L/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/lib
-lsuperlu_dist-64" --with-parmetis=1
--with-parmetis-include=/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/include
--with-parmetis-lib="-L/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/lib
-lparmetis-64" --with-metis=1
--with-metis-include=/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/include
--with-metis-lib="-L/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/lib
-lmetis-64" --with-ptscotch=1
--with-ptscotch-include=/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/include
--with-ptscotch-lib="-L/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/lib
-lptscotch-64 -lscotch-64 -lptscotcherr-64 -lscotcherr-64"
--with-scalapack=1 --with-scalapack-include=/opt/cray/libsci/
17.06.1.1/CRAY/8.6/x86_64/include --with-scalapack-lib="-L/opt/cray/libsci/
17.06.1.1/CRAY/8.6/x86_64/lib -lsci_cray_mpi_mp -lsci_cray_mp"
--with-hdf5=1 --with-hdf5-include=/opt/cray/hdf5-parallel/
1.10.0.3/CRAY/8.6/include --with-hdf5-lib="-L/opt/cray/hdf5-parallel/
1.10.0.3/CRAY/8.6/lib -lhdf5_parallel -lz -ldl" --CFLAGS="-hnopattern -O2
-hpic" --CPPFLAGS="-hnomessage=11709 " --CXXFLAGS="-hnopattern -O2   -hpic"
--FFLAGS="-hnopattern -O2  -F -em -hnocaf  -hpic" --LIBS=
--CXX_LINKER_FLAGS= --PETSC_ARCH=x86_64 --prefix=/opt/cray/pe/petsc/
3.7.6.0/complex/CRAY64/8.6/x86_64 --with-64-bit-indices
[0]PETSC ERROR: #6 PetscTableFind() line 142 in
/b/cray-petsc/.cray-build/CRAY/86/x86_64/cray-petsc-64-complex/petsc-3.7.6/./include/petscctable.h
[0]PETSC ERROR: #7 MatSetUpMultiply_MPIAIJ() line 33 in
src/mat/impls/aij/mpi/mmaij.c
[0]PETSC ERROR: #8 MatAssemblyEnd_MPIAIJ() line 747 in
src/mat/impls/aij/mpi/mpiaij.c
[0]PETSC ERROR: #9 MatAssemblyEnd() line 5194 in src/mat/interface/matrix.c
[0]PETSC ERROR:
------------------------------------------------------------------------
[0]PETSC ERROR: Caught signal number 11 SEGV: Segmentation Violation,
probably memory access out of range
[0]PETSC ERROR: Try option -start_in_debugger or -on_error_attach_debugger
[0]PETSC ERROR: or see
http://www.mcs.anl.gov/petsc/documentation/faq.html#valgrind
[0]PETSC ERROR: or try http://valgrind.org on GNU/linux and Apple Mac OS X
to find memory corruption errors
[0]PETSC ERROR: configure using --with-debugging=yes, recompile, link, and
run
[0]PETSC ERROR: to get more information on the crash.
[0]PETSC ERROR: --------------------- Error Message
--------------------------------------------------------------
[0]PETSC ERROR: Signal received
[0]PETSC ERROR: See http://www.mcs.anl.gov/petsc/documentation/faq.html for
trouble shooting.
[0]PETSC ERROR: Petsc Release Version 3.7.6, Apr, 24, 2017
[0]PETSC ERROR: ./farfieldopt_exec on a x86_64 named nid02531 by zinlin Wed
Feb 20 21:59:21 2019
[0]PETSC ERROR: Configure options --known-has-attribute-aligned=1
--known-mpi-int64_t=0 --known-bits-per-byte=8 --known-sdot-returns-double=0
--known-snrm2-returns-double=0 --known-level1-dcache-assoc=4
--known-level1-dcache-linesize=64 --known-level1-dcache-size=16384
--known-memcmp-ok=1 --known-mpi-c-double-complex=1
--known-mpi-long-double=0 --known-mpi-shared-libraries=0
--known-sizeof-MPI_Comm=4 --known-sizeof-MPI_Fint=4 --known-sizeof-char=1
--known-sizeof-double=8 --known-sizeof-float=4 --known-sizeof-int=4
--known-sizeof-long-long=8 --known-sizeof-long=8 --known-sizeof-short=2
--known-sizeof-size_t=8 --known-sizeof-void-p=8 --with-ar=ar --with-batch=1
--with-cc=cc --with-clib-autodetect=0 --with-cxx=CC
--with-cxxlib-autodetect=0 --with-debugging=0 --with-dependencies=0
--with-fc=ftn --with-fortran-datatypes=0 --with-fortran-interfaces=0
--with-fortranlib-autodetect=0 --with-ranlib=ranlib
--with-scalar-type=complex --with-shared-ld=ar --with-etags=0
--with-dependencies=0 --with-x=0 --with-ssl=0 --with-shared-libraries=0
--with-dependencies=0 --with-mpi-lib="[]" --with-mpi-include="[]"
--with-blas-lapack-lib="-L/opt/cray/libsci/17.06.1.1/CRAY/8.6/x86_64/lib
-lsci_cray_mp" --with-superlu_dist=1
--with-superlu_dist-include=/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/include
--with-superlu_dist-lib="-L/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/lib
-lsuperlu_dist-64" --with-parmetis=1
--with-parmetis-include=/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/include
--with-parmetis-lib="-L/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/lib
-lparmetis-64" --with-metis=1
--with-metis-include=/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/include
--with-metis-lib="-L/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/lib
-lmetis-64" --with-ptscotch=1
--with-ptscotch-include=/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/include
--with-ptscotch-lib="-L/opt/cray/tpsl/17.06.1/CRAY64/8.6/x86_64/lib
-lptscotch-64 -lscotch-64 -lptscotcherr-64 -lscotcherr-64"
--with-scalapack=1 --with-scalapack-include=/opt/cray/libsci/
17.06.1.1/CRAY/8.6/x86_64/include --with-scalapack-lib="-L/opt/cray/libsci/
17.06.1.1/CRAY/8.6/x86_64/lib -lsci_cray_mpi_mp -lsci_cray_mp"
--with-hdf5=1 --with-hdf5-include=/opt/cray/hdf5-parallel/
1.10.0.3/CRAY/8.6/include --with-hdf5-lib="-L/opt/cray/hdf5-parallel/
1.10.0.3/CRAY/8.6/lib -lhdf5_parallel -lz -ldl" --CFLAGS="-hnopattern -O2
-hpic" --CPPFLAGS="-hnomessage=11709 " --CXXFLAGS="-hnopattern -O2   -hpic"
--FFLAGS="-hnopattern -O2  -F -em -hnocaf  -hpic" --LIBS=
--CXX_LINKER_FLAGS= --PETSC_ARCH=x86_64 --prefix=/opt/cray/pe/petsc/
3.7.6.0/complex/CRAY64/8.6/x86_64 --with-64-bit-indices
[0]PETSC ERROR: #10 User provided function() line 0 in  unknown file
Rank 0 [Wed Feb 20 21:59:21 2019] [c13-0c0s8n3] application called
MPI_Abort(MPI_COMM_WORLD, 59) - process 0
_pmiu_daemon(SIGCHLD): [NID 02531] [c13-0c0s8n3] [Wed Feb 20 21:59:21 2019]
PE RANK 0 exit signal Aborted

Note that this is a test with just 1 CPU.
Does it have something to do with the DM having a problem because the y dimension
is just one pixel (so it is a 2D problem, even though DMDACreate3d was used)?
But there was no problem with it before on the local server with petsc-3.6.4.
Any help will be appreciated.
Thanks,
Zin

-- 
Zin Lin
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/petsc-users/attachments/20190220/829c8304/attachment-0001.html>


More information about the petsc-users mailing list