[mpich-discuss] *** glibc detected *** mpiexec: munmap_chunk(): invalid pointer: 0x00007fff1d578184 ***

Pavan Balaji balaji at mcs.anl.gov
Fri Nov 5 12:15:47 CDT 2010


Did you try 1.3? The attached program works fine with trunk.

  -- Pavan

On 11/05/2010 11:21 AM, Jeff Hammond wrote:
> I'm seeing an error at termination of parallel jobs run with MPICH2 1.3a1.
>
> I see the problem with NWChem (attached) but also with the trivial code below.
>
> Jeff
>
>
> ==========================================================
> stdout and stderr
> ==========================================================
> jeff at localhost:~/eclipse/a1/trunk/benchmarks/mpi>  mpiexec -np 2
> MPI_Allreduce_tests.x
> Test 0: sum 1 out-of-place
>     0: in=1 out=1 ans=1 PASSED
> Test 1: sum 1 in-place
>     0: in=1 out=1 ans=1 PASSED
> Test 2: sum 2 out-of-place
>     0: in=1 out=2 ans=2 PASSED
> Test 3: sum 2 in-place
>     0: in=1 out=2 ans=2 PASSED
> Test 4: prod 1 out-of-place
>     0: in=1 out=0 ans=0 PASSED
> Test 5: prod 1 in-place
>     0: in=1 out=0 ans=0 PASSED
> Test 6: prod 2 out-of-place
>     0: in=1 out=1 ans=1 PASSED
> Test 7: prod 2 in-place
>     0: in=1 out=1 ans=1 PASSED
>     1: in=0 out=1 ans=1 PASSED
>     1: in=0 out=1 ans=1 PASSED
>     1: in=1 out=2 ans=2 PASSED
>     1: in=1 out=2 ans=2 PASSED
>     1: in=0 out=0 ans=0 PASSED
>     1: in=0 out=0 ans=0 PASSED
>     1: in=1 out=1 ans=1 PASSED
>     1: in=1 out=1 ans=1 PASSED
> *** glibc detected *** mpiexec: munmap_chunk(): invalid pointer:
> 0x00007fffcb3ce169 ***
> ======= Backtrace: =========
> /lib64/libc.so.6[0x7f5415e5b0c8]
> mpiexec[0x41908b]
> mpiexec[0x403a97]
> /lib64/libc.so.6(__libc_start_main+0xe6)[0x7f5415e05586]
> mpiexec[0x402f29]
> ======= Memory map: ========
> 00400000-00426000 r-xp 00000000 08:03 2426704
>    /software/mpich/mpich2-1.3a1-gnu-build/bin/mpiexec
> 00625000-00626000 r--p 00025000 08:03 2426704
>    /software/mpich/mpich2-1.3a1-gnu-build/bin/mpiexec
> 00626000-00627000 rw-p 00026000 08:03 2426704
>    /software/mpich/mpich2-1.3a1-gnu-build/bin/mpiexec
> 00627000-0064a000 rw-p 00627000 00:00 0                                  [heap]
> 7f5415bd0000-7f5415be5000 r-xp 00000000 08:02 1417264
>    /lib64/libgcc_s.so.1
> 7f5415be5000-7f5415de5000 ---p 00015000 08:02 1417264
>    /lib64/libgcc_s.so.1
> 7f5415de5000-7f5415de6000 r--p 00015000 08:02 1417264
>    /lib64/libgcc_s.so.1
> 7f5415de6000-7f5415de7000 rw-p 00016000 08:02 1417264
>    /lib64/libgcc_s.so.1
> 7f5415de7000-7f5415f36000 r-xp 00000000 08:02 1417222
>    /lib64/libc-2.9.so
> 7f5415f36000-7f5416136000 ---p 0014f000 08:02 1417222
>    /lib64/libc-2.9.so
> 7f5416136000-7f541613a000 r--p 0014f000 08:02 1417222
>    /lib64/libc-2.9.so
> 7f541613a000-7f541613b000 rw-p 00153000 08:02 1417222
>    /lib64/libc-2.9.so
> 7f541613b000-7f5416140000 rw-p 7f541613b000 00:00 0
> 7f5416140000-7f5416155000 r-xp 00000000 08:02 1417457
>    /lib64/libnsl-2.9.so
> 7f5416155000-7f5416354000 ---p 00015000 08:02 1417457
>    /lib64/libnsl-2.9.so
> 7f5416354000-7f5416355000 r--p 00014000 08:02 1417457
>    /lib64/libnsl-2.9.so
> 7f5416355000-7f5416356000 rw-p 00015000 08:02 1417457
>    /lib64/libnsl-2.9.so
> 7f5416356000-7f5416358000 rw-p 7f5416356000 00:00 0
> 7f5416358000-7f5416376000 r-xp 00000000 08:02 1420584
>    /lib64/ld-2.9.so
> 7f541653a000-7f541653c000 rw-p 7f541653a000 00:00 0
> 7f5416573000-7f5416575000 rw-p 7f5416573000 00:00 0
> 7f5416575000-7f5416576000 r--p 0001d000 08:02 1420584
>    /lib64/ld-2.9.so
> 7f5416576000-7f5416577000 rw-p 0001e000 08:02 1420584
>    /lib64/ld-2.9.so
> 7fffcb3b9000-7fffcb3cf000 rw-p 7ffffffe9000 00:00 0                      [stack]
> 7fffcb3ff000-7fffcb400000 r-xp 7fffcb3ff000 00:00 0                      [vdso]
> ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0
>    [vsyscall]
> Aborted
>
>
> ==========================================================
> source
> ==========================================================
> /*
>   * The following is a notice of limited availability of the code, and disclaimer
>   * which must be included in the prologue of the code and in all source listings
>   * of the code.
>   *
>   * Copyright (c) 2010  Argonne Leadership Computing Facility, Argonne National
>   * Laboratory
>   *
>   * Permission is hereby granted to use, reproduce, prepare derivative works, and
>   * to redistribute to others.
>   *
>   *
>   *                          LICENSE
>   *
>   * Redistribution and use in source and binary forms, with or without
>   * modification, are permitted provided that the following conditions are
>   * met:
>   *
>   * - Redistributions of source code must retain the above copyright
>   *   notice, this list of conditions and the following disclaimer.
>   *
>   * - Redistributions in binary form must reproduce the above copyright
>   *   notice, this list of conditions and the following disclaimer listed
>   *   in this license in the documentation and/or other materials
>   *   provided with the distribution.
>   *
>   * - Neither the name of the copyright holders nor the names of its
>   *   contributors may be used to endorse or promote products derived from
>   *   this software without specific prior written permission.
>   *
>   * The copyright holders provide no reassurances that the source code
>   * provided does not infringe any patent, copyright, or any other
>   * intellectual property rights of third parties.  The copyright holders
>   * disclaim any liability to any recipient for claims brought against
>   * recipient by any third party for infringement of that parties
>   * intellectual property rights.
>   *
>   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>   * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>   * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>   * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>   * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>   * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>   * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>   * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>   */
>
> #include<stdio.h>
> #include<stdlib.h>
> #include<mpi.h>
>
> /*
>  * Exercises MPI_Allreduce on a single int with MPI_SUM and MPI_PROD, using
>  * both a separate send buffer and MPI_IN_PLACE.  Every rank prints PASSED or
>  * FAILED for each test; rank 0 also prints a numbered header line per test.
>  *
>  * Always returns 0: a FAILED test is reported on stdout only.
>  */
> int main(int argc, char **argv)
> {
>      int provided;
>
>      int rank, size;
>
>      /* Test counter shown by rank 0.  It is post-incremented before it is
>       * ever assigned, so it has to start from a defined value. */
>      int count = 0;
>
>      int in, out, ans;
>
>      MPI_Init_thread(&argc,&argv, MPI_THREAD_MULTIPLE,&provided);
>
>      MPI_Comm_rank(MPI_COMM_WORLD,&rank);
>      MPI_Comm_size(MPI_COMM_WORLD,&size);
>
>      MPI_Barrier(MPI_COMM_WORLD);
>
>      /* TESTING SUM OPERATOR */
>
>      /* Only rank 0 contributes 1, so the sum is 1 for any number of ranks. */
>      if (rank==0) printf("Test %d: sum 1 out-of-place\n",count++);
>      in  = ( rank==0 ? 1 : 0 );
>      out = 0;
>      ans = 1;
>      MPI_Allreduce(&in,&out,1,MPI_INT,MPI_SUM,MPI_COMM_WORLD);
>      if (out==ans) printf("%4d: in=%d out=%d ans=%d PASSED \n",rank,in,out,ans);
>      else          printf("%4d: in=%d out=%d ans=%d FAILED \n",rank,in,out,ans);
>      fflush(stdout);
>
>      if (rank==0) printf("Test %d: sum 1 in-place\n",count++);
>      in  = ( rank==0 ? 1 : 0 );
>      out = in;
>      ans = 1;
>      MPI_Allreduce(MPI_IN_PLACE,&out,1,MPI_INT,MPI_SUM,MPI_COMM_WORLD);
>      if (out==ans) printf("%4d: in=%d out=%d ans=%d PASSED \n",rank,in,out,ans);
>      else          printf("%4d: in=%d out=%d ans=%d FAILED \n",rank,in,out,ans);
>      fflush(stdout);
>
>      /* Every rank contributes 1, so the sum equals the communicator size. */
>      if (rank==0) printf("Test %d: sum 2 out-of-place\n",count++);
>      in  = 1;
>      out = 0;
>      ans = size;
>      MPI_Allreduce(&in,&out,1,MPI_INT,MPI_SUM,MPI_COMM_WORLD);
>      if (out==ans) printf("%4d: in=%d out=%d ans=%d PASSED \n",rank,in,out,ans);
>      else          printf("%4d: in=%d out=%d ans=%d FAILED \n",rank,in,out,ans);
>      fflush(stdout);
>
>      if (rank==0) printf("Test %d: sum 2 in-place\n",count++);
>      in  = 1;
>      out = in;
>      ans = size;
>      MPI_Allreduce(MPI_IN_PLACE,&out,1,MPI_INT,MPI_SUM,MPI_COMM_WORLD);
>      if (out==ans) printf("%4d: in=%d out=%d ans=%d PASSED \n",rank,in,out,ans);
>      else          printf("%4d: in=%d out=%d ans=%d FAILED \n",rank,in,out,ans);
>      fflush(stdout);
>
>      /* TESTING PROD OPERATOR */
>
>      /* Rank 0 contributes 1 and every other rank contributes 0, so the
>       * product is 0 as soon as a second rank exists; with a single rank
>       * the only factor is 1. */
>      if (rank==0) printf("Test %d: prod 1 out-of-place\n",count++);
>      in  = ( rank==0 ? 1 : 0 );
>      out = 0;
>      ans = ( size==1 ? 1 : 0 );
>      MPI_Allreduce(&in,&out,1,MPI_INT,MPI_PROD,MPI_COMM_WORLD);
>      if (out==ans) printf("%4d: in=%d out=%d ans=%d PASSED \n",rank,in,out,ans);
>      else          printf("%4d: in=%d out=%d ans=%d FAILED \n",rank,in,out,ans);
>      fflush(stdout);
>
>      if (rank==0) printf("Test %d: prod 1 in-place\n",count++);
>      in  = ( rank==0 ? 1 : 0 );
>      out = in;
>      ans = ( size==1 ? 1 : 0 );
>      MPI_Allreduce(MPI_IN_PLACE,&out,1,MPI_INT,MPI_PROD,MPI_COMM_WORLD);
>      if (out==ans) printf("%4d: in=%d out=%d ans=%d PASSED \n",rank,in,out,ans);
>      else          printf("%4d: in=%d out=%d ans=%d FAILED \n",rank,in,out,ans);
>      fflush(stdout);
>
>      /* Every rank contributes 1, so the product is 1. */
>      if (rank==0) printf("Test %d: prod 2 out-of-place\n",count++);
>      in  = 1;
>      out = 0;
>      ans = 1;
>      MPI_Allreduce(&in,&out,1,MPI_INT,MPI_PROD,MPI_COMM_WORLD);
>      if (out==ans) printf("%4d: in=%d out=%d ans=%d PASSED \n",rank,in,out,ans);
>      else          printf("%4d: in=%d out=%d ans=%d FAILED \n",rank,in,out,ans);
>      fflush(stdout);
>
>      if (rank==0) printf("Test %d: prod 2 in-place\n",count++);
>      in  = 1;
>      out = in;
>      ans = 1;
>      MPI_Allreduce(MPI_IN_PLACE,&out,1,MPI_INT,MPI_PROD,MPI_COMM_WORLD);
>      if (out==ans) printf("%4d: in=%d out=%d ans=%d PASSED \n",rank,in,out,ans);
>      else          printf("%4d: in=%d out=%d ans=%d FAILED \n",rank,in,out,ans);
>      fflush(stdout);
>
>      /* END OF TESTS */
>
>      MPI_Finalize();
>
>      return 0;
> }
> ==========================================================
>

-- 
Pavan Balaji
http://www.mcs.anl.gov/~balaji


More information about the mpich-discuss mailing list