[mpich-discuss] *** glibc detected *** mpiexec: munmap_chunk(): invalid pointer: 0x00007fff1d578184 ***

Jeff Hammond jhammond at mcs.anl.gov
Sat Nov 6 09:08:18 CDT 2010


Yep, still happening when I use MPICH2-1.3 pulled via SVN yesterday
and compiled with GCC 4.6.  The test that fails is extremely simple.

Jeff


jeff at localhost:~/eclipse/a1/trunk/benchmarks/mpi> cat MPI_simple.c
#include <stdio.h>
#include <mpi.h>

/*
 * Minimal MPI start-up/shut-down reproducer.
 *
 * Initializes MPI requesting MPI_THREAD_SINGLE, queries the rank and size of
 * MPI_COMM_WORLD (the values are not used afterwards), synchronizes once with
 * a barrier, and prints a marker line on each side of MPI_Finalize.
 *
 * Always returns 0; the return codes of the MPI calls are not inspected.
 */
int main(int argc, char **argv)
{
    int provided, rank, size;

    MPI_Init_thread(&argc, &argv, MPI_THREAD_SINGLE, &provided);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Barrier(MPI_COMM_WORLD);

    /* Flush immediately so the marker is written before finalization begins. */
    fputs("before MPI_Finalize\n", stdout);
    fflush(stdout);

    MPI_Finalize();

    /* Second marker: shows that this process returned from MPI_Finalize. */
    fputs("after MPI_Finalize\n", stdout);
    fflush(stdout);

    return 0;
}
jeff at localhost:~/eclipse/a1/trunk/benchmarks/mpi>
/software/mpich/mpich2-1.3-gnu46-build/bin/mpiexec -n 2 MPI_simple.x
before MPI_Finalize
after MPI_Finalize
before MPI_Finalize
after MPI_Finalize
*** glibc detected ***
/software/mpich/mpich2-1.3-gnu46-build/bin/mpiexec: munmap_chunk():
invalid pointer: 0x00007fffd654016b ***
======= Backtrace: =========
/lib64/libc.so.6[0x7f1cca7040c8]
/software/mpich/mpich2-1.3-gnu46-build/bin/mpiexec[0x421984]
/software/mpich/mpich2-1.3-gnu46-build/bin/mpiexec[0x40a07f]
/software/mpich/mpich2-1.3-gnu46-build/bin/mpiexec[0x40537f]
/lib64/libc.so.6(__libc_start_main+0xe6)[0x7f1cca6ae586]
/software/mpich/mpich2-1.3-gnu46-build/bin/mpiexec[0x404359]
======= Memory map: ========
00400000-00449000 r-xp 00000000 08:03 312441
  /software/mpich/mpich2-1.3-gnu46-build/bin/mpiexec.hydra
00648000-00649000 r--p 00048000 08:03 312441
  /software/mpich/mpich2-1.3-gnu46-build/bin/mpiexec.hydra
00649000-0064b000 rw-p 00049000 08:03 312441
  /software/mpich/mpich2-1.3-gnu46-build/bin/mpiexec.hydra
0064b000-00672000 rw-p 0064b000 00:00 0                                  [heap]
7f1cca479000-7f1cca48e000 r-xp 00000000 08:02 1417264
  /lib64/libgcc_s.so.1
7f1cca48e000-7f1cca68e000 ---p 00015000 08:02 1417264
  /lib64/libgcc_s.so.1
7f1cca68e000-7f1cca68f000 r--p 00015000 08:02 1417264
  /lib64/libgcc_s.so.1
7f1cca68f000-7f1cca690000 rw-p 00016000 08:02 1417264
  /lib64/libgcc_s.so.1
7f1cca690000-7f1cca7df000 r-xp 00000000 08:02 1417222
  /lib64/libc-2.9.so
7f1cca7df000-7f1cca9df000 ---p 0014f000 08:02 1417222
  /lib64/libc-2.9.so
7f1cca9df000-7f1cca9e3000 r--p 0014f000 08:02 1417222
  /lib64/libc-2.9.so
7f1cca9e3000-7f1cca9e4000 rw-p 00153000 08:02 1417222
  /lib64/libc-2.9.so
7f1cca9e4000-7f1cca9e9000 rw-p 7f1cca9e4000 00:00 0
7f1cca9e9000-7f1cca9ff000 r-xp 00000000 08:02 1417248
  /lib64/libpthread-2.9.so
7f1cca9ff000-7f1ccabff000 ---p 00016000 08:02 1417248
  /lib64/libpthread-2.9.so
7f1ccabff000-7f1ccac00000 r--p 00016000 08:02 1417248
  /lib64/libpthread-2.9.so
7f1ccac00000-7f1ccac01000 rw-p 00017000 08:02 1417248
  /lib64/libpthread-2.9.so
7f1ccac01000-7f1ccac05000 rw-p 7f1ccac01000 00:00 0
7f1ccac05000-7f1ccac0d000 r-xp 00000000 08:02 1417496
  /lib64/librt-2.9.so
7f1ccac0d000-7f1ccae0c000 ---p 00008000 08:02 1417496
  /lib64/librt-2.9.so
7f1ccae0c000-7f1ccae0d000 r--p 00007000 08:02 1417496
  /lib64/librt-2.9.so
7f1ccae0d000-7f1ccae0e000 rw-p 00008000 08:02 1417496
  /lib64/librt-2.9.so
7f1ccae0e000-7f1ccae23000 r-xp 00000000 08:02 1417457
  /lib64/libnsl-2.9.so
7f1ccae23000-7f1ccb022000 ---p 00015000 08:02 1417457
  /lib64/libnsl-2.9.so
7f1ccb022000-7f1ccb023000 r--p 00014000 08:02 1417457
  /lib64/libnsl-2.9.so
7f1ccb023000-7f1ccb024000 rw-p 00015000 08:02 1417457
  /lib64/libnsl-2.9.so
7f1ccb024000-7f1ccb026000 rw-p 7f1ccb024000 00:00 0
7f1ccb026000-7f1ccb07b000 r-xp 00000000 08:02 1417485
  /lib64/libm-2.9.so
7f1ccb07b000-7f1ccb27a000 ---p 00055000 08:02 1417485
  /lib64/libm-2.9.so
7f1ccb27a000-7f1ccb27b000 r--p 00054000 08:02 1417485
  /lib64/libm-2.9.so
7f1ccb27b000-7f1ccb27c000 rw-p 00055000 08:02 1417485
  /lib64/libm-2.9.so
7f1ccb27c000-7f1ccb291000 r-xp 00000000 08:02 1417270
  /lib64/libz.so.1.2.3
7f1ccb291000-7f1ccb490000 ---p 00015000 08:02 1417270
  /lib64/libz.so.1.2.3
7f1ccb490000-7f1ccb491000 r--p 00014000 08:02 1417270
  /lib64/libz.so.1.2.3
7f1ccb491000-7f1ccb492000 rw-p 00015000 08:02 1417270
  /lib64/libz.so.1.2.3
7f1ccb492000-7f1ccb494000 r-xp 00000000 08:02 1417482
  /lib64/libdl-2.9.so
7f1ccb494000-7f1ccb694000 ---p 00002000 08:02 1417482
  /lib64/libdl-2.9.so
7f1ccb694000-7f1ccb695000 r--p 00002000 08:02 1417482
  /lib64/libdl-2.9.so
7f1ccb695000-7f1ccb696000 rw-p 00003000 08:02 1417482
  /lib64/libdl-2.9.so
7f1ccb696000-7f1ccb7ea000 r-xp 00000000 08:02 521675
  /usr/lib64/libxml2.so.2.7.1
7f1ccb7ea000-7f1ccb9e9000 ---p 00154000 08:02 521675
  /usr/lib64/libxml2.so.2.7.1
7f1ccb9e9000-7f1ccb9f1000 r--p 00153000 08:02 521675
  /usr/lib64/libxml2.so.2.7.1
7f1ccb9f1000-7f1ccb9f3000 rw-p 0015b000 08:02 521675
  /usr/lib64/libxml2.so.2.7.1
7f1ccb9f3000-7f1ccb9f4000 rw-p 7f1ccb9f3000 00:00 0
7f1ccb9f4000-7f1ccba12000 r-xp 00000000 08:02 1420584
  /lib64/ld-2.9.so
7f1ccbbd3000-7f1ccbbd8000 rw-p 7f1ccbbd3000 00:00 0
7f1ccbc0f000-7f1ccbc11000 rw-p 7f1ccbc0f000 00:00 0
7f1ccbc11000-7f1ccbc12000 r--p 0001d000 08:02 1420584
  /lib64/ld-2.9.so
7f1ccbc12000-7f1ccbc13000 rw-p 0001e000 08:02 1420584
  /lib64/ld-2.9.so
7fffd651e000-7fffd6541000 rw-p 7ffffffdc000 00:00 0                      [stack]
7fffd65ff000-7fffd6600000 r-xp 7fffd65ff000 00:00 0                      [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0
  [vsyscall]
Aborted


On Fri, Nov 5, 2010 at 12:15 PM, Pavan Balaji <balaji at mcs.anl.gov> wrote:
>
> Did you try 1.3? The attached program works fine with trunk.
>
>  -- Pavan
>
> On 11/05/2010 11:21 AM, Jeff Hammond wrote:
>>
>> I'm seeing an error when parallel jobs run with MPICH2 1.3a1 terminate.
>>
>> I see the problem with NWChem (attached) but also with the trivial code
>> below.
>>
>> Jeff
>>
>>
>> ==========================================================
>> stdout and stderr
>> ==========================================================
>> jeff at localhost:~/eclipse/a1/trunk/benchmarks/mpi>  mpiexec -np 2
>> MPI_Allreduce_tests.x
>> Test 0: sum 1 out-of-place
>>    0: in=1 out=1 ans=1 PASSED
>> Test 1: sum 1 in-place
>>    0: in=1 out=1 ans=1 PASSED
>> Test 2: sum 2 out-of-place
>>    0: in=1 out=2 ans=2 PASSED
>> Test 3: sum 2 in-place
>>    0: in=1 out=2 ans=2 PASSED
>> Test 4: prod 1 out-of-place
>>    0: in=1 out=0 ans=0 PASSED
>> Test 5: prod 1 in-place
>>    0: in=1 out=0 ans=0 PASSED
>> Test 6: prod 2 out-of-place
>>    0: in=1 out=1 ans=1 PASSED
>> Test 7: prod 2 in-place
>>    0: in=1 out=1 ans=1 PASSED
>>    1: in=0 out=1 ans=1 PASSED
>>    1: in=0 out=1 ans=1 PASSED
>>    1: in=1 out=2 ans=2 PASSED
>>    1: in=1 out=2 ans=2 PASSED
>>    1: in=0 out=0 ans=0 PASSED
>>    1: in=0 out=0 ans=0 PASSED
>>    1: in=1 out=1 ans=1 PASSED
>>    1: in=1 out=1 ans=1 PASSED
>> *** glibc detected *** mpiexec: munmap_chunk(): invalid pointer:
>> 0x00007fffcb3ce169 ***
>> ======= Backtrace: =========
>> /lib64/libc.so.6[0x7f5415e5b0c8]
>> mpiexec[0x41908b]
>> mpiexec[0x403a97]
>> /lib64/libc.so.6(__libc_start_main+0xe6)[0x7f5415e05586]
>> mpiexec[0x402f29]
>> ======= Memory map: ========
>> 00400000-00426000 r-xp 00000000 08:03 2426704
>>   /software/mpich/mpich2-1.3a1-gnu-build/bin/mpiexec
>> 00625000-00626000 r--p 00025000 08:03 2426704
>>   /software/mpich/mpich2-1.3a1-gnu-build/bin/mpiexec
>> 00626000-00627000 rw-p 00026000 08:03 2426704
>>   /software/mpich/mpich2-1.3a1-gnu-build/bin/mpiexec
>> 00627000-0064a000 rw-p 00627000 00:00 0
>>  [heap]
>> 7f5415bd0000-7f5415be5000 r-xp 00000000 08:02 1417264
>>   /lib64/libgcc_s.so.1
>> 7f5415be5000-7f5415de5000 ---p 00015000 08:02 1417264
>>   /lib64/libgcc_s.so.1
>> 7f5415de5000-7f5415de6000 r--p 00015000 08:02 1417264
>>   /lib64/libgcc_s.so.1
>> 7f5415de6000-7f5415de7000 rw-p 00016000 08:02 1417264
>>   /lib64/libgcc_s.so.1
>> 7f5415de7000-7f5415f36000 r-xp 00000000 08:02 1417222
>>   /lib64/libc-2.9.so
>> 7f5415f36000-7f5416136000 ---p 0014f000 08:02 1417222
>>   /lib64/libc-2.9.so
>> 7f5416136000-7f541613a000 r--p 0014f000 08:02 1417222
>>   /lib64/libc-2.9.so
>> 7f541613a000-7f541613b000 rw-p 00153000 08:02 1417222
>>   /lib64/libc-2.9.so
>> 7f541613b000-7f5416140000 rw-p 7f541613b000 00:00 0
>> 7f5416140000-7f5416155000 r-xp 00000000 08:02 1417457
>>   /lib64/libnsl-2.9.so
>> 7f5416155000-7f5416354000 ---p 00015000 08:02 1417457
>>   /lib64/libnsl-2.9.so
>> 7f5416354000-7f5416355000 r--p 00014000 08:02 1417457
>>   /lib64/libnsl-2.9.so
>> 7f5416355000-7f5416356000 rw-p 00015000 08:02 1417457
>>   /lib64/libnsl-2.9.so
>> 7f5416356000-7f5416358000 rw-p 7f5416356000 00:00 0
>> 7f5416358000-7f5416376000 r-xp 00000000 08:02 1420584
>>   /lib64/ld-2.9.so
>> 7f541653a000-7f541653c000 rw-p 7f541653a000 00:00 0
>> 7f5416573000-7f5416575000 rw-p 7f5416573000 00:00 0
>> 7f5416575000-7f5416576000 r--p 0001d000 08:02 1420584
>>   /lib64/ld-2.9.so
>> 7f5416576000-7f5416577000 rw-p 0001e000 08:02 1420584
>>   /lib64/ld-2.9.so
>> 7fffcb3b9000-7fffcb3cf000 rw-p 7ffffffe9000 00:00 0
>>  [stack]
>> 7fffcb3ff000-7fffcb400000 r-xp 7fffcb3ff000 00:00 0
>>  [vdso]
>> ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0
>>   [vsyscall]
>> Aborted
>>
>>
>> ==========================================================
>> source
>> ==========================================================
>> /*
>>  * The following is a notice of limited availability of the code, and
>> disclaimer
>>  * which must be included in the prologue of the code and in all source
>> listings
>>  * of the code.
>>  *
>>  * Copyright (c) 2010  Argonne Leadership Computing Facility, Argonne
>> National
>>  * Laboratory
>>  *
>>  * Permission is hereby granted to use, reproduce, prepare derivative
>> works, and
>>  * to redistribute to others.
>>  *
>>  *
>>  *                          LICENSE
>>  *
>>  * Redistribution and use in source and binary forms, with or without
>>  * modification, are permitted provided that the following conditions are
>>  * met:
>>  *
>>  * - Redistributions of source code must retain the above copyright
>>  *   notice, this list of conditions and the following disclaimer.
>>  *
>>  * - Redistributions in binary form must reproduce the above copyright
>>  *   notice, this list of conditions and the following disclaimer listed
>>  *   in this license in the documentation and/or other materials
>>  *   provided with the distribution.
>>  *
>>  * - Neither the name of the copyright holders nor the names of its
>>  *   contributors may be used to endorse or promote products derived from
>>  *   this software without specific prior written permission.
>>  *
>>  * The copyright holders provide no reassurances that the source code
>>  * provided does not infringe any patent, copyright, or any other
>>  * intellectual property rights of third parties.  The copyright holders
>>  * disclaim any liability to any recipient for claims brought against
>>  * recipient by any third party for infringement of that parties
>>  * intellectual property rights.
>>  *
>>  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>>  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>>  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>>  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>>  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>>  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>>  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>>  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>>  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>>  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>>  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>>  */
>>
>> #include<stdio.h>
>> #include<stdlib.h>
>> #include<mpi.h>
>>
>> /*
>>  * MPI_Allreduce smoke test on a single int.
>>  *
>>  * Runs eight reductions over MPI_COMM_WORLD: MPI_SUM and MPI_PROD, each in
>>  * two input configurations, each out-of-place (&in -> &out) and in-place
>>  * (MPI_IN_PLACE with out pre-loaded from in). After every reduction each
>>  * rank compares out against the expected ans and prints PASSED or FAILED.
>>  * Rank 0 additionally prints a numbered "Test N" header before each case.
>>  *
>>  * Always returns 0; MPI return codes are not inspected.
>>  */
>> int main(int argc, char **argv)
>> {
>>     int provided;
>>
>>     int rank, size;
>>
>>     /* Test-header counter. It must start at 0: it is read by count++ below,
>>      * and reading an uninitialized automatic variable is undefined. */
>>     int count = 0;
>>
>>     int in, out, ans;
>>
>>     MPI_Init_thread(&argc,&argv, MPI_THREAD_MULTIPLE,&provided);
>>
>>     MPI_Comm_rank(MPI_COMM_WORLD,&rank);
>>     MPI_Comm_size(MPI_COMM_WORLD,&size);
>>
>>     MPI_Barrier(MPI_COMM_WORLD);
>>
>>     /* TESTING SUM OPERATOR */
>>
>>     /* Only rank 0 contributes 1, so the sum is 1 on every rank. */
>>     if (rank==0) printf("Test %d: sum 1 out-of-place\n",count++);
>>     in  = ( rank==0 ? 1 : 0 );
>>     out = 0;
>>     ans = 1;
>>     MPI_Allreduce(&in,&out,1,MPI_INT,MPI_SUM,MPI_COMM_WORLD);
>>     if (out==ans) printf("%4d: in=%d out=%d ans=%d PASSED
>> \n",rank,in,out,ans);
>>     else          printf("%4d: in=%d out=%d ans=%d FAILED
>> \n",rank,in,out,ans);
>>     fflush(stdout);
>>
>>     if (rank==0) printf("Test %d: sum 1 in-place\n",count++);
>>     in  = ( rank==0 ? 1 : 0 );
>>     out = in;
>>     ans = 1;
>>     MPI_Allreduce(MPI_IN_PLACE,&out,1,MPI_INT,MPI_SUM,MPI_COMM_WORLD);
>>     if (out==ans) printf("%4d: in=%d out=%d ans=%d PASSED
>> \n",rank,in,out,ans);
>>     else          printf("%4d: in=%d out=%d ans=%d FAILED
>> \n",rank,in,out,ans);
>>     fflush(stdout);
>>
>>     /* Every rank contributes 1, so the sum equals the communicator size. */
>>     if (rank==0) printf("Test %d: sum 2 out-of-place\n",count++);
>>     in  = 1;
>>     out = 0;
>>     ans = size;
>>     MPI_Allreduce(&in,&out,1,MPI_INT,MPI_SUM,MPI_COMM_WORLD);
>>     if (out==ans) printf("%4d: in=%d out=%d ans=%d PASSED
>> \n",rank,in,out,ans);
>>     else          printf("%4d: in=%d out=%d ans=%d FAILED
>> \n",rank,in,out,ans);
>>     fflush(stdout);
>>
>>     if (rank==0) printf("Test %d: sum 2 in-place\n",count++);
>>     in  = 1;
>>     out = in;
>>     ans = size;
>>     MPI_Allreduce(MPI_IN_PLACE,&out,1,MPI_INT,MPI_SUM,MPI_COMM_WORLD);
>>     if (out==ans) printf("%4d: in=%d out=%d ans=%d PASSED
>> \n",rank,in,out,ans);
>>     else          printf("%4d: in=%d out=%d ans=%d FAILED
>> \n",rank,in,out,ans);
>>     fflush(stdout);
>>
>>     /* TESTING PROD OPERATOR */
>>
>>     /* Rank 0 contributes 1 and every other rank 0; the expected product
>>      * is set to 0. */
>>     if (rank==0) printf("Test %d: prod 1 out-of-place\n",count++);
>>     in  = ( rank==0 ? 1 : 0 );
>>     out = 0;
>>     ans = 0;
>>     MPI_Allreduce(&in,&out,1,MPI_INT,MPI_PROD,MPI_COMM_WORLD);
>>     if (out==ans) printf("%4d: in=%d out=%d ans=%d PASSED
>> \n",rank,in,out,ans);
>>     else          printf("%4d: in=%d out=%d ans=%d FAILED
>> \n",rank,in,out,ans);
>>     fflush(stdout);
>>
>>     if (rank==0) printf("Test %d: prod 1 in-place\n",count++);
>>     in  = ( rank==0 ? 1 : 0 );
>>     out = in;
>>     ans = 0;
>>     MPI_Allreduce(MPI_IN_PLACE,&out,1,MPI_INT,MPI_PROD,MPI_COMM_WORLD);
>>     if (out==ans) printf("%4d: in=%d out=%d ans=%d PASSED
>> \n",rank,in,out,ans);
>>     else          printf("%4d: in=%d out=%d ans=%d FAILED
>> \n",rank,in,out,ans);
>>     fflush(stdout);
>>
>>     /* Every rank contributes 1, so the product is 1. */
>>     if (rank==0) printf("Test %d: prod 2 out-of-place\n",count++);
>>     in  = 1;
>>     out = 0;
>>     ans = 1;
>>     MPI_Allreduce(&in,&out,1,MPI_INT,MPI_PROD,MPI_COMM_WORLD);
>>     if (out==ans) printf("%4d: in=%d out=%d ans=%d PASSED
>> \n",rank,in,out,ans);
>>     else          printf("%4d: in=%d out=%d ans=%d FAILED
>> \n",rank,in,out,ans);
>>     fflush(stdout);
>>
>>     if (rank==0) printf("Test %d: prod 2 in-place\n",count++);
>>     in  = 1;
>>     out = in;
>>     ans = 1;
>>     MPI_Allreduce(MPI_IN_PLACE,&out,1,MPI_INT,MPI_PROD,MPI_COMM_WORLD);
>>     if (out==ans) printf("%4d: in=%d out=%d ans=%d PASSED
>> \n",rank,in,out,ans);
>>     else          printf("%4d: in=%d out=%d ans=%d FAILED
>> \n",rank,in,out,ans);
>>     fflush(stdout);
>>
>>     /* END OF TESTS */
>>
>>     MPI_Finalize();
>>
>>     return 0;
>> }
>> ==========================================================
>>
>
> --
> Pavan Balaji
> http://www.mcs.anl.gov/~balaji
>



-- 
Jeff Hammond
Argonne Leadership Computing Facility
jhammond at alcf.anl.gov / (630) 252-5381
http://www.linkedin.com/in/jeffhammond


More information about the mpich-discuss mailing list