[MPICH] mpich 1.2.7 hangs on MPID_P4_Init
Adam Zhang
Adam.Zhang at Sun.com
Fri Jun 29 00:37:48 CDT 2007
Hi all,
I am using mpqc (version 2.3.1) with mpich 1.2.7p1. When I run one of
the mpqc test examples, it hangs in MPID_P4_Init. I wonder if this is
a problem with MPI sends and receives. Below are the stacks of the running processes:
Stack:
adam at bishop # pgrep mpqc
29125
29126
29130
29129
adam at bishop # ptree 29125
411 /usr/lib/ssh/sshd
28740 /usr/lib/ssh/sshd
28747 /usr/lib/ssh/sshd
28749 -bash
29064 /bin/sh /export/home/adam/local/bin/mpirun -np 2 bin/mpqc
data/h2
29125 /export/home/adam/amanda/bin/mpqc
data/h2o_mp200sto3gc1.in -p4p
29126 /export/home/adam/amanda/bin/mpqc
data/h2o_mp200sto3gc1.in -p
29127 rsh bishop -l adam -n
/export/home/adam/amanda/bin/mpqc bisho
adam at bishop # ptree 29129
291 /usr/lib/inet/inetd start
29128 /usr/sbin/in.rshd
29129 /export/home/adam/amanda/bin/mpqc bishop 34021 -p4amslave
-p4yourname
29130 /export/home/adam/amanda/bin/mpqc bishop 34021 -p4amslave
-p4yourna
adam at bishop # pstack 29126 | /opt/SUNWspro/bin/c++filt
29126: /export/home/adam/amanda/bin/mpqc data/h2o_mp200sto3gc1.in -p4pg
/expo
fda20a57 pollsys (8044ee0, 2, 0, 0)
fd9cee0a pselect (9, 8044fc0, fda49868, fda49868, 0, 0) + 18e
fd9cf100 select (9, 8044fc0, 0, 0, 0) + 82
0856a838 listener (8047b6c, 8728648, 8761218, fda48000, 8045084,
fd9c3023) + 238
0856370f create_bm_processes (87af588) + 50f
085630f5 p4_startup (87af588) + 125
08562fae p4_create_procgroup (8047b6c, 8728648, 8761218, 8761218,
8045158, 8581fd6) + 8e
0856fd4a MPID_P4_Init (8047b6c, 8047b70) + 4a
0856efcf MPID_CH_InitMsgPass (8047b6c, 8047b70, 4000, 1f400) + ef
0856c235 MPID_Init (8047b6c, 8047b70, 0, 8045260) + 1f5
0854a120 MPIR_Init (8047b6c, 8047b70) + 130
08549fd6 MPI_Init_thread (8047b6c, 8047b70, 1, 80452ec) + 26
0851da81 void sc::MPIMessageGrp::init(int,int*,char***) (8761218, 5b,
8047b6c, 8047b70) + 19d
0851d61e sc::MPIMessageGrp::MPIMessageGrp #Nvariant 1(int*,char***)
(8761218, 8047b6c, 8047b70) + 46
08131c2a int try_main(int,char**) (2, 8047bcc) + 10a
08137aae main (6, 8047bcc, 8047be8) + 1a
08130c1a _start (6, 8047cb4, 8047cd6, 0, 8047cf5, 0) + 7a
adam at bishop # pstack 29130 | /opt/SUNWspro/bin/c++filt
29130: /export/home/adam/amanda/bin/mpqc bishop 34021 -p4amslave
-p4yourname
fda20a57 pollsys (80432e0, 2, 0, 0)
fd9cee0a pselect (a, 80433cc, fda49868, fda49868, 0, 0) + 18e
fd9cf100 select (a, 80433cc, 0, 0, 0) + 82
0856a838 listener (8047ddc, 8728648, 8761218, 0, 0, 0) + 238
08564ba3 create_rm_processes (1, 6) + 653
085642ed rm_start (8047ddc, 8047e3c) + 43d
085610f5 p4_initenv (8047ddc, 8047e3c) + 185
0856fd24 MPID_P4_Init (8047ddc, 8047de0) + 24
0856efcf MPID_CH_InitMsgPass (8047ddc, 8047de0, 4000, 1f400) + ef
0856c235 MPID_Init (8047ddc, 8047de0, 0, 80454d0) + 1f5
0854a120 MPIR_Init (8047ddc, 8047de0) + 130
08549fd6 MPI_Init_thread (8047ddc, 8047de0, 1, 804555c) + 26
0851da81 void sc::MPIMessageGrp::init(int,int*,char***) (8761218, 5b,
8047ddc, 8047de0) + 19d
0851d61e sc::MPIMessageGrp::MPIMessageGrp #Nvariant 1(int*,char***)
(8761218, 8047ddc, 8047de0) + 46
08131c2a int try_main(int,char**) (4, 8047e3c) + 10a
08137aae main (8, 8047e3c, 8047e60) + 1a
08130c1a _start (8, 8047eec, 8047f0e, 8047f15, 8047f1b, 0) + 7a
adam at bishop # pstack 29125 | /opt/SUNWspro/bin/c++filt
29125: /export/home/adam/amanda/bin/mpqc data/h2o_mp200sto3gc1.in -p4pg
/expo
----------------- lwp# 1 / thread# 1 --------------------
fda20d87 lwp_wait (2, 803e7f4)
fda1cfd2 _thrp_join (2, 0, 803e844, 1) + 5a
fda1d151 pthread_join (2, 803e844) + 2b
08522d33 int sc::PthreadThreadGrp::wait_threads() (87719b8) + 3f
08522630 void sc::MTMPIMemoryGrp::deactivate() (8813e60) + a8
08522675 void sc::MTMPIMemoryGrp::sync() (8813e60) + 3d
08152c4c void sc::MBPT2::compute_cs_grad() (8811940) + 1a58
0814dbc4 void sc::MBPT2::compute() (8811940) + 43c
0852660a void sc::AccResultInfo::update() (8811980) + 36
0844a256 double sc::Function::value() (8811940) + 16
083c8080 double sc::MolecularEnergy::energy() (8811940) + 14
0813505f int try_main(int,char**) (2, 8047bcc) + 353f
08137aae main (6, 8047bcc, 8047be8) + 1a
08130c1a _start (6, 8047cb4, 8047cd6, 0, 8047cf5, 0) + 7a
----------------- lwp# 2 / thread# 2 --------------------
fda20a57 pollsys (fd5e7bd0, 1, fd5e7c68, 0)
fd9cee0a pselect (8, fd5e7cac, fda49868, fda49868, fd5e7c68, 0) + 18e
fd9cf100 select (8, fd5e7cac, 0, 0, fd5e7d2c) + 82
08569403 socket_recv (1) + 1c3
0857b3b3 recv_message (873ec78, 873ec74) + 33
0857b1af p4_recv (873ec78, 873ec74, fd5e7dc8, 873efbc) + 6f
0858234b MPID_CH_Check_incoming (8761290, 1) + 2ab
0857e079 MPID_RecvComplete (fd5ebe5c, fd5ebf80, fd5ebf20) + d9
0856d2cd MPID_RecvDatatype (8814170, fd5ebf68, 18, 873ea60, fffffffe,
3a99) + 8d
08549be8 MPI_Recv (fd5ebf68, 18, 3, fffffffe, 3a99, 89) + 218
0852122d int sc::MTMPIThread::run_one() (881d398) + 2d
085211f4 void sc::MTMPIThread::run() (881d398) + 10
08519f95 void*sc::Thread::run_Thread_run(void*) (881d398) + 15
08519ece Thread__run_Thread_run (881d398) + e
fda1fd36 _thr_setup (fd8e2400) + 4e
fda20020 _lwp_start (fd8e2400, 0, 0, fd5ebff8, fda20020, fd8e2400)
adam at bishop #
adam at bishop # pstack 29129 | /opt/SUNWspro/bin/c++filt
29129: /export/home/adam/amanda/bin/mpqc bishop 34021 -p4amslave
-p4yourname
----------------- lwp# 1 / thread# 1 --------------------
fda20d87 lwp_wait (2, 803ea64)
fda1cfd2 _thrp_join (2, 0, 803eab4, 1) + 5a
fda1d151 pthread_join (2, 803eab4) + 2b
08522d33 int sc::PthreadThreadGrp::wait_threads() (87719b8) + 3f
08522630 void sc::MTMPIMemoryGrp::deactivate() (87bb918) + a8
08522675 void sc::MTMPIMemoryGrp::sync() (87bb918) + 3d
08152c4c void sc::MBPT2::compute_cs_grad() (87beeb0) + 1a58
0814dbc4 void sc::MBPT2::compute() (87beeb0) + 43c
0852660a void sc::AccResultInfo::update() (87beef0) + 36
0844a256 double sc::Function::value() (87beeb0) + 16
083c8080 double sc::MolecularEnergy::energy() (87beeb0) + 14
0813505f int try_main(int,char**) (2, 877ece8) + 353f
08137aae main (8, 8047e3c, 8047e60) + 1a
08130c1a _start (8, 8047eec, 8047f0e, 8047f15, 8047f1b, 0) + 7a
----------------- lwp# 2 / thread# 2 --------------------
fda20a57 pollsys (fd5e7be0, 1, fd5e7c68, 0)
fd9cee0a pselect (7, fd5e7cac, fda49868, fda49868, fd5e7c68, 0) + 18e
fd9cf100 select (7, fd5e7cac, 0, 0, fd5e7d2c) + 82
08569403 socket_recv (1) + 1c3
0857b3b3 recv_message (873ec78, 873ec74) + 33
0857b1af p4_recv (873ec78, 873ec74, fd5e7dc8, 873efbc) + 6f
0858234b MPID_CH_Check_incoming (8761290, 1) + 2ab
0857e079 MPID_RecvComplete (fd5ebe5c, fd5ebf80, fd5ebf20) + d9
0856d2cd MPID_RecvDatatype (87bbc28, fd5ebf68, 18, 873ea60, fffffffe,
3a99) + 8d
08549be8 MPI_Recv (fd5ebf68, 18, 3, fffffffe, 3a99, 89) + 218
0852122d int sc::MTMPIThread::run_one() (87c77c0) + 2d
085211f4 void sc::MTMPIThread::run() (87c77c0) + 10
08519f95 void*sc::Thread::run_Thread_run(void*) (87c77c0) + 15
08519ece Thread__run_Thread_run (87c77c0) + e
fda1fd36 _thr_setup (fd8e2400) + 4e
fda20020 _lwp_start (fd8e2400, 0, 0, fd5ebff8, fda20020, fd8e2400)
Environment: Solaris 10 for AMD64.
Compiler: Sun Studio 11
The MPQC configure command is:
./configure --with-cc="mpicc" --with-cxx="mpicxx" --with-f77="mpif77"
--with-libs="-lsunperf -lmpich" --with-libdirs='-L/opt/mpich/lib'
--prefix=/opt/mpqc --enable-always-use-mpi --with-default-parallel=mpi
--with-mpi-thread="funneled"
Regards,
Adam
More information about the mpich-discuss
mailing list