[mpich-discuss] Mpich2: problems with communications
The Source
thesourcehim at gmail.com
Wed Sep 17 10:46:07 CDT 2008
cpi starts and writes the following:
Process 0 of 2 is on WARZONE
Process 1 of 2 is on F9Virtual64
Nothing more. It never exits. I didn't pass any parameters to cpi. Should I?
MPI_Bcast(&size, 1, MPI_INT, 0, MPI_COMM_WORLD) returns success; my program then prints that it is about to call the next broadcast, but that's it: the next broadcast never returns.
My second node is a virtual Fedora 9 x86_64 machine (VMWare), running on the server node (a real Fedora 9 x86_64 machine). Could that be the problem?
Rajeev Thakur wrote:
> Does the cpi example from the examples directory run?
>
> Does this program fail after the very first broadcast, i.e.,
>
>> if(MPI_Bcast(&size, 1, MPI_INT, 0, MPI_COMM_WORLD)!=MPI_SUCCESS)
>>
>
> Rajeev
>
>
>> -----Original Message-----
>> From: owner-mpich-discuss at mcs.anl.gov
>> [mailto:owner-mpich-discuss at mcs.anl.gov] On Behalf Of The Source
>> Sent: Wednesday, September 17, 2008 7:45 AM
>> To: mpich-discuss at mcs.anl.gov
>> Subject: [mpich-discuss] Mpich2: problems with communications
>>
>> I'm using mpich2 1.0.7. I installed and configured it on 2
>> machines, started daemons.
>> The problems begin when I launch my program: first MPI_Bcast
>> succeeds, but the second one (or any other communication
>> function) hangs. Source code is attached.
>>
>> Also when I call MPI_Bcast, the second machine's daemon prints the
>> following:
>> F9Virtual64_mpdman_1 (run282): invalid msg from lhs;
>> expecting ringsize
>> got: {}
>>
>> Can anyone look at the code and tell me what's wrong?
>>
>> #include <mpi.h>
>> #include <stdio.h>
>> #include <string.h>
>> #include <math.h>
>> #include <sys/time.h>
>>
>> // Parallel Gauss-Seidel style solver for the linear system A*x = b.
>> //
>> // Rank 0 reads the matrix size, A and b from the file named by arg_v[1] and
>> // the desired accuracy from stdin, then broadcasts them to every rank.
>> // For each row, every rank sums its own slice of columns, the partial sums
>> // are reduced on rank 0, and rank 0 computes the new x[i]. Iteration stops
>> // when no component of x changed by more than the accuracy.
>> //
>> // Returns 0 on normal completion and when the input file argument is
>> // missing or MPI_Init fails. Unusable input detected on rank 0 terminates
>> // the whole job through MPI_Abort with error code 1.
>> int main(int arg_c, char** arg_v)
>> {
>>     if(arg_c<2)
>>     {
>>         printf("Input file not specified\n");
>>         return 0;
>>     }
>>     if(MPI_Init(&arg_c, &arg_v)!=MPI_SUCCESS)
>>     {
>>         printf("MPI initialization failed\n");
>>         return 0;
>>     }
>>     int ProcNum, ProcRank;
>>     MPI_Comm_size(MPI_COMM_WORLD, &ProcNum);
>>     MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank);
>>
>>     int size=0;
>>     double e=0.0;
>>
>>     FILE* f=0;
>>     if(!ProcRank)
>>     {
>>         f=fopen(arg_v[1], "r");
>>         if(!f)
>>         {
>>             printf("Error opening input file\n");
>>             fflush(stdout);
>>             // The other ranks are about to block in MPI_Bcast; finalizing
>>             // and returning from rank 0 alone would leave them waiting
>>             // forever, so the whole job is aborted instead.
>>             MPI_Abort(MPI_COMM_WORLD, 1);
>>             return 1;
>>         }
>>         if(fscanf(f, "%d", &size)!=1 || size<=0)
>>         {
>>             fprintf(stderr, "Invalid matrix size in input file\n");
>>             fclose(f);
>>             MPI_Abort(MPI_COMM_WORLD, 1);
>>             return 1;
>>         }
>>         printf("Enter desired accuracy\n");
>>         // Flush so the prompt is visible before blocking on stdin even
>>         // when stdout is a pipe.
>>         fflush(stdout);
>>         if(scanf("%lf", &e)!=1)
>>         {
>>             fprintf(stderr, "Invalid accuracy value\n");
>>             fclose(f);
>>             MPI_Abort(MPI_COMM_WORLD, 1);
>>             return 1;
>>         }
>>         printf("Broadcasting matrix size\n");
>>         fflush(stdout);
>>     }
>>     if(MPI_Bcast(&size, 1, MPI_INT, 0, MPI_COMM_WORLD)!=MPI_SUCCESS)
>>     {
>>         if(!ProcRank)
>>             printf("Failed to broadcast matrix size\n");
>>     }
>>     if(!ProcRank)
>>     {
>>         printf("Broadcasting accuracy\n");
>>         fflush(stdout);
>>     }
>>     if(MPI_Bcast(&e, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD)!=MPI_SUCCESS)
>>     {
>>         if(!ProcRank)
>>             printf("Failed to broadcast accuracy\n");
>>     }
>>
>>     // Element count is computed in size_t so the allocation size does not
>>     // overflow int. The MPI_Bcast count below is still an int, so size*size
>>     // must fit in int for the matrix broadcast to be valid.
>>     const size_t matrix_elems=(size_t)size*(size_t)size;
>>     double* A=new double[matrix_elems];
>>     double* b=new double[size];
>>     double* x=new double[size];
>>     double* xprev=new double[size];
>>
>>     memset(x, 0, size*sizeof(double));
>>     memset(xprev, 0, size*sizeof(double));
>>
>>     if(!ProcRank)
>>     {
>>         // Read A row by row, then b; stop at the first value that fails
>>         // to parse.
>>         int ok=1;
>>         for(int i=0; ok && i<size; i++)
>>             for(int j=0; ok && j<size; j++)
>>                 ok=(fscanf(f, "%lf", &(A[i*size+j]))==1);
>>         for(int i=0; ok && i<size; i++)
>>             ok=(fscanf(f, "%lf", &(b[i]))==1);
>>         fclose(f);
>>         if(!ok)
>>         {
>>             fprintf(stderr, "Input file is truncated or malformed\n");
>>             MPI_Abort(MPI_COMM_WORLD, 1);
>>             return 1;
>>         }
>>     }
>>
>>     if(!ProcRank)
>>     {
>>         printf("Waiting all processes to be initialized\n");
>>         fflush(stdout);
>>     }
>>     MPI_Barrier(MPI_COMM_WORLD);
>>
>>     if(!ProcRank)
>>     {
>>         printf("Broadcasting matrix\n");
>>         fflush(stdout);
>>     }
>>     MPI_Bcast(A, size*size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
>>     if(!ProcRank)
>>     {
>>         printf("Broadcasting b\n");
>>         fflush(stdout);
>>     }
>>     MPI_Bcast(b, size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
>>
>>     int fin=0, itercount=0;
>>
>>     // Column slice [first_col, last_col) owned by this rank; the last rank
>>     // also takes the remainder when size is not divisible by ProcNum.
>>     const int snum=size/ProcNum;
>>     const int first_col=ProcRank*snum;
>>     const int last_col=(ProcRank==ProcNum-1)?size:(ProcRank+1)*snum;
>>
>>     if(!ProcRank)
>>     {
>>         printf("Starting calculation\n");
>>         fflush(stdout);
>>     }
>>     struct timeval tv1, tv2;
>>     gettimeofday(&tv1, 0);
>>     do
>>     {
>>         MPI_Bcast(xprev, size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
>>         itercount++;
>>         for(int i=0; i<size; i++)
>>         {
>>             double cursum=0.0;
>>             double totalsum=0.0;
>>             for(int n=first_col; n<last_col; n++)
>>             {
>>                 // Columns before i use the value already updated in this
>>                 // sweep; the rest use the previous iterate.
>>                 if(n<i)
>>                     cursum+=A[i*size+n]*x[n];
>>                 else
>>                     cursum+=A[i*size+n]*xprev[n];
>>             }
>>             // MPI_Reduce is itself collective, so no barrier is needed
>>             // in front of it.
>>             MPI_Reduce(&cursum, &totalsum, 1, MPI_DOUBLE,
>>                        MPI_SUM, 0, MPI_COMM_WORLD);
>>             if(!ProcRank)
>>             {
>>                 x[i]=xprev[i]-1.0/A[i*size+i]*(totalsum-b[i]);
>>             }
>>             // Only rank 0 computes x[i]; share it so the other ranks do
>>             // not keep using their initial zero for columns n<i in the
>>             // following rows.
>>             MPI_Bcast(&x[i], 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
>>         }
>>         if(!ProcRank)
>>         {
>>             fin=1;
>>             for(int i=0; i<size; i++)
>>                 if(fabs(x[i]-xprev[i])>e)
>>                     fin=0;
>>             if(!fin)
>>                 memcpy(xprev, x, size*sizeof(double));
>>         }
>>         MPI_Bcast(&fin, 1, MPI_INT, 0, MPI_COMM_WORLD);
>>     }
>>     while(!fin);
>>
>>     gettimeofday(&tv2, 0);
>>
>>     if(!ProcRank)
>>     {
>>         printf("Matrix A:\n");
>>         for(int i=0; i<size; i++)
>>         {
>>             for(int j=0; j<size; j++)
>>                 printf("%lf ", A[i*size+j]);
>>             printf("\n");
>>         }
>>         printf("\n");
>>         printf("Line b:\n");
>>         for(int i=0; i<size; i++)
>>             printf("%lf ", b[i]);
>>         printf("\n");
>>
>>         printf("Iteration number: %d\n", itercount);
>>         // tv_sec/tv_usec are wider than int on 64-bit targets, so the
>>         // elapsed time is computed and printed as long.
>>         long elapsed_us=(long)(tv2.tv_sec-tv1.tv_sec)*1000000L
>>                         +(long)(tv2.tv_usec-tv1.tv_usec);
>>         printf("Calculation time: %ld microseconds\n", elapsed_us);
>>
>>         printf("Results: \n");
>>         for(int i=0; i<size; i++)
>>             printf("x%d=%lf\n", i+1, x[i]);
>>     }
>>
>>     MPI_Barrier(MPI_COMM_WORLD);
>>
>>     delete [] A;
>>     delete [] b;
>>     delete [] x;
>>     delete [] xprev;
>>
>>     MPI_Finalize();
>>
>>     return 0;
>> }
>>
>>
>>
>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/mpich-discuss/attachments/20080917/ac828510/attachment.htm>
More information about the mpich-discuss
mailing list