[mpich-discuss] Mpich2: problems with communications

The Source thesourcehim at gmail.com
Wed Sep 17 10:46:07 CDT 2008


cpi starts and writes the following:
Process 0 of 2 is on WARZONE
Process 1 of 2 is on F9Virtual64
Nothing more. It never exits. I didn't pass any parameters to cpi. Should I?

MPI_Bcast(&size, 1, MPI_INT, 0, MPI_COMM_WORLD) returns success; my program then prints that it is about to call the next broadcast, but that's it: the next broadcast never returns.
My second node is a virtual Fedora 9 x86_64 machine (VMware) running on the server node (a real Fedora 9 x86_64 machine). Could that be the problem?



Rajeev Thakur wrote:
> Does the cpi example from the examples directory run? 
>
> Does this program fail after the very first broadcast, i.e.,
>   
>>     if(MPI_Bcast(&size, 1, MPI_INT, 0, MPI_COMM_WORLD)!=MPI_SUCCESS)
>>     
>
> Rajeev
>
>   
>> -----Original Message-----
>> From: owner-mpich-discuss at mcs.anl.gov 
>> [mailto:owner-mpich-discuss at mcs.anl.gov] On Behalf Of The Source
>> Sent: Wednesday, September 17, 2008 7:45 AM
>> To: mpich-discuss at mcs.anl.gov
>> Subject: [mpich-discuss] Mpich2: problems with communications
>>
>> I'm using mpich2 1.0.7. I installed and configured it on 2 
>> machines, started daemons.
>> The problems begin when I launch my program: first MPI_Bcast 
>> succeeds, but the second one (or any other communication 
>> function) hangs. Source code is attached.
>>
>> Also, when I call MPI_Bcast, the second machine's daemon prints the 
>> following:
>> F9Virtual64_mpdman_1 (run282): invalid msg from lhs; 
>> expecting ringsize
>> got: {}
>>
>> Can anyone look at the code and tell me what's wrong?
>>
>> #include <mpi.h>
>> #include <stdio.h>
>> #include <string.h>
>> #include <math.h>
>> #include <sys/time.h>
>>
>> int main(int arg_c, char** arg_v)
>> {
>>     if(arg_c<2)
>>     {
>>         printf("Input file not specified\n");
>>         return 0;
>>     }
>>     if(MPI_Init(&arg_c, &arg_v)!=MPI_SUCCESS)
>>     {
>>         printf("MPI initialization failed\n");
>>         return 0;
>>     }
>>     int ProcNum, ProcRank;
>>     MPI_Comm_size(MPI_COMM_WORLD, &ProcNum);
>>     MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank);
>>    
>>     int size;
>>     double e;
>>     double* A;
>>     double* b;
>>     double* x, *xprev;
>>    
>>     FILE* f=0;
>>     if(!ProcRank)
>>     {
>>         f=fopen(arg_v[1], "r");
>>         if(!f)
>>         {
>>             printf("Error opening input file\n");
>>             MPI_Finalize();
>>             return 0;
>>         }
>>         fscanf(f, "%d", &size);
>>         printf("Enter desired accuracy\n");
>>         scanf("%lf", &e);
>>     }
>>     if(!ProcRank)
>>         printf("Broadcasting matrix size\n");
>>     if(MPI_Bcast(&size, 1, MPI_INT, 0, MPI_COMM_WORLD)!=MPI_SUCCESS)
>>         if(!ProcRank)
>>             printf("Failed to broadcast matrix size\n");
>>     if(!ProcRank)
>>         printf("Broadcasting accuracy\n");
>>     if(MPI_Bcast(&e, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD)!=MPI_SUCCESS)
>>         if(!ProcRank)
>>             printf("Failed to broadcast accuracy\n");
>>     A=new double[size*size];
>>     b=new double[size];
>>     x=new double[size];
>>     xprev=new double[size];
>>    
>>     memset(x, 0, size*sizeof(double));
>>     memset(xprev, 0, size*sizeof(double));
>>    
>>     if(!ProcRank)
>>     {
>>         for(int i=0; i<size; i++)
>>             for(int j=0; j<size; j++)
>>                 fscanf(f, "%lf", &(A[i*size+j]));
>>         for(int i=0; i<size; i++)
>>             fscanf(f, "%lf", &(b[i]));
>>         fclose(f);
>>     }
>>    
>>     if(!ProcRank)
>>         printf("Waiting all processes to be initialized\n");
>>     MPI_Barrier(MPI_COMM_WORLD);
>>    
>>     if(!ProcRank)
>>         printf("Broadcasting matrix\n");
>>     MPI_Bcast(A, size*size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
>>     if(!ProcRank)
>>         printf("Broadcasting b\n");
>>     MPI_Bcast(b, size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
>>    
>>     double cursum, totalsum;
>>     int fin, itercount=0;
>>    
>>     if(!ProcRank)
>>         printf("Starting calculation\n");
>>     struct timeval tv1, tv2;
>>     gettimeofday(&tv1, 0);
>>     do
>>     {
>>         MPI_Bcast(xprev, size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
>>         itercount++;
>>         for(int i=0; i<size; i++)
>>         {
>>             int snum=(size)/ProcNum;
>>             cursum=0.0;
>>             totalsum=0.0;
>>             for(int n=ProcRank*snum;
>> n<((ProcRank==ProcNum-1)?(size):((ProcRank+1)*snum)); n++)
>>             {
>>                 if(n<=i-1)
>>                     cursum+=A[i*size+n]*x[n];
>>                 else
>>                     cursum+=A[i*size+n]*xprev[n];
>>             }
>>             MPI_Barrier(MPI_COMM_WORLD);
>>             MPI_Reduce(&cursum, &totalsum, 1, MPI_DOUBLE, 
>> MPI_SUM, 0, MPI_COMM_WORLD);
>>             if(!ProcRank)
>>             {
>>                 x[i]=xprev[i]-1.0/A[i*size+i]*(totalsum-b[i]);
>>             }
>>         }
>>         if(!ProcRank)
>>         {
>>             fin=1;
>>             for(int i=0; i<size; i++)
>>                 if(fabs(x[i]-xprev[i])>e)
>>                     fin=0;
>>             if(!fin)
>>                 memcpy(xprev, x, size*sizeof(double));
>>         }
>>         MPI_Bcast(&fin, 1, MPI_INT, 0, MPI_COMM_WORLD);
>>     }
>>     while(!fin);
>>    
>>     gettimeofday(&tv2, 0);
>>    
>>     if(!ProcRank)
>>     {
>>         printf("Matrix A:\n");
>>         for(int i=0; i<size; i++)
>>         {
>>             for(int j=0; j<size; j++)
>>                 printf("%lf ", A[i*size+j]);
>>             printf("\n");
>>         }
>>         printf("\n");
>>         printf("Line b:\n");
>>         for(int i=0; i<size; i++)
>>             printf("%lf ", b[i]);
>>         printf("\n");
>>        
>>         printf("Iteration number: %d\n", itercount);
>>         printf("Calculation time: %d microseconds\n", 
>> (tv2.tv_sec-tv1.tv_sec)*1000000+(tv2.tv_usec-tv1.tv_usec));
>>        
>>         printf("Results: \n");
>>         for(int i=0; i<size; i++)
>>             printf("x%d=%lf\n", i+1, x[i]);
>>     }
>>    
>>     MPI_Barrier(MPI_COMM_WORLD);
>>    
>>     delete [] A;
>>     delete [] b;
>>     delete [] x;
>>     delete [] xprev;
>>    
>>     MPI_Finalize();
>>    
>>     return 0;
>> }
>>
>>
>>     
>
>
>   

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/mpich-discuss/attachments/20080917/ac828510/attachment.htm>


More information about the mpich-discuss mailing list