[mpich-discuss] Mpich2: problems with communications

Rajeev Thakur thakur at mcs.anl.gov
Wed Sep 17 08:34:40 CDT 2008


Does the cpi example from the examples directory run? 

Does this program fail after the very first broadcast, i.e.,
>     if(MPI_Bcast(&size, 1, MPI_INT, 0, MPI_COMM_WORLD)!=MPI_SUCCESS)

Rajeev

> -----Original Message-----
> From: owner-mpich-discuss at mcs.anl.gov 
> [mailto:owner-mpich-discuss at mcs.anl.gov] On Behalf Of The Source
> Sent: Wednesday, September 17, 2008 7:45 AM
> To: mpich-discuss at mcs.anl.gov
> Subject: [mpich-discuss] Mpich2: problems with communications
> 
> I'm using mpich2 1.0.7. I installed and configured it on 2 
> machines, started daemons.
> The problems begin when I launch my program: first MPI_Bcast 
> succeeds, but the second one (or any other communication 
> function) hangs. Source code is attached.
> 
> Also, when I call MPI_Bcast, the second machine's daemon prints the 
> following:
> F9Virtual64_mpdman_1 (run282): invalid msg from lhs; 
> expecting ringsize
> got: {}
> 
> Can anyone look at the code and tell me what's wrong?
> 
> #include <mpi.h>
> #include <stdio.h>
> #include <string.h>
> #include <math.h>
> #include <sys/time.h>
> 
> int main(int arg_c, char** arg_v) // Rank 0 reads a linear system from the file arg_v[1]; all ranks then iterate until x changes by at most e per component.
> {
>     if(arg_c<2) // the input file name is required; checked before MPI_Init
>     {
>         printf("Input file not specified\n");
>         return 0;
>     }
>     if(MPI_Init(&arg_c, &arg_v)!=MPI_SUCCESS)
>     {
>         printf("MPI initialization failed\n");
>         return 0;
>     }
>     int ProcNum, ProcRank; // communicator size and this process's rank in MPI_COMM_WORLD
>     MPI_Comm_size(MPI_COMM_WORLD, &ProcNum);
>     MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank);
>    
>     int size; // matrix dimension; set on rank 0 from the file, then broadcast
>     double e; // accuracy threshold; set on rank 0 from stdin, then broadcast
>     double* A; // flat array of size*size coefficients, indexed as A[i*size+j]
>     double* b; // right-hand side, size entries
>     double* x, *xprev; // current and previous iterates, size entries each
>    
>     FILE* f=0;
>     if(!ProcRank) // only rank 0 opens the file and reads the interactive input
>     {
>         f=fopen(arg_v[1], "r");
>         if(!f)
>         {
>             printf("Error opening input file\n");
>             MPI_Finalize(); // NOTE(review): only rank 0 exits here; the other ranks go on to the MPI_Bcast calls below - confirm this cannot leave them waiting
>             return 0;
>         }
>         fscanf(f, "%d", &size); // first value in the file is the dimension; NOTE(review): return value is not checked
>         printf("Enter desired accuracy\n");
>         scanf("%lf", &e);
>     }
>     if(!ProcRank)
>         printf("Broadcasting matrix size\n");
>     if(MPI_Bcast(&size, 1, MPI_INT, 0, MPI_COMM_WORLD)!=MPI_SUCCESS) // on failure only a message is printed (by rank 0); execution continues
>         if(!ProcRank)
>             printf("Failed to broadcast matrix size\n");
>     if(!ProcRank)
>         printf("Broadcasting accuracy\n");
>     if(MPI_Bcast(&e, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD)!=MPI_SUCCESS) // same pattern as above: report on rank 0 and carry on
>         if(!ProcRank)
>             printf("Failed to broadcast accuracy\n");
>     A=new double[size*size]; // every rank allocates full copies, sized by the broadcast value
>     b=new double[size];
>     x=new double[size];
>     xprev=new double[size];
>    
>     memset(x, 0, size*sizeof(double)); // both iterates start as all-zero bytes
>     memset(xprev, 0, size*sizeof(double));
>    
>     if(!ProcRank) // rank 0 fills A and b from the file; the other ranks receive them via the broadcasts below
>     {
>         for(int i=0; i<size; i++)
>             for(int j=0; j<size; j++)
>                 fscanf(f, "%lf", &(A[i*size+j]));
>         for(int i=0; i<size; i++)
>             fscanf(f, "%lf", &(b[i]));
>         fclose(f);
>     }
>    
>     if(!ProcRank)
>         printf("Waiting all processes to be initialized\n");
>     MPI_Barrier(MPI_COMM_WORLD);
>    
>     if(!ProcRank)
>         printf("Broadcasting matrix\n");
>     MPI_Bcast(A, size*size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
>     if(!ProcRank)
>         printf("Broadcasting b\n");
>     MPI_Bcast(b, size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
>    
>     double cursum, totalsum; // this rank's partial row sum and the reduced sum (received on rank 0)
>     int fin, itercount=0; // fin: stop flag decided on rank 0; itercount: number of outer iterations
>    
>     if(!ProcRank)
>         printf("Starting calculation\n");
>     struct timeval tv1, tv2; // wall-clock timestamps taken before and after the iteration loop
>     gettimeofday(&tv1, 0);
>     do
>     {
>         MPI_Bcast(xprev, size, MPI_DOUBLE, 0, MPI_COMM_WORLD); // each pass starts by distributing rank 0's xprev
>         itercount++;
>         for(int i=0; i<size; i++)
>         {
>             int snum=(size)/ProcNum; // columns per rank (integer division); the last rank also takes the remainder
>             cursum=0.0;
>             totalsum=0.0;
>             for(int n=ProcRank*snum;
> n<((ProcRank==ProcNum-1)?(size):((ProcRank+1)*snum)); n++)
>             {
>                 if(n<=i-1) // columns before i use the current iterate, the rest use the previous one
>                     cursum+=A[i*size+n]*x[n]; // NOTE(review): x is written only on rank 0 below and never broadcast, so other ranks read their zero-initialised x here - confirm intended
>                 else
>                     cursum+=A[i*size+n]*xprev[n];
>             }
>             MPI_Barrier(MPI_COMM_WORLD);
>             MPI_Reduce(&cursum, &totalsum, 1, MPI_DOUBLE, 
> MPI_SUM, 0, MPI_COMM_WORLD);
>             if(!ProcRank) // totalsum is the sum of all ranks' cursum, delivered to rank 0
>             {
>                 x[i]=xprev[i]-1.0/A[i*size+i]*(totalsum-b[i]); // no guard against a zero diagonal entry
>             }
>         }
>         if(!ProcRank)
>         {
>             fin=1; // stays 1 only if no component moved by more than e
>             for(int i=0; i<size; i++)
>                 if(fabs(x[i]-xprev[i])>e)
>                     fin=0;
>             if(!fin)
>                 memcpy(xprev, x, size*sizeof(double)); // not finished: x becomes the previous iterate for the next pass
>         }
>         MPI_Bcast(&fin, 1, MPI_INT, 0, MPI_COMM_WORLD); // share rank 0's decision so every rank evaluates the same loop condition
>     }
>     while(!fin);
>    
>     gettimeofday(&tv2, 0);
>    
>     if(!ProcRank) // only rank 0 prints the inputs, statistics and results
>     {
>         printf("Matrix A:\n");
>         for(int i=0; i<size; i++)
>         {
>             for(int j=0; j<size; j++)
>                 printf("%lf ", A[i*size+j]);
>             printf("\n");
>         }
>         printf("\n");
>         printf("Line b:\n");
>         for(int i=0; i<size; i++)
>             printf("%lf ", b[i]);
>         printf("\n");
>        // NOTE(review): the time printed below uses %d with a value built from tv_sec/tv_usec, which are usually wider than int - confirm the format specifier
>         printf("Iteration number: %d\n", itercount);
>         printf("Calculation time: %d microseconds\n", 
> (tv2.tv_sec-tv1.tv_sec)*1000000+(tv2.tv_usec-tv1.tv_usec));
>        
>         printf("Results: \n");
>         for(int i=0; i<size; i++)
>             printf("x%d=%lf\n", i+1, x[i]); // components are printed with 1-based labels
>     }
>    
>     MPI_Barrier(MPI_COMM_WORLD);
>    
>     delete [] A;
>     delete [] b;
>     delete [] x;
>     delete [] xprev;
>    
>     MPI_Finalize();
>    
>     return 0;
> }
> 
> 




More information about the mpich-discuss mailing list