[mpich-discuss] Mpich2: problems with communications
Rajeev Thakur
thakur at mcs.anl.gov
Wed Sep 17 08:34:40 CDT 2008
Does the cpi example from the examples directory run?
Does this program fail after the very first broadcast, i.e.,
> if(MPI_Bcast(&size, 1, MPI_INT, 0, MPI_COMM_WORLD)!=MPI_SUCCESS)
Rajeev
> -----Original Message-----
> From: owner-mpich-discuss at mcs.anl.gov
> [mailto:owner-mpich-discuss at mcs.anl.gov] On Behalf Of The Source
> Sent: Wednesday, September 17, 2008 7:45 AM
> To: mpich-discuss at mcs.anl.gov
> Subject: [mpich-discuss] Mpich2: problems with communications
>
> I'm using mpich2 1.0.7. I installed and configured it on 2
> machines, started daemons.
> The problems begin when I launch my program: first MPI_Bcast
> succeeds, but the second one (or any other communication
> function) hangs. Source code is attached.
>
> Also when I call MPI_Bcast, second machine deamon prints the
> following:
> F9Virtual64_mpdman_1 (run282): invalid msg from lhs;
> expecting ringsize
> got: {}
>
> Can anyone look at the code and tell me what's wrong?
>
> #include <mpi.h>
> #include <stdio.h>
> #include <string.h>
> #include <math.h>
> #include <sys/time.h>
>
> int main(int arg_c, char** arg_v)
> {
> if(arg_c<2)
> {
> printf("Input file not specified\n");
> return 0;
> }
> if(MPI_Init(&arg_c, &arg_v)!=MPI_SUCCESS)
> {
> printf("MPI initialization failed\n");
> return 0;
> }
> int ProcNum, ProcRank;
> MPI_Comm_size(MPI_COMM_WORLD, &ProcNum);
> MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank);
>
> int size;
> double e;
> double* A;
> double* b;
> double* x, *xprev;
>
> FILE* f=0;
> if(!ProcRank)
> {
> f=fopen(arg_v[1], "r");
> if(!f)
> {
> printf("Error opening input file\n");
> MPI_Finalize();
> return 0;
> }
> fscanf(f, "%d", &size);
> printf("Enter desired accuracy\n");
> scanf("%lf", &e);
> }
> if(!ProcRank)
> printf("Broadcasting matrix size\n");
> if(MPI_Bcast(&size, 1, MPI_INT, 0, MPI_COMM_WORLD)!=MPI_SUCCESS)
> if(!ProcRank)
> printf("Failed to broadcast matrix size\n");
> if(!ProcRank)
> printf("Broadcasting accuracy\n");
> if(MPI_Bcast(&e, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD)!=MPI_SUCCESS)
> if(!ProcRank)
> printf("Failed to broadcast accuracy\n");
> A=new double[size*size];
> b=new double[size];
> x=new double[size];
> xprev=new double[size];
>
> memset(x, 0, size*sizeof(double));
> memset(xprev, 0, size*sizeof(double));
>
> if(!ProcRank)
> {
> for(int i=0; i<size; i++)
> for(int j=0; j<size; j++)
> fscanf(f, "%lf", &(A[i*size+j]));
> for(int i=0; i<size; i++)
> fscanf(f, "%lf", &(b[i]));
> fclose(f);
> }
>
> if(!ProcRank)
> printf("Waiting all processes to be initialized\n");
> MPI_Barrier(MPI_COMM_WORLD);
>
> if(!ProcRank)
> printf("Broadcasting matrix\n");
> MPI_Bcast(A, size*size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
> if(!ProcRank)
> printf("Broadcasting b\n");
> MPI_Bcast(b, size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
>
> double cursum, totalsum;
> int fin, itercount=0;
>
> if(!ProcRank)
> printf("Starting calculation\n");
> struct timeval tv1, tv2;
> gettimeofday(&tv1, 0);
> do
> {
> MPI_Bcast(xprev, size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
> itercount++;
> for(int i=0; i<size; i++)
> {
> int snum=(size)/ProcNum;
> cursum=0.0;
> totalsum=0.0;
> for(int n=ProcRank*snum;
> n<((ProcRank==ProcNum-1)?(size):((ProcRank+1)*snum)); n++)
> {
> if(n<=i-1)
> cursum+=A[i*size+n]*x[n];
> else
> cursum+=A[i*size+n]*xprev[n];
> }
> MPI_Barrier(MPI_COMM_WORLD);
> MPI_Reduce(&cursum, &totalsum, 1, MPI_DOUBLE,
> MPI_SUM, 0, MPI_COMM_WORLD);
> if(!ProcRank)
> {
> x[i]=xprev[i]-1.0/A[i*size+i]*(totalsum-b[i]);
> }
> }
> if(!ProcRank)
> {
> fin=1;
> for(int i=0; i<size; i++)
> if(fabs(x[i]-xprev[i])>e)
> fin=0;
> if(!fin)
> memcpy(xprev, x, size*sizeof(double));
> }
> MPI_Bcast(&fin, 1, MPI_INT, 0, MPI_COMM_WORLD);
> }
> while(!fin);
>
> gettimeofday(&tv2, 0);
>
> if(!ProcRank)
> {
> printf("Matrix A:\n");
> for(int i=0; i<size; i++)
> {
> for(int j=0; j<size; j++)
> printf("%lf ", A[i*size+j]);
> printf("\n");
> }
> printf("\n");
> printf("Line b:\n");
> for(int i=0; i<size; i++)
> printf("%lf ", b[i]);
> printf("\n");
>
> printf("Iteration number: %d\n", itercount);
> printf("Calculation time: %d microseconds\n",
> (tv2.tv_sec-tv1.tv_sec)*1000000+(tv2.tv_usec-tv1.tv_usec));
>
> printf("Results: \n");
> for(int i=0; i<size; i++)
> printf("x%d=%lf\n", i+1, x[i]);
> }
>
> MPI_Barrier(MPI_COMM_WORLD);
>
> delete [] A;
> delete [] b;
> delete [] x;
> delete [] xprev;
>
> MPI_Finalize();
>
> return 0;
> }
>
>
More information about the mpich-discuss
mailing list