#include #include #include /* Data size in REAL's, per process (override with compiler flag if you'd like) */ /* Note: in most real applications this would be quite small (just local norms, say) */ #define DATASIZE 32 /* How many burnin and trial runs to perform */ #define NBURNIN 5 #define NTRIALS 100 /* to scale local work time */ #define LOCALREPS 50000 /* which kind of reduction to test */ #define REDOP MPI_SUM /* datatype to test with (double is probably most relevant) */ #define REAL double #define MPI_MY_REAL MPI_DOUBLE #define CHKERR(x) do{if(x>0){printf("MPI Error on line __LINE__"); exit(1);}}while(0) /* Do some useless work on a scalar */ #define WORKFUNCTION(x) d*(d + 1.2232*(232/(d+232.3 - 0.1*d+(d-3.2))))/(21232.34 + d*d/(2323.44/(d*(d+2.232)))) /***************************************************************************************\ Ctx to operate on \***************************************************************************************/ /* A structure to define problem data to be operated upon */ typedef struct { REAL * array_send; REAL * array_recv; REAL * array_work; } Data; /* Forward Declarations */ void trial_blockingredonly(Data*); void trial_nonblockingredonly(Data*); void trial_loconly(Data*); void trial_blocking(Data*); void trial_nonblocking(Data*); /* Create and destroy data to operate on */ void create(Data * pData) { int i; pData->array_recv = (REAL*) malloc(DATASIZE * sizeof(REAL)); pData->array_send = (REAL*) malloc(DATASIZE * sizeof(REAL)); for(i=0;iarray_send[i] = 4389034389*i + 34933434244; } pData->array_work = (REAL*) malloc(DATASIZE * sizeof(REAL)); } void destroy(Data * pData) { free(pData->array_recv); free(pData->array_send); free(pData->array_work); } /***************************************************************************************\ Main \***************************************************************************************/ int main(int argc,char ** argv) { int i,ierr; REAL avg_time_blocking, avg_time_nonblocking, avg_time_blockingredonly, avg_time_nonblockingredonly, avg_time_loconly; REAL totals[5],time_min[5],time_max[5]; int rank,size; /* Setup */ ierr = MPI_Init(&argc,&argv); CHKERR(ierr); ierr = MPI_Comm_rank(MPI_COMM_WORLD,&rank);CHKERR(ierr); ierr = MPI_Comm_size(MPI_COMM_WORLD,&size);CHKERR(ierr); if (!rank) { printf("Testing with %d MPI ranks\nreducing an array of size %d (%ld bytes)\n",size,DATASIZE,DATASIZE * sizeof(REAL)); printf("Running %d burnin runs and %d tests ... ",NBURNIN,NTRIALS); } /* Burn in 1 */ { Data data; create(&data); for(i=0;i 0 ? rank-1 : size-1; */ /* Local work */ { int j; int dummy; for(j=0;jarray_work[i]; //pData->array_work[i] = d*(d + 1.2232*(232/(d+232.3 - 0.1*d+(d-3.2))))/(21232.34 + d*d/(2323.44/(d*(d+2.232)))); pData->array_work[i] = WORKFUNCTION(d); //pData->array_work[i] = simple_rand_n(d,10); } } } /***************************************************************************************\ Blocking Trial \***************************************************************************************/ void trial_blocking(Data * pData) { int ierr; other_work(pData); ierr = MPI_Allreduce(pData->array_send,pData->array_recv,DATASIZE,MPI_MY_REAL,MPI_SUM,MPI_COMM_WORLD);CHKERR(ierr); } /***************************************************************************************\ Nonblocking Trial \***************************************************************************************/ void trial_nonblocking(Data * pData) { int ierr; MPI_Request request; ierr = MPI_Iallreduce(pData->array_send,pData->array_recv,DATASIZE,MPI_MY_REAL,MPI_SUM,MPI_COMM_WORLD,&request);CHKERR(ierr); other_work(pData); ierr = MPI_Wait(&request,MPI_STATUS_IGNORE); } /***************************************************************************************\ Reduction Only Controls \***************************************************************************************/ void trial_blockingredonly(Data * pData) { int ierr; ierr = MPI_Allreduce(pData->array_send,pData->array_recv,DATASIZE,MPI_MY_REAL,MPI_SUM,MPI_COMM_WORLD);CHKERR(ierr); } void trial_nonblockingredonly(Data * pData) { int ierr; MPI_Request r; ierr = MPI_Iallreduce(pData->array_send,pData->array_recv,DATASIZE,MPI_MY_REAL,MPI_SUM,MPI_COMM_WORLD,&r);CHKERR(ierr); ierr = MPI_Wait(&r,MPI_STATUS_IGNORE);CHKERR(ierr); } /***************************************************************************************\ Local work only control \***************************************************************************************/ void trial_loconly(Data * pData) { other_work(pData); }