example timings
michael bane
michael.bane at manchester.ac.uk
Wed Jul 18 10:30:46 CDT 2007
I've attached my example code and a plot of example timings (raw data
available if anybody wants it), which seem to raise a few points. The
hardware is a Bull-badged Itanium2 box, with nodes connected by Quadrics
QsNetII; each node has 4 dual-core chips (details:
http://www.mc.manchester.ac.uk/services/hpc/hardware) and it's running a
version of mpich2.
I believe my example code is appropriate, but I'm happy to hear about
bugs/improvements.
a) there's a 10% variation in run times (!!!) -- see 'initialisation',
which is serial and done on the rank 0 processor (also see serial-netcdf)
b) parallel netcdf is indeed faster than serial (i.e. 'normal') netcdf for
>2 processors, but it only scales reasonably to about 8 cores -- this is
disappointing. Any thoughts on this? I don't think it's the interconnect,
since the times level off rather than drop further...
c) the "gather" is my implementation of gathering data off all MPI
processes and then writing (serial) netcdf file - most of this time is
file I/O not comms gathering the data
d) 'serial_p_total' uses parallel netcdf "collective" mode but with only
the rank 0 process writing all the data. I'm not quite sure why it's so
bad, or why it takes longer as the number of PEs increases. Again, any
input is welcomed!!!
e) the example code is attached and may well include errors or mistakes,
so please don't pass it around without prior permission. If people want,
I'm happy to provide it once any discussion over the code/results has
concluded.
I think it would be useful if there was a small repository of example
testcases/benchmarks for (future) users...
--
Michael Bane
Centre for Atmospheric Science
University of Manchester, U.K.
http://cloudbase.phy.umist.ac.uk/people/bane/bane.htm
-------------- next part --------------
A non-text attachment was scrubbed...
Name: data_timing_plots_totals.png (image/png, 2633463 bytes)
URL: <http://lists.mcs.anl.gov/pipermail/parallel-netcdf/attachments/20070718/0ffca67b/attachment.png>
-------------- next part --------------
program netcdf_speed_test_1d
! (c) michael.bane at manchester.ac.uk (Summer 2007)
use netcdf
use mpi
implicit none
#include "pnetcdf.inc"
! directory for output files - must be accessible by all nodes
character*(*), parameter:: dir='/home/horace/mccssmb2/tmp/test_par_netcdf'
! test to compare times taken to read/write netCDF with and without parallel netCDF
! uses variable dummy(c) as the data written to each file
integer, parameter:: max_c=100000000
!! integer, parameter:: max_c=61200000
!! integer, parameter:: max_c=1*2*3*4*5*7*9*11 ! 83160
character*(5), parameter:: varName='dummy'
integer, parameter:: varType=nf90_double ! relates to NF_TYPE for val
integer:: dims, slave
integer, parameter:: numDims=1
! p-netCDF requires lengths to be NFMPI_OFFSET (i.e. integer(kind=MPI_OFFSET_KIND)), not default integer
NFMPI_OFFSET:: dim_len(numDims)=(/100000000/) ! same as max_c
!! NFMPI_OFFSET:: dim_len(numDims)=(/61200000/) ! same as max_c
!! NFMPI_OFFSET:: dim_len(numDims)=(/1*2*3*4*5*7*9*11/) ! same as max_c
integer:: ncfileID, dimID, dim_c_ID, varID
integer:: stat
NFMPI_OFFSET:: start(numDims), count(numDims)
!! data start /1/
!! data count /max_c/
integer:: finish(numDims)
! non-netcdf vars:
integer:: c
double precision:: val(max_c)
double precision, allocatable:: receive_val(:)
integer, allocatable:: numReceived(:), displaceFromStart(:)
integer:: allocStat
! MPI
integer, parameter:: numTimingPts=10, maxInfo=64 ! clock(1)..clock(10) are recorded below
double precision:: clock(numTimingPts)
character*(maxInfo), dimension(numTimingPts):: clock_info
integer:: ierr, myPE, numProcs
! -----------------------------------------------------------------------------------------
call MPI_INIT( ierr )
call MPI_COMM_RANK( MPI_COMM_WORLD, myPE, ierr )
call MPI_COMM_SIZE( MPI_COMM_WORLD, numProcs, ierr )
write(*,'("Hi from #",i2,"/",i2)') myPE, numProcs
! define values for val(:)
call MPI_BARRIER(MPI_COMM_WORLD, ierr)
if (myPE==0) clock(1) = MPI_Wtime()
do c=1, max_c
val(c) = dble(c) ! dble() so the value is computed in double precision
end do
call MPI_BARRIER(MPI_COMM_WORLD, ierr)
! -----------------------------------------------------------------------------------------
! *** netcdf f90 version: using only master proc
if (myPE == 0) then
clock(2) = MPI_Wtime()
! (i) create file
stat = nf90_create(path=dir//'/newFile_f90.ncf', cmode=nf90_noclobber, ncid=ncfileID)
if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)
! (ii) define dimensions
stat = nf90_def_dim(ncfileID, 'c', max_c, dim_c_ID)
if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)
! (iii) define variables
stat = nf90_def_var(ncfileID, varName, varType, (/ dim_c_ID /), varID)
if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)
! (iv) end define mode (commit to disk)
stat = nf90_enddef(ncfileID)
if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)
! (v) use val(:) to fill varID
stat = nf90_put_var(ncfileID, varID, val)
if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)
! (vi) close file
stat = nf90_close(ncfileID)
if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)
clock(3) = MPI_Wtime()
else
! do nothing
end if
call mpi_barrier(MPI_COMM_WORLD, ierr)
! -----------------------------------------------------------------------------------------
! *** PARALLEL -- collect data to master and then write to file
if(myPE==0) then
allocate(receive_val(1:max_c), numReceived(1:numProcs), displaceFromStart(1:numProcs), stat=allocStat)
if (allocStat /= 0) then
call mpi_abort(MPI_COMM_WORLD, allocStat, ierr)
STOP 'could not allocate receive arrays'
end if
end if
call mpi_barrier(MPI_COMM_WORLD, ierr)
clock(4) = MPI_Wtime()
! *** need to pass relevant parts of each PE's (local) val(:)
do dims=1, numDims
count(dims) = dim_len(dims)/numProcs
start(dims) = (myPE)*count(dims) + 1
if(myPE == numProcs-1) then
! the last PE picks up any remainder, so it may have a different (larger) count
count(dims) = dim_len(dims) - start(dims) + 1
end if
finish(dims) = start(dims) + count(dims) - 1
end do
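! e.g. (hypothetical small case) dim_len(1)=10, numProcs=4: ranks 0-2 get count=2
! (start=1,3,5) and rank 3 gets start=7, count=10-7+1=4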
! receive counts and displacements for mpi_gatherv (the last rank may send more)
if (myPE==0) then
if (numProcs>1) then
do slave=1, numProcs-1
displaceFromStart(slave) = (slave-1)*(dim_len(1)/numProcs)
numReceived(slave) = (dim_len(1)/numProcs)
end do
displaceFromStart(numProcs) = displaceFromStart(numProcs-1) + numReceived(numProcs-1)
numReceived(numProcs) = dim_len(1) - displaceFromStart(numProcs)
else
displaceFromStart(1) = 0
numReceived(1) = dim_len(1)
end if
!!! write(*,*) 'displace:',displaceFromStart
!!! write(*,*) 'numRecvd:',numReceived
end if
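! e.g. (hypothetical small case) dim_len(1)=10, numProcs=4:
!   numReceived       = 2, 2, 2, 4
!   displaceFromStart = 0, 2, 4, 6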
! only 1d
!!! write(*,*) myPE, start(1), finish(1), count(1)
!!! call mpi_barrier(MPI_COMM_WORLD, ierr)
! *** need to allow for a different count from the last PE
! NB: MPI counts/displacements must be default integers, hence int() on the send count
call mpi_gatherv(val(start(1):finish(1)), int(count(1)), MPI_DOUBLE_PRECISION, &
                 receive_val, numReceived, displaceFromStart, MPI_DOUBLE_PRECISION, &
                 0, MPI_COMM_WORLD, ierr)
!!! write(*,*) 'done gatherv'
!!! if (myPE==0) write(*,*) numReceived
!!! if (myPE==0) write(*,*) receive_val
call mpi_barrier(MPI_COMM_WORLD, ierr)
clock(5) = MPI_Wtime()
!!! write(*,*) '...done gather'
if (myPE==0) then
! *** check results
!!! write(*,*) 'max diff=', maxval(abs(receive_val - val))
! (i) create file
stat = nf90_create(path=dir//'/newFile_gather_f90.ncf', cmode=nf90_noclobber, ncid=ncfileID)
if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)
! (ii) define dimensions
stat = nf90_def_dim(ncfileID, 'c', max_c, dim_c_ID)
if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)
! (iii) define variables
stat = nf90_def_var(ncfileID, varName, varType, (/ dim_c_ID /), varID)
if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)
! (iv) end define mode (commit to disk)
stat = nf90_enddef(ncfileID)
if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)
! (v) use receive_val(:) to fill varID
stat = nf90_put_var(ncfileID, varID, receive_val)
if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)
! (vi) close file
stat = nf90_close(ncfileID)
if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)
end if
call mpi_barrier(MPI_COMM_WORLD, ierr)
clock(6) = MPI_Wtime()
!!! write(*,*) 'done gather+write'
! -----------------------------------------------------------------------------------------
! *** PARALLEL netcdf version (use same array)
! (i) create file
!!! write(*,*) 'p-netcdf: create'
stat = nfmpi_create(MPI_COMM_WORLD, dir//'/newFile_p.ncf', nf_noclobber, MPI_INFO_NULL, ncfileID)
if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
!!! write(*,*) 'done'
! (ii) define dimensions
!!! write(*,*) 'p-netcdf: def dim c'
stat = nfmpi_def_dim(ncfileID, 'c', dim_len(1), dim_c_ID)
if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
!!! write(*,*) 'done'
call mpi_barrier(MPI_COMM_WORLD, ierr)
! (iii) define variables
!!! write(*,*) 'p-netcdf: def var'
stat = nfmpi_def_var(ncfileID, varName, varType, numDims, (/ dim_c_ID /), varID)
if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
!!! write(*,*) 'done'
! (iv) end define mode (commit to disk)
!!! write(*,*) 'p-netcdf: end def'
stat = nfmpi_enddef(ncfileID)
if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
!!! write(*,*) 'done'
call mpi_barrier(MPI_COMM_WORLD, ierr)
if (myPE == 0) clock(7) = MPI_Wtime()
! (v) use val(:) to fill varID
!!! write(*,*) 'p-netcdf: put var'
! NB: there is no collective 'put whole variable' call, so use put_vara with the full start/count
!!! write(*,*) SHAPE(val)
!!! write(*,*) start, count
!!! write(*,*) start+count
start(1)=1
count(1)=max_c
!!! write(10+myPE,*) start
!!! write(10+myPE,*) count
!!! write(10+myPE,*) val(start(1):start(1)+count(1)-1)
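! NB: every rank passes start=1, count=max_c here, so each process writes the entire
! variable in this collective call (the same data is written numProcs times)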
stat = nfmpi_put_vara_double_all(ncfileID, varID, start, count, val)
! stat = nfmpi_put_vara_double(ncfileID, varID, start, count, val)
if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
!!! write(*,*) 'done'
! (vi) close file
stat = nfmpi_close(ncfileID)
if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
call mpi_barrier(MPI_COMM_WORLD, ierr)
if (myPE == 0) clock(8) = MPI_Wtime()
! -----------------------------------------------------------------------------------------
! *** PARALLEL netcdf version (diff PEs write different sections)
! (i) create file
!!! write(*,*) 'p-netcdf PAR: create'
stat = nfmpi_create(MPI_COMM_WORLD, dir//'/newFile_p2.ncf', nf_noclobber, MPI_INFO_NULL, ncfileID)
if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
!!! write(*,*) 'done'
! (ii) define dimensions
!!! write(*,*) 'p-netcdf PAR: def dim c'
stat = nfmpi_def_dim(ncfileID, 'c', dim_len(1), dim_c_ID)
if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
!!! write(*,*) 'done'
call mpi_barrier(MPI_COMM_WORLD, ierr)
! (iii) define variables
!!! write(*,*) 'p-netcdf PAR: def var'
stat = nfmpi_def_var(ncfileID, varName, varType, numDims, (/ dim_c_ID /), varID)
if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
!!! write(*,*) 'done'
! (iv) end define mode (commit to disk)
!!! write(*,*) 'p-netcdf PAR: end def'
stat = nfmpi_enddef(ncfileID)
if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
!!! write(*,*) 'done'
call mpi_barrier(MPI_COMM_WORLD, ierr)
if (myPE == 0) clock(9) = MPI_Wtime()
! (v) use val(:) to fill varID *** each PE writes a segment ***
!!! write(*,*) 'p-netcdf PAR: put var'
! NB: there is no collective 'put whole variable' call, so use put_vara with a per-rank start/count
!!! write(*,*) SHAPE(val)
do dims=1, numDims
count(dims) = dim_len(dims)/numProcs
start(dims) = (myPE)*count(dims) + 1
if(myPE == numProcs-1) then
! the last PE picks up any remainder, so it may have a different (larger) count
count(dims) = dim_len(dims) - start(dims) + 1
end if
end do
!!! write(*,'("start:",4(i9,1x))') start
!!! write(*,'("count:",4(i9,1x))') count
!!! write(*,'(" tots:",4(i9,1x))') start+count-1
!!! write(*,*) 'my first val=', val(start)
stat = nfmpi_begin_indep_data(ncfileID)
if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
stat = nfmpi_put_vara_double(ncfileID, varID, start, count, val(start(1)))
if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
stat = nfmpi_end_indep_data(ncfileID)
if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
!!! write(*,*) 'done'
call mpi_barrier(MPI_COMM_WORLD, ierr)
! (vi) close file
stat = nfmpi_close(ncfileID)
if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
call mpi_barrier(MPI_COMM_WORLD, ierr)
if (myPE == 0) clock(10) = MPI_Wtime()
10 FORMAT(1x,'(a)',10(1x,i3))
! -----------------------------------------------------------------------------------------
! *** output timing info
if(myPE==0) then
clock_info(02) = 'initialize array'
clock_info(03) = ' netcdf f90: create and fill'
clock_info(04) = ' gather: allocate'
clock_info(05) = ' gather: gather'
clock_info(06) = ' gather: fill'
clock_info(07) = 'p-netcdf: create/define'
clock_info(08) = 'p-netcdf: fill'
clock_info(09) = 'p-netcdf/split: create/define'
clock_info(10) = 'p-netcdf/split: fill'
call output_clock(clock, clock_info, 10, myPE)
end if
call mpi_finalize(ierr)
end program netcdf_speed_test_1d
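! -----------------------------------------------------------------------------------------
! NOTE: the helper routines print_nf90error_and_abort and output_clock referenced above
! were not included in the scrubbed attachment; the sketches below are assumed minimal
! implementations (not necessarily the author's originals) so the listing is self-contained.
subroutine print_nf90error_and_abort(stat)
  ! print the netCDF error string for 'stat', then abort all MPI processes
  use netcdf
  use mpi
  implicit none
  integer, intent(in):: stat
  integer:: ierr
  write(*,*) 'netCDF error: ', trim(nf90_strerror(stat))
  call mpi_abort(MPI_COMM_WORLD, stat, ierr)
end subroutine print_nf90error_and_abort

subroutine output_clock(clock, clock_info, numPts, myPE)
  ! report the elapsed time between successive MPI_Wtime() samples, labelled by clock_info
  implicit none
  integer, intent(in):: numPts, myPE
  double precision, intent(in):: clock(numPts)
  character*(*), intent(in):: clock_info(numPts)
  integer:: i
  write(*,'("Timings from rank ",i3,":")') myPE
  do i=2, numPts
     write(*,'(1x,a," : ",f12.4," s")') trim(clock_info(i)), clock(i)-clock(i-1)
  end do
end subroutine output_clock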