example timings

michael bane michael.bane at manchester.ac.uk
Wed Jul 18 10:30:46 CDT 2007


I've attached my example code and a plot of example timings (raw data
available if anybody wants it), which seem to raise a few points. The
hardware is a Bull-badged Itanium2 box with nodes connected by Quadrics
QsNetII; each node has 4 dual-core chips (details:
http://www.mc.manchester.ac.uk/services/hpc/hardware) and it runs a
version of MPICH2.

I believe my example code is appropriate, but I'm happy to hear about
any bugs or improvements.

a) there is ~10% variation in run times (!!!) -- see 'initialisation',
which is serial and done on the rank-0 processor (also see serial-netcdf)

b) parallel netcdf is indeed faster than serial (i.e. 'normal') netcdf
for >2 processors, but it only scales reasonably to about 8 cores --
this is disappointing. Any thoughts on this? I don't think it's the
interconnect, since the times level off rather than drop further...

c) the "gather" is my implementation of gathering data off all MPI
processes and then writing (serial) netcdf file - most of this time is
file I/O not comms gathering the data

d) 'serial_p_total' uses parallel netcdf "collective" mode but with
only the rank-0 process writing all the data (see the second sketch
after this list). I'm not quite sure why it's so bad, or why it takes
longer as the number of PEs increases. Again, any input is welcomed!!!

e) the example code is attached and may well include errors or
mistakes, so please don't pass it around without prior permission. If
people want, I'm happy to release it once any discussion over the
code/results has concluded.
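
To make (c) concrete, here is a cut-down, self-contained sketch of the
gather-then-write pattern (file and variable names are illustrative;
the attached code is the authoritative version):

      program gather_then_write
! sketch of the 'gather' case: collect segments on rank 0, then rank 0
! writes one serial netCDF file (error checking elided for brevity)
        use netcdf
        use mpi
        implicit none
        integer, parameter:: n=1000   ! small global length, for illustration
        integer:: myPE, numProcs, ierr, stat
        integer:: ncid, dimid, varid, base, mine, i
        double precision, allocatable:: my_val(:), all_val(:)
        integer, allocatable:: counts(:), displs(:)

        call MPI_INIT(ierr)
        call MPI_COMM_RANK(MPI_COMM_WORLD, myPE, ierr)
        call MPI_COMM_SIZE(MPI_COMM_WORLD, numProcs, ierr)

! block decomposition: every PE gets n/numProcs items; the last PE
! also picks up the remainder
        base = n/numProcs
        mine = base
        if (myPE == numProcs-1) mine = n - base*(numProcs-1)
        allocate(my_val(mine))
        do i=1, mine
           my_val(i) = dble(myPE*base + i)
        end do

        allocate(counts(numProcs), displs(numProcs), all_val(n))
        do i=1, numProcs
           counts(i) = base
           displs(i) = (i-1)*base
        end do
        counts(numProcs) = n - base*(numProcs-1)

        call mpi_gatherv(my_val, mine, MPI_DOUBLE_PRECISION, &
                         all_val, counts, displs, MPI_DOUBLE_PRECISION, &
                         0, MPI_COMM_WORLD, ierr)

        if (myPE == 0) then
           stat = nf90_create('gather_sketch.nc', nf90_clobber, ncid)
           stat = nf90_def_dim(ncid, 'c', n, dimid)
           stat = nf90_def_var(ncid, 'dummy', nf90_double, (/ dimid /), varid)
           stat = nf90_enddef(ncid)
           stat = nf90_put_var(ncid, varid, all_val)
           stat = nf90_close(ncid)
        end if
        call mpi_finalize(ierr)
      end program gather_then_write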
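
And for (d), a minimal (untested) sketch of a collective-mode pnetcdf
write in which only rank 0 actually writes: the other ranks join the
nfmpi_put_vara_double_all call with a zero count. This is how I
understand the collective API is meant to be used for a single writer,
not a copy of the attached code:

      program rank0_collective_write
! sketch: all ranks make the collective call, but only rank 0
! contributes any data (count=0 elsewhere); error checking elided
        use mpi
        implicit none
#include "pnetcdf.inc"
        integer, parameter:: n=1000
        NFMPI_OFFSET:: start(1), count(1), dlen
        integer:: myPE, numProcs, ierr, stat, ncid, dimid, varid, i
        double precision:: val(n)

        call MPI_INIT(ierr)
        call MPI_COMM_RANK(MPI_COMM_WORLD, myPE, ierr)
        call MPI_COMM_SIZE(MPI_COMM_WORLD, numProcs, ierr)
        do i=1, n
           val(i) = dble(i)
        end do

        dlen = n
        stat = nfmpi_create(MPI_COMM_WORLD, 'rank0_sketch.nc', nf_clobber, &
                            MPI_INFO_NULL, ncid)
        stat = nfmpi_def_dim(ncid, 'c', dlen, dimid)
        stat = nfmpi_def_var(ncid, 'dummy', nf_double, 1, (/ dimid /), varid)
        stat = nfmpi_enddef(ncid)

        start(1) = 1
        count(1) = 0              ! non-writers participate with a zero count
        if (myPE == 0) count(1) = n
        stat = nfmpi_put_vara_double_all(ncid, varid, start, count, val)
        stat = nfmpi_close(ncid)
        call mpi_finalize(ierr)
      end program rank0_collective_write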

I think it would be useful if there were a small repository of example
testcases/benchmarks for (future) users...

-- 
Michael Bane
Centre for Atmospheric Science
University of Manchester, U.K.
http://cloudbase.phy.umist.ac.uk/people/bane/bane.htm
-------------- next part --------------
[Attachment: data_timing_plots_totals.png (image/png, 2633463 bytes) -- the timing plot]
URL: <http://lists.mcs.anl.gov/pipermail/parallel-netcdf/attachments/20070718/0ffca67b/attachment.png>
-------------- next part --------------
      program netcdf_speed_test_1d
! (c) michael.bane at manchester.ac.uk (Summer 2007)
        use netcdf
        use mpi
        implicit none

#include "pnetcdf.inc"


! directory for output files - must be accessible by all nodes
        character*(*), parameter:: dir='/home/horace/mccssmb2/tmp/test_par_netcdf'

! test to compare times taken to read/write netCDF with/without parallel netCDF
! uses variable: dummy(c)
        integer, parameter:: max_c=100000000
!!        integer, parameter:: max_c=61200000
!!        integer, parameter:: max_c=1*2*3*4*5*7*9*11 ! 83160
        character*(5), parameter:: varName='dummy'
        integer, parameter:: varType=nf90_double  ! relates to NF_TYPE for val

	integer:: dims, slave
	integer, parameter:: numDims=1

! p-netCDF requires lengths to be NFMPI_OFFSET, not integer
	NFMPI_OFFSET:: dim_len(numDims)=(/100000000/)  ! same as max_c
!!	NFMPI_OFFSET:: dim_len(numDims)=(/61200000/)  ! same as max_c
!!	NFMPI_OFFSET:: dim_len(numDims)=(/1*2*3*4*5*7*9*11/)  ! same as max_c

        integer:: ncfileID, dimID, dim_c_ID, varID
        integer:: stat

        NFMPI_OFFSET:: start(numDims), count(numDims)
!!        data start /1/
!!        data count /max_c/
        integer:: finish(numDims)

! non-netcdf vars:
        integer:: c
        double precision:: val(max_c)

        double precision, allocatable:: receive_val(:)
        integer, allocatable:: numReceived(:), displaceFromStart(:)
        integer:: allocStat

! MPI
        integer, parameter:: numTimingPts=10, maxInfo=64  ! 10, not 9: clock(10) is set and reported below
        double precision:: clock(numTimingPts)
        character*(maxInfo), dimension(numTimingPts):: clock_info
        integer:: ierr, myPE, numProcs

! -----------------------------------------------------------------------------------------
        call MPI_INIT( ierr )
        call MPI_COMM_RANK( MPI_COMM_WORLD, myPE, ierr )
        call MPI_COMM_SIZE( MPI_COMM_WORLD, numProcs, ierr )

        write(*,'("Hi from #",i2,"/",i2)') myPE, numProcs

! define values for val(:)
        call MPI_BARRIER(MPI_COMM_WORLD, ierr)
        if (myPE==0) clock(1) = MPI_Wtime()
        do c=1, max_c
	   val(c) = dble(c)  ! dble, not float: float() is single precision and loses integers above 2**24
        end do

        call MPI_BARRIER(MPI_COMM_WORLD, ierr)
! -----------------------------------------------------------------------------------------
! *** netcdf f90 version: using only master proc
        if (myPE == 0) then
           clock(2) = MPI_Wtime()
! (i) create file
           stat = nf90_create(path=dir//'/newFile_f90.ncf', cmode=nf90_noclobber, ncid=ncfileID)
           if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)
! (ii) define dimensions
           stat = nf90_def_dim(ncfileID, 'c', max_c, dim_c_ID)
           if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)

! (iii) define variables
           stat = nf90_def_var(ncfileID, varName, varType, (/ dim_c_ID /), varID)
           if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)

! (iv) end define mode (commit to disk)
           stat = nf90_enddef(ncfileID)
           if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)

! (v) use val(:) to fill varID
           stat = nf90_put_var(ncfileID, varID, val)
           if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)

! (vi) close file
           stat = nf90_close(ncfileID)
           if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)

           clock(3) = MPI_Wtime()
        else
           ! do nothing
        end if
        call mpi_barrier(MPI_COMM_WORLD, ierr)
! -----------------------------------------------------------------------------------------
! *** PARALLEL -- collect data to master and then write to file

	if(myPE==0) then
           allocate(receive_val(1:max_c), numReceived(1:numProcs), displaceFromStart(1:numProcs), stat=allocStat)
           if (allocStat /= 0) then
              call mpi_abort(MPI_COMM_WORLD, allocStat, ierr)
              STOP 'could not allocate receive arrays'
           end if
        end if

        call mpi_barrier(MPI_COMM_WORLD, ierr)
        clock(4) = MPI_Wtime()

! *** need to pass relevant parts of each PE's (local) val(:)
        do dims=1, numDims
           count(dims) = dim_len(dims)/numProcs
           start(dims) = (myPE)*count(dims) + 1
           if(myPE == numProcs-1) then
              ! the last PE picks up the remainder, so may have more items
              count(dims) = dim_len(dims) - start(dims) + 1
           end if
           finish(dims) = start(dims) + count(dims) - 1
        end do

! vars for differing number of data from processors
        if (myPE==0) then
           if (numProcs>1) then
              do slave=1, numProcs-1
                 displaceFromStart(slave) = (slave-1)*(dim_len(1)/numProcs)
                 numReceived(slave) = (dim_len(1)/numProcs)
              end do
              displaceFromStart(numProcs) = displaceFromStart(numProcs-1) + numReceived(numProcs-1)
              numReceived(numProcs) = dim_len(1) - displaceFromStart(numProcs) 
           else
              displaceFromStart(1) = 0
              numReceived(1) = dim_len(1)
           end if
!!!           write(*,*) 'displace:',displaceFromStart
!!!           write(*,*) 'numRecvd:',numReceived
        end if

! only 1d
!!!        write(*,*) myPE, start(1), finish(1), count(1)
!!!        call mpi_barrier(MPI_COMM_WORLD, ierr)
! *** need to allow diff num from last PE
        call mpi_gatherv(val(start(1):finish(1)), int(count(1)), MPI_DOUBLE_PRECISION, &
                         receive_val, numReceived, displaceFromStart, &
                         MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)  ! int(): MPI counts are default integer; count(1) is NFMPI_OFFSET
!!!        write(*,*) 'done gatherv'
!!!        if (myPE==0) write(*,*) numReceived
!!!        if (myPE==0) write(*,*) receive_val
        call mpi_barrier(MPI_COMM_WORLD, ierr)
        clock(5) = MPI_Wtime()

!!!        write(*,*) '...done gather'
        if (myPE==0) then

! *** check results
!!!           write(*,*) 'max diff=', maxval(abs(receive_val - val))

! (i) create file
           stat = nf90_create(path=dir//'/newFile_gather_f90.ncf', cmode=nf90_noclobber, ncid=ncfileID)
           if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)
! (ii) define dimensions
           stat = nf90_def_dim(ncfileID, 'c', max_c, dim_c_ID)
           if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)

! (iii) define variables
           stat = nf90_def_var(ncfileID, varName, varType, (/ dim_c_ID /), varID)
           if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)

! (iv) end define mode (commit to disk)
           stat = nf90_enddef(ncfileID)
           if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)

! (v) use receive_val(:) to fill varID
           stat = nf90_put_var(ncfileID, varID, receive_val)
           if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)

! (vi) close file
           stat = nf90_close(ncfileID)
           if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)

        end if
        call mpi_barrier(MPI_COMM_WORLD, ierr)
	clock(6) = MPI_Wtime()
!!!        write(*,*) 'done gather+write'

! -----------------------------------------------------------------------------------------
! *** PARALLEL netcdf version (use same array)

! (i) create file
!!!	   write(*,*) 'p-netcdf: create'
           stat = nfmpi_create(MPI_COMM_WORLD, dir//'/newFile_p.ncf', nf_noclobber, MPI_INFO_NULL, ncfileID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
!!!	   write(*,*) 'done'	
! (ii) define dimensions
!!!	   write(*,*) 'p-netcdf: def dim c'
           stat = nfmpi_def_dim(ncfileID, 'c', dim_len(1), dim_c_ID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
!!!	   write(*,*) 'done'	

           call mpi_barrier(MPI_COMM_WORLD, ierr)

! (iii) define variables
!!!	   write(*,*) 'p-netcdf: def var'
           stat = nfmpi_def_var(ncfileID, varName, varType, numDims, (/ dim_c_ID /), varID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
!!!	   write(*,*) 'done'	

! (iv) end define mode (commit to disk)
!!!	   write(*,*) 'p-netcdf: end def'
           stat = nfmpi_enddef(ncfileID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
!!!	   write(*,*) 'done'	

           call mpi_barrier(MPI_COMM_WORLD, ierr)
       	   if (myPE == 0) clock(7) = MPI_Wtime()

! (v) use val(:) to fill varID
!!!	   write(*,*) 'p-netcdf: put var'
! NB: there is no collective 'write the whole variable' convenience call, so start/count are set explicitly
!!!	   write(*,*) SHAPE(val)
!!!	   write(*,*) start, count
!!!	   write(*,*) start+count
	   start(1)=1
 	   count(1)=max_c
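	   ! NB: as written, every rank passes the full start/count, so each
	   ! process writes the entire variable; for a rank-0-only write in
	   ! collective mode the other ranks would instead pass count(1) = 0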
!!!        write(10+myPE,*) start
!!!        write(10+myPE,*) count
!!!        write(10+myPE,*) val(start(1):start(1)+count(1)-1)
           stat = nfmpi_put_vara_double_all(ncfileID, varID, start, count, val) 
!           stat = nfmpi_put_vara_double(ncfileID, varID, start, count, val) 
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
!!!	   write(*,*) 'done'	

! (vi) close file
           stat = nfmpi_close(ncfileID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat) 
           call mpi_barrier(MPI_COMM_WORLD, ierr)
       	   if (myPE == 0) clock(8) = MPI_Wtime()

! -----------------------------------------------------------------------------------------
! *** PARALLEL netcdf version (diff PEs write different sections)

! (i) create file
!!!	   write(*,*) 'p-netcdf PAR: create'
           stat = nfmpi_create(MPI_COMM_WORLD, dir//'/newFile_p2.ncf', nf_noclobber, MPI_INFO_NULL, ncfileID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
!!!	   write(*,*) 'done'	
! (ii) define dimensions
!!!	   write(*,*) 'p-netcdf PAR: def dim c'
           stat = nfmpi_def_dim(ncfileID, 'c', dim_len(1), dim_c_ID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
!!!	   write(*,*) 'done'	

           call mpi_barrier(MPI_COMM_WORLD, ierr)

! (iii) define variables
!!!	   write(*,*) 'p-netcdf PAR: def var'
           stat = nfmpi_def_var(ncfileID, varName, varType, numDims, (/ dim_c_ID /), varID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
!!!	   write(*,*) 'done'	

! (iv) end define mode (commit to disk)
!!!	   write(*,*) 'p-netcdf PAR: end def'
           stat = nfmpi_enddef(ncfileID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
!!!	   write(*,*) 'done'	

           call mpi_barrier(MPI_COMM_WORLD, ierr)
       	   if (myPE == 0) clock(9) = MPI_Wtime()

! (v) use val(:) to fill varID *** each PE writes a segment ***
!!!	   write(*,*) 'p-netcdf PAR: put var'
! NB: no collective 'whole variable' call; compute each PE's start/count explicitly
!!!	   write(*,*) SHAPE(val)
	   do dims=1, numDims
	     count(dims) = dim_len(dims)/numProcs
	     start(dims) = (myPE)*count(dims) + 1
	     if(myPE == numProcs-1) then
	       ! the last PE picks up the remainder, so may have more items
	       count(dims) = dim_len(dims) - start(dims) + 1
	     end if
	   end do
!!!	   write(*,'("start:",4(i9,1x))') start
!!!	   write(*,'("count:",4(i9,1x))') count
!!!	   write(*,'(" tots:",4(i9,1x))') start+count-1
!!!	   write(*,*) 'my first val=', val(start)

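	   ! independent (non-collective) nfmpi_put_vara_double calls must be
	   ! bracketed by nfmpi_begin_indep_data / nfmpi_end_indep_data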
	   stat = nfmpi_begin_indep_data(ncfileID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)

           stat = nfmpi_put_vara_double(ncfileID, varID, start, count, val(start(1)))
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)

	   stat = nfmpi_end_indep_data(ncfileID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)	   

!!!	   write(*,*) 'done'	
           call mpi_barrier(MPI_COMM_WORLD, ierr)

! (vi) close file
           stat = nfmpi_close(ncfileID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)

           call mpi_barrier(MPI_COMM_WORLD, ierr)
       	   if (myPE == 0) clock(10) = MPI_Wtime()


 10        FORMAT(1x,'(a)',10(1x,i3))
! -----------------------------------------------------------------------------------------
! *** output timing info
	if(myPE==0) then
	  clock_info(02) = 'initialize array'

          clock_info(03) = '  netcdf f90:   create and fill'

          clock_info(04) = '      gather:   allocate'
          clock_info(05) = '      gather:   gather'
          clock_info(06) = '      gather:   fill'

          clock_info(07) = 'p-netcdf:       create/define'
          clock_info(08) = 'p-netcdf:       fill'

          clock_info(09) = 'p-netcdf/split: create/define'
          clock_info(10) = 'p-netcdf/split: fill'
          call output_clock(clock, clock_info, 10, myPE)
	end if
	call mpi_finalize(ierr)

      end program netcdf_speed_test_1d
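
! NOTE: print_nf90error_and_abort and output_clock are called above but were
! not part of this listing; the minimal sketches below are supplied so the
! program compiles -- the original routines may differ.

      subroutine print_nf90error_and_abort(stat)
! print the netCDF error string for stat, then abort all MPI processes
! (pnetcdf-specific error codes may not map onto nf90_strerror messages)
        use netcdf
        use mpi
        implicit none
        integer, intent(in):: stat
        integer:: ierr
        write(*,*) 'netCDF error: ', trim(nf90_strerror(stat))
        call mpi_abort(MPI_COMM_WORLD, stat, ierr)
      end subroutine print_nf90error_and_abort

      subroutine output_clock(clock, clock_info, n, myPE)
! report the interval ending at each timing point, labelled by clock_info
        implicit none
        integer, intent(in):: n, myPE
        double precision, intent(in):: clock(n)
        character*(*), intent(in):: clock_info(n)
        integer:: i
        do i=2, n
           write(*,'("PE",i3,2x,a,2x,f12.6," s")') myPE, clock_info(i), clock(i)-clock(i-1)
        end do
      end subroutine output_clock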
       

