> The default I/O mode for pnetcdf is collective, to switch to independent 
> mode, wrap the indep I/O calls by
>          stat = nfmpi_begin_indep_data(ncfileID)   and
>          stat = nfmpi_end_indep_data(ncfileID)


> I don't know the answer to the error messages between F77 and F90.
> I also wonder if they are defined the same in NetCDF (the serial version).

f77 and f90 seem the same for those error msgs I've seen (eg -35 =
'netCDF file exists && NC_NOCLOBBER')

But returning to my example (full main Fortran s/prog unit below), I
can't see why I'm not getting series 1,2,..,100 in the output file (last
part of the code, the /tmp/newFile_p2.ncf o/p file), but rather on 2 PEs
both processes seem to be writing 1,2,..50 so I'm guessing I'm missing
something (my impression was that each PE would write val(start),
val(start+1) for its local value of start) 

also attached is /tmp/out which is output from
~/Fortran/MPI/parallel_netCDF$ rm /tmp/new*ncf && prun -B3 -l "%m%n:
rank %g: " -n 2 -p login ./speed_1d_f90  > /tmp/out 2>&1

cheers, michael
horace3: rank 0: Using qxelan driver, build for MPIBull2 0.9.7-t (Ishtar) 20060726-1607
horace3: rank 1: Hi from # 1/ 2
horace3: rank 0: Hi from # 0/ 2
horace3: rank 0:  p-netcdf: create
horace3: rank 1:  p-netcdf: create
horace3: rank 0:  done
horace3: rank 0:  p-netcdf: def dim c
horace3: rank 1:  done
horace3: rank 0:  done
horace3: rank 1:  p-netcdf: def dim c
horace3: rank 1:  done
horace3: rank 0: forrtl: warning (402): fort: (1): In call to NFMPI_DEF_VAR, an array temporary was created for argument #5
horace3: rank 0: 
horace3: rank 0:  p-netcdf: def var
horace3: rank 0:  done
horace3: rank 0:  p-netcdf: end def
horace3: rank 1: forrtl: warning (402): fort: (1): In call to NFMPI_DEF_VAR, an array temporary was created for argument #5
horace3: rank 1: 
horace3: rank 1:  p-netcdf: def var
horace3: rank 1:  done
horace3: rank 1:  p-netcdf: end def
horace3: rank 0:  done
horace3: rank 1:  done
horace3: rank 0:  p-netcdf: put var
horace3: rank 1:  p-netcdf: put var
horace3: rank 1:  done
horace3: rank 0:  done
horace3: rank 0:  p-netcdf PAR: create
horace3: rank 1:  p-netcdf PAR: create
horace3: rank 0:  done
horace3: rank 0:  p-netcdf PAR: def dim c
horace3: rank 1:  done
horace3: rank 0:  done
horace3: rank 1:  p-netcdf PAR: def dim c
horace3: rank 1:  done
horace3: rank 0:  p-netcdf PAR: def var
horace3: rank 1:  p-netcdf PAR: def var
horace3: rank 0: forrtl: warning (402): fort: (1): In call to NFMPI_DEF_VAR, an array temporary was created for argument #5
horace3: rank 0: 
horace3: rank 0:  done
horace3: rank 0:  p-netcdf PAR: end def
horace3: rank 1: forrtl: warning (402): fort: (1): In call to NFMPI_DEF_VAR, an array temporary was created for argument #5
horace3: rank 1: 
horace3: rank 1:  done
horace3: rank 1:  p-netcdf PAR: end def
horace3: rank 0:  done
horace3: rank 1:  done
horace3: rank 0:  p-netcdf PAR: put var
horace3: rank 1:  p-netcdf PAR: put var
horace3: rank 0: forrtl: warning (402): fort: (1): In call to I/O Write routine, an array temporary was created for argument #2
horace3: rank 0: 
horace3: rank 0:          100
horace3: rank 0: start:  1
horace3: rank 0: count: 50
horace3: rank 0:  tots: 50
horace3: rank 1:          100
horace3: rank 0:  my first val=   1.00000000000000     
horace3: rank 1: start: 51
horace3: rank 1: count: 50
horace3: rank 1:  tots:100
horace3: rank 1: forrtl: warning (402): fort: (1): In call to I/O Write routine, an array temporary was created for argument #2
horace3: rank 1: 
horace3: rank 1:  my first val=   51.0000000000000     
horace3: rank 0:  done
horace3: rank 1:  done
horace3: rank 0: Proc # 0   netcdf f90:   initialize array                                                                                                : 0.13828E-04sec
horace3: rank 0: Proc # 0   netcdf f90:   time to create and fill                                                                                         : 0.76318E-03sec
horace3: rank 0: Proc # 0 (barrier)                                                                                                                       : 0.95367E-06sec
horace3: rank 1: Proc # 1   netcdf f90:   initialize array                                                                                                : 0.00000E+00sec
horace3: rank 0: Proc # 0 p-netcdf:       time to create/define                                                                                           : 0.17369E-01sec
horace3: rank 0: Proc # 0 p-netcdf:       time to fill                                                                                                    : 0.31710E-02sec
horace3: rank 0: Proc # 0 (barrier)                                                                                                                       : 0.00000E+00sec
horace3: rank 0: Proc # 0 p-netcdf/split: time to create/define                                                                                           : 0.18633E-01sec
horace3: rank 0: Proc # 0 p-netcdf/split: time to fill                                                                                                    : 0.17010E-01sec
horace3: rank 1: Proc # 1   netcdf f90:   time to create and fill                                                                                         : 0.00000E+00sec
horace3: rank 1: Proc # 1 (barrier)                                                                                                                       : 0.11824E+10sec
horace3: rank 1: Proc # 1 p-netcdf:       time to create/define                                                                                           :-0.11824E+10sec
horace3: rank 1: Proc # 1 p-netcdf:       time to fill                                                                                                    : 0.00000E+00sec
horace3: rank 1: Proc # 1 (barrier)                                                                                                                       : 0.11824E+10sec
horace3: rank 1: Proc # 1 p-netcdf/split: time to create/define                                                                                           :-0.11824E+10sec
horace3: rank 1: Proc # 1 p-netcdf/split: time to fill                                                                                                    : 0.00000E+00sec
      program netcdf_speed_test_1d

        use netcdf
        use mpi
        implicit none

#include ""

! test to compare times taken to read/write netCDF with/with parallel netCDF
! uses variable: dummy(c) for 
        integer, parameter:: max_c=100
        character*(5), parameter:: varName='dummy'
        integer, parameter:: varType=nf90_double  ! relates to NF_TYPE for val

	integer:: dims
	integer, parameter:: numDims=1

! p-netCDF required lengths to be NFMPI_OFFSET not integer
	NFMPI_OFFSET:: dim_len(numDims)=(/100/)  ! same as max_c

        integer:: ncfileID, dimID, dim_c_ID, varID
        integer:: stat

        NFMPI_OFFSET:: start(numDims), count(numDims)
!!        data start /1/
!!        data count /max_c/

! non-netcdf vars:
        integer:: c, r, l, t
        double precision:: val(max_c)

        integer, parameter:: numTimingPts=9, maxInfo=128
        double precision:: clock(numTimingPts)
        character*(maxInfo), dimension(numTimingPts):: clock_info
        integer:: ierr, myPE, numProcs

! -----------------------------------------------------------------------------------------
        call MPI_INIT( ierr )
        call MPI_COMM_RANK( MPI_COMM_WORLD, myPE, ierr )
        call MPI_COMM_SIZE( MPI_COMM_WORLD, numProcs, ierr )

        write(*,'("Hi from #",i2,"/",i2)') myPE, numProcs

! define values for val(:,:,:,:)
        call MPI_BARRIER(MPI_COMM_WORLD, ierr)
        if (myPE==0) clock(1) = MPI_Wtime()
        do c=1, max_c
	   val(c) = float(c)
        end do

        call MPI_BARRIER(MPI_COMM_WORLD, ierr)
! -----------------------------------------------------------------------------------------
! *** netcdf f90 version: using only master proc
        if (myPE == 0) then
           clock(2) = MPI_Wtime()
! (i) create file
           stat = nf90_create(path='/tmp/newFile_f90.ncf', cmode=nf90_noclobber, ncid=ncfileID)
           if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)
! (ii) define dimensions
           stat = nf90_def_dim(ncfileID, 'c', max_c, dim_c_ID)
           if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)

! (iii) define variables
           stat = nf90_def_var(ncfileID, varName, varType, (/ dim_c_ID /), varID)
           if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)

! (iv) end define mode (commit to disk)
           stat = nf90_enddef(ncfileID)
           if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)

! (v) use val(:) to fill varID
           stat = nf90_put_var(ncfileID, varID, val)
           if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)

! (vi) close file
           stat = nf90_close(ncfileID)
           if (stat /= nf90_noerr) call print_nf90error_and_abort(stat)

           clock(3) = MPI_Wtime()
           ! do nothing
        end if
        call mpi_barrier(MPI_COMM_WORLD, ierr)
! -----------------------------------------------------------------------------------------
! *** PARALLEL netcdf version (use same array)
           clock(4) = MPI_Wtime()
! (i) create file
	   write(*,*) 'p-netcdf: create'
           stat = nfmpi_create(MPI_COMM_WORLD, '/tmp/newFile_p.ncf', nf_noclobber, MPI_INFO_NULL, ncfileID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
	   write(*,*) 'done'	
! (ii) define dimensions
	   write(*,*) 'p-netcdf: def dim c'
           stat = nfmpi_def_dim(ncfileID, 'c', dim_len(1), dim_c_ID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
	   write(*,*) 'done'	

           call mpi_barrier(MPI_COMM_WORLD, ierr)

! (iii) define variables
	   write(*,*) 'p-netcdf: def var'
           stat = nfmpi_def_var(ncfileID, varName, varType, numDims, (/ dim_c_ID /), varID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
	   write(*,*) 'done'	

! (iv) end define mode (commit to disk)
	   write(*,*) 'p-netcdf: end def'
           stat = nfmpi_enddef(ncfileID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
	   write(*,*) 'done'	

           call mpi_barrier(MPI_COMM_WORLD, ierr)
       	   if (myPE == 0) clock(5) = MPI_Wtime()

! (v) use val(:) to fill varID
	   write(*,*) 'p-netcdf: put var'
! NB: collective 'all vals of var' not exist
!!!	   write(*,*) SHAPE(val)
!!!	   write(*,*) start, count
!!!	   write(*,*) start+count
           stat = nfmpi_put_vara_double_all(ncfileID, varID, start, count, val) 
!           stat = nfmpi_put_vara_double(ncfileID, varID, start, count, val) 
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
	   write(*,*) 'done'	

! (vi) close file
           stat = nfmpi_close(ncfileID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat) 
           call mpi_barrier(MPI_COMM_WORLD, ierr)
       	   if (myPE == 0) clock(6) = MPI_Wtime()

! -----------------------------------------------------------------------------------------
! *** PARALLEL netcdf version (diff PEs write different sections)
           clock(7) = MPI_Wtime()
! (i) create file
	   write(*,*) 'p-netcdf PAR: create'
           stat = nfmpi_create(MPI_COMM_WORLD, '/tmp/newFile_p2.ncf', nf_noclobber, MPI_INFO_NULL, ncfileID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
	   write(*,*) 'done'	
! (ii) define dimensions
	   write(*,*) 'p-netcdf PAR: def dim c'
           stat = nfmpi_def_dim(ncfileID, 'c', dim_len(1), dim_c_ID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
	   write(*,*) 'done'	

           call mpi_barrier(MPI_COMM_WORLD, ierr)

! (iii) define variables
	   write(*,*) 'p-netcdf PAR: def var'
           stat = nfmpi_def_var(ncfileID, varName, varType, numDims, (/ dim_c_ID /), varID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
	   write(*,*) 'done'	

! (iv) end define mode (commit to disk)
	   write(*,*) 'p-netcdf PAR: end def'
           stat = nfmpi_enddef(ncfileID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)
	   write(*,*) 'done'	

           call mpi_barrier(MPI_COMM_WORLD, ierr)
       	   if (myPE == 0) clock(8) = MPI_Wtime()

! (v) use val(:) to fill varID *** each PE writes a segment ***
	   write(*,*) 'p-netcdf PAR: put var'
! NB: collective 'all vals of var' not exist
	   write(*,*) SHAPE(val)
	   do dims=1, numDims
	     count(dims) = dim_len(dims)/numProcs
	     start(dims) = (myPE)*count(dims) + 1
	     if(myPE == numProcs-1) then
	       ! less PE may have less items
	       count(dims) = dim_len(dims) - start(dims) + 1
	     end if
	   end do
	   write(*,'("start:",4(i3,1x))') start
	   write(*,'("count:",4(i3,1x))') count
	   write(*,'(" tots:",4(i3,1x))') start+count-1
	   write(*,*) 'my first val=', val(start)

	   stat = nfmpi_begin_indep_data(ncfileID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)

           stat = nfmpi_put_vara_double(ncfileID, varID, start, count, val) 
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)

	   stat = nfmpi_end_indep_data(ncfileID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)	   

	   write(*,*) 'done'	
           call mpi_barrier(MPI_COMM_WORLD, ierr)

! (vi) close file
           stat = nfmpi_close(ncfileID)
           if (stat /= nf_noerr) call print_nf90error_and_abort(stat)

           call mpi_barrier(MPI_COMM_WORLD, ierr)
       	   if (myPE == 0) clock(9) = MPI_Wtime()

 10        FORMAT(1x,'(a)',10(1x,i3))
! -----------------------------------------------------------------------------------------
! *** output timing info
        clock_info(2) = '  netcdf f90:   initialize array'
        clock_info(3) = '  netcdf f90:   time to create and fill'
	clock_info(4) = '(barrier)'
        clock_info(5) = 'p-netcdf:       time to create/define'
        clock_info(6) = 'p-netcdf:       time to fill'
	clock_info(7) = '(barrier)'
        clock_info(8) = 'p-netcdf/split: time to create/define'
        clock_info(9) = 'p-netcdf/split: time to fill'
        call output_clock(clock, clock_info, 9, myPE)

	call mpi_finalize(ierr)

      end program netcdf_speed_test_1d
