program test_view implicit none include 'mpif.h' ! This program writes a single distributed 3D field to a file using calls to MPI_Type_create_subarray, ! MPI_File_set_view and MPI_File_write_all. It then attempts to append two 1D variables to the end of the file ! from a single rank. I've implemented three scenarios for the 1D writes. See the case structure for scenarios. ! ! The final size of the file should be [(516x516x72)+(2*72)]*4 = 76682304 Bytes. ! This is the case under scenario 0 and 1 for all process counts (isize*jsize) I've tested. ! I get the correct size under scenario 2 for all process counts tested only ! if isize is 3 or smaller. ! With isize=4, I get a file that is 76683852 B (+1548B) ! isize=5 -> 76683952 (+100B) ! isize=6 -> 76684024 (+72B) ! isize=7 -> 76684072 (+48B) ! isize=8 -> 76685912 (+1840B) ! isize=9 -> 76685968 (+56B) ! These results are from using the Intel 12.1.4.319 compiler with SGI MPT-2.06 on the pleiades platform. ! I've also confirmed that the odd behavior occurs under MPT-1.25, IntelMPI-4.0.2.003, IntelMPI-3.1.038, ! MVAPICH2-1.8 and OpenMPI-1.5.5 on the same platform with the same compiler. ! ! The odd behavior is present when writing to Lustre or a RAM FS on pleiades. ! ! The odd behavior is present on the discover platform using Intel 12.1.4.319 with IntelMPI-4.0.3.008 and writing to a GPFS file ststem. ! Is this expected behavior? integer :: scenario=2 integer, parameter :: IM_WORLD= 516 integer, parameter :: JM_WORLD= 516 integer, parameter :: KM_WORLD= 72 integer, parameter :: isize= 9 integer, parameter :: jsize= 1 integer info, mpi_size, mpi_rank, filetype, status, mpistatus(MPI_STATUS_SIZE) integer :: numwrite integer i, j, k, l, n, rm integer unit integer, allocatable :: IM_LOCAL(:) integer, allocatable :: JM_LOCAL(:) real(kind=4), allocatable :: udata(:,:,:) real(kind=4), allocatable :: zdata(:) character(len = *), parameter:: file_name="tst_2Dlayout.bin" integer istart, jstart, global_sizes(3), local_sizes(3), starts(3) integer (kind=MPI_OFFSET_KIND) :: disp=0 integer (kind=MPI_OFFSET_KIND) :: offset character(len=MPI_MAX_INFO_KEY ) :: key character(len=MPI_MAX_INFO_VAL ) :: value integer :: nkeys, flag, valuelen=MPI_MAX_INFO_VAL ! Initialize MPI ! -------------- call MPI_Init(status) call MPI_Comm_size(MPI_COMM_WORLD, mpi_size, status) call MPI_Comm_rank(MPI_COMM_WORLD, mpi_rank, status) ! Create MPI Info object so we can pass hints to the MPI implementation ! --------------------------------------------------------------------- call MPI_Info_create(info, status) ! call MPI_Info_set(info, "romio_cb_write", "enable", STATUS) ! call MPI_Info_set(info, "striping_factor", "3", STATUS) ! call MPI_Info_set(info, "direct_write", "false", status) ! call MPI_Info_set(info, "romio_cb_write", "enable", status) ! call MPI_Info_set(info, "cb_buffer_size", "21299200", status) ! call MPI_Info_set(info, "striping_factor", "16", status) ! call MPI_Info_set(info, "striping_unit", "1048576", status) ! call MPI_Info_set(info, "striping_unit", "4194304", status) ! call MPI_Info_set(info, "striping_unit", "4259840", status) ! call MPI_Info_set(info, "romio_lustre_start_iodevice", "-1", status) if(mpi_size /= isize*jsize) then print*,'Must use ',isize*jsize,' processes' stop endif ! Divy up the globe along Longitude ! --------------------------------- allocate(IM_LOCAL(isize), stat=status) i = IM_WORLD / isize rm = IM_WORLD-isize*i do n=1, isize IM_LOCAL(n) = IM_WORLD / isize if( n <= rm ) IM_LOCAL(n) = IM_LOCAL(n) + 1 enddo ! print*, "im_local ",IM_LOCAL ! print*, "sum of IM_LOCAL ",sum(IM_LOCAL) ! Divy up the globe along Latitude ! -------------------------------- allocate(JM_LOCAL(jsize), stat=status) j = JM_WORLD / jsize rm = JM_WORLD-jsize*j do n=1, jsize JM_LOCAL(n) = JM_WORLD / jsize if( n <= rm ) JM_LOCAL(n) = JM_LOCAL(n) + 1 enddo ! print*, "jm_local ",JM_LOCAL ! print*, "sum of JM_LOCAL ",sum(JM_LOCAL) ! Create a file then open on all ranks ! ------------------------------------ if (mpi_rank == 0) then call MPI_File_open(MPI_COMM_SELF, file_name, IOR(MPI_MODE_WRONLY,MPI_MODE_CREATE), info, unit, status) allocate(udata(IM_WORLD , JM_WORLD, KM_WORLD), stat=status) udata=Z'7FA00000' ! call MPI_File_write(unit, udata, size(udata), MPI_REAL4, mpistatus, status) allocate(zdata(KM_WORLD), stat=status) zdata=Z'7FA00000' ! call MPI_File_write(unit, zdata, KM_WORLD, MPI_REAL4, mpistatus, status) call MPI_File_close(unit, status) deallocate(udata) deallocate(zdata) ! Check default info ! ------------------ call MPI_Info_get_nkeys(info, nkeys, STATUS) if(nkeys > 0) then do i=0,nkeys-1 call MPI_Info_get_nthkey(info, i, key, STATUS) call MPI_Info_get(info, key, valuelen, value, flag, STATUS) write(*,'("MPI_Info Key-Value pair ",a," -> ",a)') trim(key),trim(value) enddo else print*,'No default info settings' endif endif ! Reopen on all ranks ! ------------------- call MPI_Barrier(MPI_COMM_WORLD, status) call MPI_File_open(MPI_COMM_WORLD, file_name, MPI_MODE_WRONLY, info, unit, status) ! Fake data to help determine where data lands on disk ! ---------------------------------------------------- allocate(udata(IM_LOCAL(mod(mpi_rank,isize)+1) , JM_LOCAL((mpi_rank/isize)+1), KM_WORLD), stat=status) allocate(zdata(KM_WORLD), stat=status) do k=1, KM_WORLD do j=1, JM_LOCAL((mpi_rank/isize)+1) do i=1, IM_LOCAL(mod(mpi_rank,isize)+1) ! udata(i,j,k) = (1 + mpi_rank) * (k + 1) udata(i,j,k) = float(mpi_rank) enddo enddo zdata(k)=-float(k) enddo ! Set up selection parameters ! --------------------------- istart = sum(IM_LOCAL(1:mod(mpi_rank,isize)+1)) - IM_LOCAL(mod(mpi_rank,isize)+1) jstart = sum(JM_LOCAL(1:(mpi_rank/isize)+1)) - JM_LOCAL((mpi_rank/isize)+1) ! print*, "istart and jstart ",istart,jstart global_sizes(1) = IM_WORLD ; global_sizes(2)=JM_WORLD ; global_sizes(3)=KM_WORLD local_sizes(1) = IM_LOCAL(mod(mpi_rank,isize)+1) local_sizes(2) = JM_LOCAL((mpi_rank/isize)+1) local_sizes(3) = KM_WORLD starts(1) = istart ; starts(2)=jstart ; starts(3)=0 print*,'local_sizes ',local_sizes, product(local_sizes) print*, "starts ",starts ! Create the DDT ! -------------- call MPI_Type_create_subarray(3, global_sizes, local_sizes, starts, MPI_ORDER_FORTRAN, MPI_REAL4, filetype, status) call MPI_Type_commit(filetype, status) ! Determine our view of the file ! ------------------------------ call MPI_File_set_view(unit, disp, MPI_REAL4, filetype, "native", info, status) if(status /= MPI_SUCCESS) print*,'status from MPI_File_set_view is ',status call MPI_File_get_position(unit,offset,status) ! print*,'offset after set_view is ',offset call MPI_File_get_byte_offset(unit,offset,disp,status) ! print*,'disp after set_view is ',disp call MPI_Type_free(filetype, status) if(status /= MPI_SUCCESS) print*,'status from MPI_Type_free is ',status ! Write a 3D variable ! ------------------- call MPI_File_write_all(unit, udata, size(udata), MPI_REAL4, mpistatus, status) if(status /= MPI_SUCCESS) print*,'status from MPI_File_write_all is ',status call MPI_Get_count( mpistatus, MPI_REAL4, numwrite, status) if(status /= MPI_SUCCESS) print*,'status from MPI_Get_count is ',status ! print*,'I wrote ',numwrite call MPI_File_get_position(unit,offset,status) call MPI_File_get_byte_offset(unit,offset,disp,status) print*,'1st offset and disp are ',offset,disp ! Now write some 1D vars from rank 0 ! ---------------------------------- select case (scenario) case(0) print*,'Option 0' call MPI_File_sync(unit, status) call MPI_File_close(unit, status) if (mpi_rank == 0) then open(unit=unit,file=file_name,status='old',access='stream',action='write',form='unformatted',position='append') write(unit=unit) zdata write(unit=unit) zdata close(unit) endif call MPI_Barrier(MPI_COMM_WORLD, status) case(1) print*,'Option 1' call MPI_File_sync(unit, status) call MPI_File_close(unit, status) if (mpi_rank == 0) then call MPI_File_open(MPI_COMM_SELF, file_name, MPI_MODE_WRONLY, info, unit, status) if(status /= MPI_SUCCESS) print*,'status from MPI_File_open is ',status offset=disp ! or product(global_sizes)*4 call MPI_File_seek(unit, offset, MPI_SEEK_SET, status) if(status /= MPI_SUCCESS) print*,'status from MPI_File_seek is ',status call MPI_File_write(unit, zdata, KM_WORLD, MPI_REAL4, mpistatus, status) if(status /= MPI_SUCCESS) print*,'status from MPI_File_write is ',status call MPI_Get_count( mpistatus, MPI_REAL4, numwrite, status) if(status /= MPI_SUCCESS) print*,'status from MPI_Get_count is ',status print*,'I wrote ',numwrite call MPI_File_write(unit, zdata, KM_WORLD, MPI_REAL4, mpistatus, status) if(status /= MPI_SUCCESS) print*,'status from 2nd MPI_File_write is ',status call MPI_Get_count( mpistatus, MPI_REAL4, numwrite, status) if(status /= MPI_SUCCESS) print*,'status from 2nd MPI_Get_count is ',status print*,'I wrote ',numwrite call MPI_File_get_position(unit,offset,status) if(status /= MPI_SUCCESS) print*,'status from 3rd MPI_File_get_position is ',status call MPI_File_get_byte_offset(unit,offset,disp,status) if(status /= MPI_SUCCESS) print*,'status from 3rd MPI_File_get_byte_offset is ',status print*,'3rd offset and disp are ',offset,disp call MPI_File_sync(unit, status) call MPI_File_close(unit, status) endif call MPI_Barrier(MPI_COMM_WORLD, status) case(2) ! Final size is 76682304 if isize <= 3 ! Final size is 76683852 if isize > 3 ! outcome seems impervious to value of jsize (tested 1-7) print*,'Option 2' if (mpi_rank == 0) then ! offset=product(global_sizes)/4 ! call MPI_File_seek(unit, offset, MPI_SEEK_SET, status) ! if(status /= MPI_SUCCESS) print*,'status from MPI_File_seek is ',status call MPI_File_write(unit, zdata, KM_WORLD, MPI_REAL4, mpistatus, status) if(status /= MPI_SUCCESS) print*,'status from MPI_File_write is ',status call MPI_Get_count( mpistatus, MPI_REAL4, numwrite, status) if(status /= MPI_SUCCESS) print*,'status from MPI_Get_count is ',status print*,'I wrote ',numwrite call MPI_File_write(unit, zdata, KM_WORLD, MPI_REAL4, mpistatus, status) if(status /= MPI_SUCCESS) print*,'status from 2nd MPI_File_write is ',status call MPI_Get_count( mpistatus, MPI_REAL4, numwrite, status) if(status /= MPI_SUCCESS) print*,'status from 2nd MPI_Get_count is ',status print*,'I wrote ',numwrite call MPI_File_get_position(unit,offset,status) if(status /= MPI_SUCCESS) print*,'status from 3rd MPI_File_get_position is ',status call MPI_File_get_byte_offset(unit,offset,disp,status) if(status /= MPI_SUCCESS) print*,'status from 3rd MPI_File_get_byte_offset is ',status print*,'3rd offset and disp are ',offset,disp endif call MPI_Barrier(MPI_COMM_WORLD, status) call MPI_File_sync(unit, status) call MPI_File_close(unit, status) case(3) ! call MPI_File_write_at(unit, disp/4, zdata, KM_WORLD, MPI_REAL4, mpistatus, status) ! if(status /= MPI_SUCCESS) print*,'status from MPI_File_write_at is ',status ! call MPI_Get_count( mpistatus, MPI_REAL4, numwrite, status) ! if(status /= MPI_SUCCESS) print*,'status from MPI_Get_count is ',status ! print*,'I wrote ',numwrite ! call MPI_File_get_position(unit,offset,status) ! if(status /= MPI_SUCCESS) print*,'status from 2nd MPI_File_get_position is ',status ! call MPI_File_get_byte_offset(unit,offset,disp,status) ! if(status /= MPI_SUCCESS) print*,'status from 2nd MPI_File_get_byte_offset is ',status ! print*,'2nd offset and disp are ',offset,disp ! call MPI_File_close(unit, status) ! call MPI_File_open(MPI_COMM_SELF, file_name, MPI_MODE_WRONLY, info, unit, status) ! if(status /= MPI_SUCCESS) print*,'status from MPI_File_open is ',status ! call MPI_File_seek(unit, offset+KM_WORLD, MPI_SEEK_SET, status) ! if(status /= MPI_SUCCESS) print*,'status from 2nd MPI_File_seek is ',status ! call MPI_File_write(unit, zdata, KM_WORLD, MPI_REAL4, mpistatus, status) ! if(status /= MPI_SUCCESS) print*,'status from 2nd MPI_File_write is ',status !! call MPI_File_write_at(unit, (disp/4)+72, zdata, KM_WORLD, MPI_REAL4, mpistatus, status) !! if(status /= MPI_SUCCESS) print*,'status from 2nd MPI_File_write_at is ',status ! call MPI_Get_count( mpistatus, MPI_REAL4, numwrite, status) ! if(status /= MPI_SUCCESS) print*,'status from 2nd MPI_Get_count is ',status ! print*,'I wrote ',numwrite ! call MPI_File_get_position(unit,offset,status) ! if(status /= MPI_SUCCESS) print*,'status from 3rd MPI_File_get_position is ',status ! call MPI_File_get_byte_offset(unit,offset,disp,status) ! if(status /= MPI_SUCCESS) print*,'status from 3rd MPI_File_get_byte_offset is ',status ! print*,'3rd offset and disp are ',offset,disp ! call MPI_File_sync(unit, status) ! call MPI_File_close(unit, status) end select ! Cleaup ! ------ deallocate(udata) deallocate(zdata) deallocate(IM_LOCAL) deallocate(JM_LOCAL) call MPI_Info_free(info, status) call MPI_Finalize(status) end program test_view