David Scott d.scott at ed.ac.uk
Mon Apr 22 06:56:57 CDT 2013

```Hello,

I am working on a fluid-mechanical code to solve the two-phase
Navier–Stokes equations with levelset interface capturing. I have been
asked to replace the pressure calculation which uses the SOR and Jacobi
iterative schemes with a Krylov subspace method. I have done this and
the code is working but as I have never used PETSc before I would like
to know if improvements to my code, or the run time parameters that I am

I am using GMRES with a Block Jacobi pre-conditioner. I have tried
Conjugate Gradient with a Block Jacobi pre-conditioner but it diverges.
If I use GMRES for the first few thousand time steps and then swap to CG
it does converge but the speed of execution is somewhat reduced.

I have attached relevant excerpts from the code.

Yours sincerely,

David Scott
--
Dr. D. M. Scott
Applications Consultant
Edinburgh Parallel Computing Centre
Tel. 0131 650 5921

The University of Edinburgh is a charitable body, registered in
Scotland, with registration number SC005336.
-------------- next part --------------
program mainprogram
use petsc
use pressure_solver

implicit none

#include "finclude/petscdef.h"

! Many declarations deleted.

PetscInt :: dof,stencil_width
parameter (dof = 1, stencil_width = 1)
DM :: da
KSP :: ksp
PetscInt :: petsc_y_ranges(0:num_procs_x-1)
PetscReal :: rtol, abstol, dtol
PetscInt :: maxits, its
KSPType :: solver_type
PC :: pc
PCType :: pc_type
PCSide :: pc_side
MatNullSpace :: nullspace
PetscReal :: rnorm

! ****************************************************************************************
! MPI stuff

call PetscInitialize(PETSC_NULL_CHARACTER,ierr)
call MPI_Comm_size(PETSC_COMM_WORLD,num_procs,ierr)
call MPI_Comm_rank(PETSC_COMM_WORLD,my_id,ierr)

if(my_id==0)then
write(*,*) 'num_procs=', num_procs

if(num_procs_x*num_procs_y*num_procs_z/=num_procs)then
write(*,*) 'Error 1: domain decomposition inconsistent with number of processors, exiting'
stop
end if

if(num_procs_z/=1)then
write(*,*) 'Error 2: domain decomposition inconsistent with number of processors, exiting'
stop
end if

if((mod(maxl-1,num_procs_x)/=0).or.(mod(maxm-1,num_procs_y)/=0))then
write(*,*) 'Error 3: number of processors must evenly divide (grid dimension-1), exiting'
stop
end if

end if

dims(1) = num_procs_x
dims(2) = num_procs_y
dims(3) = num_procs_z

periodic(1) = .false.
periodic(2) = .true.
periodic(3) = .false.
reorder = .false.

Call mpi_cart_create(PETSC_COMM_WORLD,Ndim,dims,periodic,reorder,comm2d_quasiperiodic,ierr)
Call mpi_comm_rank(  comm2d_quasiperiodic,my_id,ierr)
Call mpi_cart_coords(comm2d_quasiperiodic,my_id,Ndim,coords,ierr)
Call get_mpi_neighbours(neighbours,comm2d_quasiperiodic)

call mpi_decomp_2d(sx,ex,sy,ey,n_local_x,n_local_y,maxl,maxm,coords,dims,Ndim)

petsc_y_ranges = n_local_x
petsc_y_ranges(0) = n_local_x + 1
petsc_y_ranges(num_procs_x-1) = n_local_x + 1

call DMDACreate3d(PETSC_COMM_WORLD,                               &
DMDA_BOUNDARY_PERIODIC,                                      &
DMDA_BOUNDARY_NONE,                                          &
DMDA_BOUNDARY_NONE,                                          &
DMDA_STENCIL_BOX,                                            &
global_dim_x, global_dim_y+2, global_dim_z+2,                &
num_procs_y, num_procs_x, num_procs_z,                       &
dof, stencil_width,                                          &
PETSC_NULL_INTEGER,                                          &
petsc_y_ranges,                                              &
PETSC_NULL_INTEGER,                                          &
da, ierr)

call KSPCreate(PETSC_COMM_WORLD, ksp, ierr)
call KSPSetFromOptions(ksp, ierr)
call DMSetInitialGuess(da, copy_pressure_in_to_petsc, ierr)
call KSPSetComputeRHS(ksp, compute_rhs, PETSC_NULL_OBJECT, ierr)
call KSPSetComputeOperators(ksp, compute_matrix, PETSC_NULL_OBJECT, ierr)
call KSPSetDM(ksp, da, ierr)
call MatNullSpaceCreate(PETSC_COMM_WORLD, PETSC_TRUE, 0, PETSC_NULL_OBJECT, nullspace, ierr)
call KSPSetNullSpace(ksp, nullspace, ierr)
call MatNullSpaceDestroy(nullspace, ierr)

call calculate_indices(da, sx, sy, ex, ey, n_local_x, n_local_y, my_id, ierr)

! Initialisation code deleted.

! Time loop
do iteration=1,n_timesteps

! Velocity calculations (including RHS_p) deleted.

call KSPSolve(ksp, PETSC_NULL_OBJECT, PETSC_NULL_OBJECT, ierr)

if (my_id==60) then
call output_converged_reason(ksp)
call KSPGetResidualNorm(ksp, rnorm, ierr)
write(*, *) 'approximate, preconditioned, residual norm', rnorm
call KSPGetIterationNumber(ksp, its, ierr)
write(*, *) 'iterations =', its
end if

call copy_pressure_out_of_petsc(ksp, ierr)

! Some more velocity stuff deleted.

end do

call PetscFinalize(ierr)

end program mainprogram
-------------- next part --------------
module pressure_solver
use petsc
implicit none

#include "finclude/petscdef.h"

PetscInt :: maxl, maxm, maxn
parameter (maxl = 481, maxm = 153, maxn = 153)
! Note that the first two dimensions are swapped so that as far as PETSc is concerned
! it is the first dimension that is periodic.
PetscInt :: global_dim_x, global_dim_y, global_dim_z
parameter (global_dim_x = maxm-1, global_dim_y = maxl-1, global_dim_z = maxn-1)
PetscInt :: x_start, y_start, z_start, x_width, y_width, z_width
PetscInt :: x_max, y_max, z_max
PetscInt :: x_start_ghost, y_start_ghost, z_start_ghost
PetscInt :: x_width_ghost, y_width_ghost, z_width_ghost
PetscInt ::  x_max_ghost, y_max_ghost, z_max_ghost
PetscScalar, allocatable, dimension(:, :, :) :: pres
PetscScalar :: dx, dy, dz, dt
PetscScalar, allocatable, dimension(:, :, :) :: RHS_p, u3
PetscScalar :: u_inlet(0:global_dim_z-1)

contains

subroutine calculate_indices(da, sx, sy, ex, ey, n_local_x, n_local_y, rank, ierr)
implicit none

DM :: da
PetscInt :: sx, sy, ex, ey, n_local_x, n_local_y, rank
PetscInt :: ierr

call DMDAGetCorners(da, x_start, y_start, z_start, x_width, y_width, z_width, ierr)
x_max = x_start + x_width - 1
y_max = y_start + y_width - 1
z_max = z_start + z_width - 1

if (sx==1 .AND. ((sx-1).NE.y_start .OR. ex/=y_max)) then
write(*, *) 'ID1', rank, 'sx-1, y_start, ex, y_max, n_local_x, y_width', sx-1, y_start, ex, y_max, n_local_x, y_width
end if
if (ex==(maxl-1) .AND. (sx.NE.y_start .OR. (ex+1)/=y_max)) then
write(*, *) 'ID2', rank, 'sx, y_start, ex+1, y_max, n_local_x, y_width', sx, y_start, ex+1, y_max, n_local_x, y_width
end if
if ((sx/=1 .AND. ex/=(maxl-1)) .AND. (sx/=y_start .OR. ex/=y_max)) then
write(*, *) 'ID3', rank, 'sx, y_start, ex, y_max, n_local_x, y_width', sx, y_start, ex, y_max, n_local_x, y_width
end if
if (sy/=(x_start+1) .OR. ey/=(x_max+1)) then
write(*, *) 'ID4', rank, 'sy, x_start, ey, x_max, n_local_y, x_width', sy, x_start, ey, x_max, n_local_y, x_width
end if

call DMDAGetGhostCorners(da, x_start_ghost, y_start_ghost, z_start_ghost,  &
x_width_ghost, y_width_ghost, z_width_ghost, ierr)
x_max_ghost = x_start_ghost + x_width_ghost - 1
y_max_ghost = y_start_ghost + y_width_ghost - 1
z_max_ghost = z_start_ghost + z_width_ghost - 1

if ((sx-1)/=y_start_ghost .OR. (ex+1)/=y_max_ghost) then
write(*, *) 'GHOST1', rank, 'sx-1, y_start_ghost, ex+1, y_max_ghost',  &
sx-1, y_start_ghost, ex+1, y_max_ghost
end if
if ((sy-1)/=(x_start_ghost+1) .OR. (ey+1)/=(x_max_ghost+1)) then
write(*, *) 'GHOST2', rank, 'sy-1, x_start_ghost+1, ey+1, x_max_ghost+1',  &
sy-1, x_start_ghost+1, ey+1, x_max_ghost+1
end if
if (z_start/=z_start_ghost .OR. z_width/=z_width_ghost) then
write(*, *) 'GHOST3', rank, 'z_start, z_start_ghost, z_width, z_width_ghost',  &
z_start, z_start_ghost, z_width, z_width_ghost
end if

end subroutine calculate_indices

subroutine copy_pressure_in_to_petsc(dm, x, ierr)
implicit none
! This routine copies the data generated by the other parts of the TPLS code into
! locations where they can be accessed by PETSc.
! The global pressure array in the non-PETSc code is pres_global(0:maxl, 0:maxm, 0:maxn).
! This includes a bounday layer on all sides even though the second dimension is periodic
! as the periodic structure is implemented by copying data one interior face of the cuboid
! to the opposite face in the boundary layer.
! So the interior is (1:maxl-1, 1:maxm-1, 1:maxn-1).
! The indexing of the data in the PETSc code is different for two reasons.
!  * PETSc lays out data on processes differently from the way that it is done in the
!    non-PETSc code which necessitates swapping of the first two dimensions.
!  * PETSc is instructed to impose the periodic boundary condition behind the scenes
!    (through specifying DMDA_BOUNDARY_PERIODIC for the appropriate dimesnion). The
!    ghost points that are required to do this are not visible in the global vector,
!    but they do appear in the local vectors.
! Consequently the interior in the PETSc code is (0:maxm-2, 1:maxl-1, 1:maxn-1)
! which may written as (0:global_dim_x-1, 1:global_dim_y, 1:global_dim_z)
! Including the explicit boundaries we have
!   (0:global_dim_x-1, 0:global_dim_y+1, 0:global_dim_z+1) or (0:maxm-2, 0:maxl, 0:maxn)
! Including the ghost points supplied by PETSc we have
!   (-1:global_dim_x, 0:global_dim_y+1, 0:global_dim_z+1) or (-1:maxm-1, 0:maxl, 0:maxn)
! Let a pressure datum have coordinates (i1, j1, k1) in the non-PETSc code and
! (i2, j2, k2) in the PETSc code, then
! i2 = j1 - 1
! j2 = i1
! k2 = k1, or
! i1 = j2
! j1 = i2+1
! k1 = k2.

DM, intent(inout) :: dm
Vec, intent(inout) :: x  ! x is a global vector.
PetscErrorCode, intent(inout) ::  ierr

PetscScalar, pointer, dimension(:, :, :) :: x_3da
PetscInt :: i, j, k

call DMDAVecGetArrayF90(dm, x, x_3da, ierr)

do k = z_start, z_max
do j = y_start, y_max
do i = x_start, x_max
x_3da(i, j, k) = pres(j, i+1, k)
end do
end do
end do

call DMDAVecRestoreArrayF90(dm, x, x_3da, ierr)

end subroutine copy_pressure_in_to_petsc

subroutine compute_rhs(ksp, b, dummy, ierr)
implicit none

KSP, intent(inout) :: ksp
Vec, intent(inout) :: b  ! b is a global vector.
integer, intent(inout) :: dummy(*)
PetscErrorCode, intent(inout) :: ierr

PetscScalar, pointer, dimension(:, :, :) :: b_3da
PetscInt :: i, j, k
DM :: dm

call KSPGetDM(ksp, dm, ierr)

call DMDAVecGetArrayF90(dm, b, b_3da, ierr)

do k = z_start, z_max
do j = y_start, y_max
do i = x_start, x_max
b_3da(i, j, k) = -RHS_p(j, i+1, k)
end do
end do
end do

if (y_start==0) then
do k = 1, global_dim_z
do i = x_start, x_max
b_3da(i, 0, k) = (dx/dt)*(u_inlet(k-1)-u3(0, i ,k-1))/dy
end do
end do
end if

call DMDAVecRestoreArrayF90(dm, b, b_3da, ierr)

end subroutine compute_rhs

subroutine compute_matrix(ksp, A, B, str, dummy, ierr)
implicit none

KSP, intent(inout) :: ksp
Mat, intent(inout) :: A, B
MatStructure, intent(inout) :: str
integer, intent(inout) :: dummy(*)
PetscErrorCode, intent(inout) :: ierr

PetscInt :: i, j, k
PetscScalar    :: v(7)
MatStencil     :: row(4), col(4, 7)

do k = z_start, z_max
do j = y_start, y_max
do i = x_start, x_max
row(MatStencil_i) = i
row(MatStencil_j) = j
row(MatStencil_k) = k
! Deal with the edges of the cuboid.
! The edges with i=0 and i=(global_dim_x-1) are taken care of by the periodic boundary condition.
if ((j==0 .AND. k==0) .OR.                  &
(j==(global_dim_y+1) .AND. k==0)) then
v(1) = 1/dz
col(MatStencil_i, 1) = i
col(MatStencil_j, 1) = j
col(MatStencil_k, 1) = k
v(2) = -1/dz
col(MatStencil_i, 2) = i
col(MatStencil_j, 2) = j
col(MatStencil_k, 2) = k+1
call MatSetValuesStencil(B, 1, row, 2, col, v, INSERT_VALUES, ierr)
else if ((j==0 .AND. k==(global_dim_z+1)) .OR.                  &
(j==(global_dim_y+1) .AND. k==(global_dim_z+1))) then
v(1) = -1/dz
col(MatStencil_i, 1) = i
col(MatStencil_j, 1) = j
col(MatStencil_k, 1) = k
v(2) = 1/dz
col(MatStencil_i, 2) = i
col(MatStencil_j, 2) = j
col(MatStencil_k, 2) = k-1
call MatSetValuesStencil(B, 1, row, 2, col, v, INSERT_VALUES, ierr)
! Deal with the faces of the cuboid, excluding the edges and the corners.
! The faces i=0 and i=(global_dim_x-1) are taken care of by the periodic boundary condition.
else if (j==0) then
! Von Neumann boundary condition on y=0 boundary.
v(1) = 1/dy
col(MatStencil_i, 1) = i
col(MatStencil_j, 1) = j
col(MatStencil_k, 1) = k
v(2) = -1/dy
col(MatStencil_i, 2) = i
col(MatStencil_j, 2) = j+1
col(MatStencil_k, 2) = k
call MatSetValuesStencil(B, 1, row, 2, col, v, INSERT_VALUES, ierr)
else if (j==(global_dim_y+1)) then
! Von Neumann boundary condition on y=(global_dim_y+1) boundary.
v(1) = -1/dy
col(MatStencil_i, 1) = i
col(MatStencil_j, 1) = j
col(MatStencil_k, 1) = k
v(2) = 1/dy
col(MatStencil_i, 2) = i
col(MatStencil_j, 2) = j-1
col(MatStencil_k, 2) = k
call MatSetValuesStencil(B, 1, row, 2, col, v, INSERT_VALUES, ierr)
else if (k==0) then
! Von Neumann boundary condition on z=0 boundary.
v(1) = 1/dz
col(MatStencil_i, 1) = i
col(MatStencil_j, 1) = j
col(MatStencil_k, 1) = k
v(2) = -1/dz
col(MatStencil_i, 2) = i
col(MatStencil_j, 2) = j
col(MatStencil_k, 2) = k+1
call MatSetValuesStencil(B, 1, row, 2, col, v, INSERT_VALUES, ierr)
else if (k==(global_dim_z+1)) then
! Von Neumann boundary conditions on z=(global_dim_z+1) boundary.
v(1) = -1/dz
col(MatStencil_i, 1) = i
col(MatStencil_j, 1) = j
col(MatStencil_k, 1) = k
v(2) = 1/dz
col(MatStencil_i, 2) = i
col(MatStencil_j, 2) = j
col(MatStencil_k, 2) = k-1
call MatSetValuesStencil(B, 1, row, 2, col, v, INSERT_VALUES, ierr)
else
! Deal with the interior.
! Laplacian in 3D.
v(1) = -1/dz**2
col(MatStencil_i, 1) = i
col(MatStencil_j, 1) = j
col(MatStencil_k, 1) = k-1
v(2) = -1/dy**2
col(MatStencil_i, 2) = i
col(MatStencil_j, 2) = j-1
col(MatStencil_k, 2) = k
v(3) = -1/dx**2
col(MatStencil_i, 3) = i-1
col(MatStencil_j, 3) = j
col(MatStencil_k, 3) = k
v(4) = 2*(1/dx**2+1/dy**2+1/dz**2)
col(MatStencil_i, 4) = i
col(MatStencil_j, 4) = j
col(MatStencil_k, 4) = k
v(5) = -1/dx**2
col(MatStencil_i, 5) = i+1
col(MatStencil_j, 5) = j
col(MatStencil_k, 5) = k
v(6) = -1/dy**2
col(MatStencil_i, 6) = i
col(MatStencil_j, 6) = j+1
col(MatStencil_k, 6) = k
v(7) = -1/dz**2
col(MatStencil_i, 7) = i
col(MatStencil_j, 7) = j
col(MatStencil_k, 7) = k+1
call MatSetValuesStencil(B, 1, row, 7, col, v, INSERT_VALUES, ierr)
end if
end do
end do
end do

call MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY, ierr)
call MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY, ierr)

str = SAME_NONZERO_PATTERN

if (A.ne.B) then
call MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY, ierr)
call MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY, ierr)
endif

end subroutine compute_matrix

subroutine copy_pressure_out_of_petsc(ksp, ierr)
implicit none

KSP, intent(in) :: ksp
PetscErrorCode, intent(out) ::  ierr

DM :: dm
Vec :: global_vector
Vec :: local_vector
PetscScalar, pointer, dimension(:, :, :) :: local_vector_3da, global_vector_3da
PetscInt :: i, j, k

call KSPGetDM(ksp, dm, ierr)
call KSPGetSolution(ksp, global_vector, ierr)
call DMCreateLocalVector(dm, local_vector, ierr)
call DMGlobalToLocalBegin(dm, global_vector, INSERT_VALUES, local_vector, ierr)
call DMGlobalToLocalEnd(dm, global_vector, INSERT_VALUES, local_vector, ierr)

call DMDAVecGetArrayF90(dm, local_vector, local_vector_3da, ierr)
call DMDAVecGetArrayF90(dm, global_vector, global_vector_3da, ierr)

do k = z_start_ghost, z_max_ghost
do j = y_start_ghost, y_max_ghost
do i = x_start_ghost, x_max_ghost
pres(j, i+1, k) = local_vector_3da(i, j, k)
end do
end do
end do

call DMDAVecRestoreArrayF90(dm, global_vector, global_vector_3da, ierr)
call DMDAVecRestoreArrayF90(dm, local_vector, local_vector_3da, ierr)

! The following call is required to prevent a memory leak.
call VecDestroy(local_vector, ierr)

end subroutine copy_pressure_out_of_petsc

end module pressure_solver
```