diff --git a/include/petsccusp.h b/include/petsccusp.h index 90cfff4..b541a95 100644 --- a/include/petsccusp.h +++ b/include/petsccusp.h @@ -1,8 +1,18 @@ #if !defined(__PETSCCUSP_H) #define __PETSCCUSP_H -/* - This should only be included in user code that uses CUSP directly and hence the file name ends with .cu -*/ -#include <../src/vec/vec/impls/dvecimpl.h> -#include <../src/vec/vec/impls/seq/seqcusp/cuspvecimpl.h> + +#include +#include +#include +#include + +PETSC_EXTERN PetscErrorCode VecCUSPGetArrayReadWrite(Vec v, cusp::array1d **a); +PETSC_EXTERN PetscErrorCode VecCUSPRestoreArrayReadWrite(Vec v, cusp::array1d **a); + +PETSC_EXTERN PetscErrorCode VecCUSPGetArrayRead(Vec v, cusp::array1d **a); +PETSC_EXTERN PetscErrorCode VecCUSPRestoreArrayRead(Vec v, cusp::array1d **a); + +PETSC_EXTERN PetscErrorCode VecCUSPGetArrayWrite(Vec v, cusp::array1d **a); +PETSC_EXTERN PetscErrorCode VecCUSPRestoreArrayWrite(Vec v, cusp::array1d **a); + #endif diff --git a/src/snes/examples/tutorials/ex47cu.cu b/src/snes/examples/tutorials/ex47cu.cu index 31e91bd..bc330d6 100644 --- a/src/snes/examples/tutorials/ex47cu.cu +++ b/src/snes/examples/tutorials/ex47cu.cu @@ -7,6 +7,12 @@ static char help[] = "Solves -Laplacian u - exp(u) = 0, 0 < x < 1 using GPU\n\n #include #include +#include +#include +#include +#include +#include + extern PetscErrorCode ComputeFunction(SNES,Vec,Vec,void*), ComputeJacobian(SNES,Vec,Mat*,Mat*,MatStructure*,void*); PetscBool useCUSP = PETSC_FALSE; @@ -67,14 +73,14 @@ struct ApplyStencil PetscErrorCode ComputeFunction(SNES snes,Vec x,Vec f,void *ctx) { - PetscInt i,Mx,xs,xm,xstartshift,xendshift,fstart; + PetscInt i,Mx,xs,xm,xstartshift,xendshift,fstart,lsize; PetscScalar *xx,*ff,hx; DM da = (DM) ctx; Vec xlocal; PetscErrorCode ierr; PetscMPIInt rank,size; MPI_Comm comm; - CUSPARRAY *xarray,*farray; + cusp::array1d *xarray,*farray; ierr = DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);CHKERRQ(ierr); hx = 1.0/(PetscReal)(Mx-1); @@ -93,6 +99,7 @@ PetscErrorCode ComputeFunction(SNES snes,Vec x,Vec f,void *ctx) if (rank != size-1) xendshift = 1; else xendshift = 0; ierr = VecGetOwnershipRange(f,&fstart,NULL);CHKERRQ(ierr); + ierr = VecGetLocalSize(x,&lsize);CHKERRQ(ierr); try { thrust::for_each( thrust::make_zip_iterator( @@ -110,7 +117,7 @@ PetscErrorCode ComputeFunction(SNES snes,Vec x,Vec f,void *ctx) xarray->end()-xendshift, xarray->end()-xendshift + 1, xarray->end()-xendshift - 1, - thrust::counting_iterator(fstart) + x->map->n, + thrust::counting_iterator(fstart) + lsize, thrust::constant_iterator(Mx), thrust::constant_iterator(hx))), ApplyStencil()); diff --git a/src/vec/vec/impls/seq/seqcusp/cuspvecimpl.h b/src/vec/vec/impls/seq/seqcusp/cuspvecimpl.h index fe28e55..37a6c7e 100644 --- a/src/vec/vec/impls/seq/seqcusp/cuspvecimpl.h +++ b/src/vec/vec/impls/seq/seqcusp/cuspvecimpl.h @@ -1,6 +1,7 @@ #if !defined(__CUSPVECIMPL) #define __CUSPVECIMPL +#include #include #include @@ -87,77 +88,4 @@ struct Vec_CUSP { #endif }; - -#undef __FUNCT__ -#define __FUNCT__ "VecCUSPGetArrayReadWrite" -PETSC_STATIC_INLINE PetscErrorCode VecCUSPGetArrayReadWrite(Vec v, CUSPARRAY **a) -{ - PetscErrorCode ierr; - - PetscFunctionBegin; - *a = 0; - ierr = VecCUSPCopyToGPU(v);CHKERRQ(ierr); - *a = ((Vec_CUSP*)v->spptr)->GPUarray; - PetscFunctionReturn(0); -} - -#undef __FUNCT__ -#define __FUNCT__ "VecCUSPRestoreArrayReadWrite" -PETSC_STATIC_INLINE PetscErrorCode VecCUSPRestoreArrayReadWrite(Vec v, CUSPARRAY **a) -{ - PetscErrorCode ierr; - - PetscFunctionBegin; - v->valid_GPU_array = PETSC_CUSP_GPU; - - ierr = PetscObjectStateIncrease((PetscObject)v);CHKERRQ(ierr); - PetscFunctionReturn(0); -} - -#undef __FUNCT__ -#define __FUNCT__ "VecCUSPGetArrayRead" -PETSC_STATIC_INLINE PetscErrorCode VecCUSPGetArrayRead(Vec v, CUSPARRAY **a) -{ - PetscErrorCode ierr; - - PetscFunctionBegin; - *a = 0; - ierr = VecCUSPCopyToGPU(v);CHKERRQ(ierr); - *a = ((Vec_CUSP*)v->spptr)->GPUarray; - PetscFunctionReturn(0); -} - -#undef __FUNCT__ -#define __FUNCT__ "VecCUSPRestoreArrayRead" -PETSC_STATIC_INLINE PetscErrorCode VecCUSPRestoreArrayRead(Vec v, CUSPARRAY **a) -{ - PetscFunctionBegin; - PetscFunctionReturn(0); -} - -#undef __FUNCT__ -#define __FUNCT__ "VecCUSPGetArrayWrite" -PETSC_STATIC_INLINE PetscErrorCode VecCUSPGetArrayWrite(Vec v, CUSPARRAY **a) -{ - PetscErrorCode ierr; - - PetscFunctionBegin; - *a = 0; - ierr = VecCUSPAllocateCheck(v);CHKERRQ(ierr); - *a = ((Vec_CUSP*)v->spptr)->GPUarray; - PetscFunctionReturn(0); -} - -#undef __FUNCT__ -#define __FUNCT__ "VecCUSPRestoreArrayWrite" -PETSC_STATIC_INLINE PetscErrorCode VecCUSPRestoreArrayWrite(Vec v, CUSPARRAY **a) -{ - PetscErrorCode ierr; - - PetscFunctionBegin; - v->valid_GPU_array = PETSC_CUSP_GPU; - - ierr = PetscObjectStateIncrease((PetscObject)v);CHKERRQ(ierr); - PetscFunctionReturn(0); -} #endif diff --git a/src/vec/vec/impls/seq/seqcusp/veccusp.cu b/src/vec/vec/impls/seq/seqcusp/veccusp.cu index ce4ccaa..940ff50 100644 --- a/src/vec/vec/impls/seq/seqcusp/veccusp.cu +++ b/src/vec/vec/impls/seq/seqcusp/veccusp.cu @@ -1983,3 +1983,76 @@ PETSC_EXTERN PetscErrorCode VecCreate_SeqCUSP(Vec V) ierr = VecSet(V,0.0);CHKERRQ(ierr); PetscFunctionReturn(0); } + +#undef __FUNCT__ +#define __FUNCT__ "VecCUSPGetArrayReadWrite" +PETSC_EXTERN PetscErrorCode VecCUSPGetArrayReadWrite(Vec v, CUSPARRAY **a) +{ + PetscErrorCode ierr; + + PetscFunctionBegin; + *a = 0; + ierr = VecCUSPCopyToGPU(v);CHKERRQ(ierr); + *a = ((Vec_CUSP*)v->spptr)->GPUarray; + PetscFunctionReturn(0); +} + +#undef __FUNCT__ +#define __FUNCT__ "VecCUSPRestoreArrayReadWrite" +PETSC_EXTERN PetscErrorCode VecCUSPRestoreArrayReadWrite(Vec v, CUSPARRAY **a) +{ + PetscErrorCode ierr; + + PetscFunctionBegin; + v->valid_GPU_array = PETSC_CUSP_GPU; + + ierr = PetscObjectStateIncrease((PetscObject)v);CHKERRQ(ierr); + PetscFunctionReturn(0); +} + +#undef __FUNCT__ +#define __FUNCT__ "VecCUSPGetArrayRead" +PETSC_EXTERN PetscErrorCode VecCUSPGetArrayRead(Vec v, CUSPARRAY **a) +{ + PetscErrorCode ierr; + + PetscFunctionBegin; + *a = 0; + ierr = VecCUSPCopyToGPU(v);CHKERRQ(ierr); + *a = ((Vec_CUSP*)v->spptr)->GPUarray; + PetscFunctionReturn(0); +} + +#undef __FUNCT__ +#define __FUNCT__ "VecCUSPRestoreArrayRead" +PETSC_EXTERN PetscErrorCode VecCUSPRestoreArrayRead(Vec v, CUSPARRAY **a) +{ + PetscFunctionBegin; + PetscFunctionReturn(0); +} + +#undef __FUNCT__ +#define __FUNCT__ "VecCUSPGetArrayWrite" +PETSC_EXTERN PetscErrorCode VecCUSPGetArrayWrite(Vec v, CUSPARRAY **a) +{ + PetscErrorCode ierr; + + PetscFunctionBegin; + *a = 0; + ierr = VecCUSPAllocateCheck(v);CHKERRQ(ierr); + *a = ((Vec_CUSP*)v->spptr)->GPUarray; + PetscFunctionReturn(0); +} + +#undef __FUNCT__ +#define __FUNCT__ "VecCUSPRestoreArrayWrite" +PETSC_EXTERN PetscErrorCode VecCUSPRestoreArrayWrite(Vec v, CUSPARRAY **a) +{ + PetscErrorCode ierr; + + PetscFunctionBegin; + v->valid_GPU_array = PETSC_CUSP_GPU; + + ierr = PetscObjectStateIncrease((PetscObject)v);CHKERRQ(ierr); + PetscFunctionReturn(0); +}