exporting patches: # HG changeset patch # User Paul Mullowney # Date 1359762702 25200 # Node ID 56912710a81a5dddbb66dfe3a21cb8ddd32d6555 # Parent 45ae9050b6b259aa3fd8fcbae9fd54fc60f62a74 Reworked patch for bicg on GPUs. Adding changes to enable double complex bicg to work on GPUs via cusparse class. This includes a VecConjugate implementation in veccusp.cu and implementations of TransposeSolve in aijcusparse.cu. This also includes style fixes and protections to prevent memory leaks. diff -r 45ae9050b6b2 -r 56912710a81a src/mat/impls/aij/seq/seqcusparse/aijcusparse.cu --- a/src/mat/impls/aij/seq/seqcusparse/aijcusparse.cu Fri Feb 01 10:59:57 2013 -0600 +++ b/src/mat/impls/aij/seq/seqcusparse/aijcusparse.cu Fri Feb 01 16:51:42 2013 -0700 @@ -26,6 +26,8 @@ PetscErrorCode MatLUFactorNumeric_SeqAIJCUSPARSE(Mat,Mat,const MatFactorInfo *); PetscErrorCode MatSolve_SeqAIJCUSPARSE(Mat,Vec,Vec); PetscErrorCode MatSolve_SeqAIJCUSPARSE_NaturalOrdering(Mat,Vec,Vec); +PetscErrorCode MatSolveTranspose_SeqAIJCUSPARSE(Mat,Vec,Vec); +PetscErrorCode MatSolveTranspose_SeqAIJCUSPARSE_NaturalOrdering(Mat,Vec,Vec); PetscErrorCode MatSetFromOptions_SeqAIJCUSPARSE(Mat); PetscErrorCode MatSeqAIJCUSPARSEAnalysisAndCopyToGPU(Mat); PetscErrorCode MatMult_SeqAIJCUSPARSE(Mat,Vec,Vec); @@ -148,8 +150,7 @@ if (flg) { ierr = MatCUSPARSESetFormat(A,MAT_CUSPARSE_MULT,format);CHKERRQ(ierr); } - } - else { + } else { ierr = PetscOptionsEnum("-mat_cusparse_solve_storage_format","sets storage format of (seq)aijcusparse gpu matrices for TriSolve", "MatCUSPARSESetFormat",MatCUSPARSEStorageFormats,(PetscEnum)MAT_CUSPARSE_CSR,(PetscEnum*)&format,&flg);CHKERRQ(ierr); if (flg) { @@ -244,7 +245,7 @@ vi += nz; } cusparseMat = GPU_Matrix_Factory::getNew(MatCUSPARSEStorageFormats[cusparseTriFactors->format]); - stat = cusparseMat->initializeCusparse(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_TRIANGULAR, CUSPARSE_FILL_MODE_LOWER);CHKERRCUSP(stat); + stat = cusparseMat->initializeCusparseMat(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_TRIANGULAR, CUSPARSE_FILL_MODE_LOWER);CHKERRCUSP(stat); ierr = cusparseMat->setMatrix(n, n, nzLower, AiLo, AjLo, AALo);CHKERRCUSP(ierr); stat = cusparseMat->solveAnalysis();CHKERRCUSP(stat); ((Mat_SeqAIJCUSPARSETriFactors*)A->spptr)->loTriFactorPtr = cusparseMat; @@ -308,7 +309,7 @@ ierr = PetscMemcpy(&(AAUp[offset+1]), v, nz*sizeof(PetscScalar));CHKERRQ(ierr); } cusparseMat = GPU_Matrix_Factory::getNew(MatCUSPARSEStorageFormats[cusparseTriFactors->format]); - stat = cusparseMat->initializeCusparse(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_TRIANGULAR, CUSPARSE_FILL_MODE_UPPER);CHKERRCUSP(stat); + stat = cusparseMat->initializeCusparseMat(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_TRIANGULAR, CUSPARSE_FILL_MODE_UPPER);CHKERRCUSP(stat); ierr = cusparseMat->setMatrix(n, n, nzUpper, AiUp, AjUp, AAUp);CHKERRCUSP(ierr); stat = cusparseMat->solveAnalysis();CHKERRCUSP(stat); ((Mat_SeqAIJCUSPARSETriFactors*)A->spptr)->upTriFactorPtr = cusparseMat; @@ -371,9 +372,13 @@ /* determine which version of MatSolve needs to be used. */ ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); - if (row_identity && col_identity) B->ops->solve = MatSolve_SeqAIJCUSPARSE_NaturalOrdering; - else B->ops->solve = MatSolve_SeqAIJCUSPARSE; - + if (row_identity && col_identity) { + B->ops->solve = MatSolve_SeqAIJCUSPARSE_NaturalOrdering; + B->ops->solvetranspose = MatSolveTranspose_SeqAIJCUSPARSE_NaturalOrdering; + } else { + B->ops->solve = MatSolve_SeqAIJCUSPARSE; + B->ops->solvetranspose = MatSolveTranspose_SeqAIJCUSPARSE; + } /* get the triangular factors */ ierr = MatSeqAIJCUSPARSEAnalysisAndCopyToGPU(B);CHKERRQ(ierr); PetscFunctionReturn(0); @@ -382,12 +387,128 @@ #undef __FUNCT__ +#define __FUNCT__ "MatSeqAIJCUSPARSEAnalyzeTransposeForSolve" +PetscErrorCode MatSeqAIJCUSPARSEAnalyzeTransposeForSolve(Mat A) +{ + Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; + GPU_Matrix_Ifc* cusparseMatLo = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr; + GPU_Matrix_Ifc* cusparseMatUp = (GPU_Matrix_Ifc*)cusparseTriFactors->upTriFactorPtr; + cusparseStatus_t stat; + PetscFunctionBegin; + stat = cusparseMatLo->initializeCusparseMat(MAT_cusparseHandle, + CUSPARSE_MATRIX_TYPE_TRIANGULAR, + CUSPARSE_FILL_MODE_UPPER, + TRANSPOSE);CHKERRCUSP(stat); + stat = cusparseMatLo->solveAnalysis(TRANSPOSE);CHKERRCUSP(stat); + + stat = cusparseMatUp->initializeCusparseMat(MAT_cusparseHandle, + CUSPARSE_MATRIX_TYPE_TRIANGULAR, + CUSPARSE_FILL_MODE_LOWER, + TRANSPOSE);CHKERRCUSP(stat); + stat = cusparseMatUp->solveAnalysis(TRANSPOSE);CHKERRCUSP(stat); + PetscFunctionReturn(0); +} + +#undef __FUNCT__ +#define __FUNCT__ "MatSeqAIJCUSPARSEGenerateTransposeForMult" +PetscErrorCode MatSeqAIJCUSPARSEGenerateTransposeForMult(Mat A) +{ + PetscErrorCode ierr; + Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE*)A->spptr; + Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; + cusparseStatus_t stat; + PetscFunctionBegin; + stat = cusparseMat->mat->initializeCusparseMat(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_GENERAL, CUSPARSE_FILL_MODE_UPPER, TRANSPOSE);CHKERRCUSP(stat); + ierr = cusparseMat->mat->setMatrix(A->rmap->n, A->cmap->n, a->nz, a->i, a->j, a->a, TRANSPOSE);CHKERRCUSP(ierr); + PetscFunctionReturn(0); + } + + + + + +#undef __FUNCT__ +#define __FUNCT__ "MatSolveTranspose_SeqAIJCUSPARSE" +PetscErrorCode MatSolveTranspose_SeqAIJCUSPARSE(Mat A,Vec bb,Vec xx) +{ + Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; + PetscErrorCode ierr; + CUSPARRAY *xGPU, *bGPU; + cusparseStatus_t stat; + Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; + GPU_Matrix_Ifc *cusparseMatLo = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr; + GPU_Matrix_Ifc *cusparseMatUp = (GPU_Matrix_Ifc*)cusparseTriFactors->upTriFactorPtr; + CUSPARRAY * tempGPU = (CUSPARRAY*) cusparseTriFactors->tempvec; + + PetscFunctionBegin; + /* Analyze the matrix ... on the fly */ + if (!cusparseTriFactors->hasTranspose) { + ierr = MatSeqAIJCUSPARSEAnalyzeTransposeForSolve(A);CHKERRQ(ierr); + cusparseTriFactors->hasTranspose=PETSC_TRUE; + } + + /* Get the GPU pointers */ + ierr = VecCUSPGetArrayWrite(xx,&xGPU);CHKERRQ(ierr); + ierr = VecCUSPGetArrayRead(bb,&bGPU);CHKERRQ(ierr); + + /* solve with reordering */ + ierr = cusparseMatUp->reorderIn(xGPU, bGPU);CHKERRCUSP(ierr); + stat = cusparseMatUp->solve(xGPU, tempGPU, TRANSPOSE);CHKERRCUSP(stat); + stat = cusparseMatLo->solve(tempGPU, xGPU, TRANSPOSE);CHKERRCUSP(stat); + ierr = cusparseMatLo->reorderOut(xGPU);CHKERRCUSP(ierr); + + /* restore */ + ierr = VecCUSPRestoreArrayRead(bb,&bGPU);CHKERRQ(ierr); + ierr = VecCUSPRestoreArrayWrite(xx,&xGPU);CHKERRQ(ierr); + ierr = WaitForGPU();CHKERRCUSP(ierr); + ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); + PetscFunctionReturn(0); +} + +#undef __FUNCT__ +#define __FUNCT__ "MatSolveTranspose_SeqAIJCUSPARSE_NaturalOrdering" +PetscErrorCode MatSolveTranspose_SeqAIJCUSPARSE_NaturalOrdering(Mat A,Vec bb,Vec xx) +{ + Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; + PetscErrorCode ierr; + CUSPARRAY *xGPU, *bGPU; + cusparseStatus_t stat; + Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; + GPU_Matrix_Ifc *cusparseMatLo = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr; + GPU_Matrix_Ifc *cusparseMatUp = (GPU_Matrix_Ifc*)cusparseTriFactors->upTriFactorPtr; + CUSPARRAY * tempGPU = (CUSPARRAY*) cusparseTriFactors->tempvec; + + PetscFunctionBegin; + /* Analyze the matrix ... on the fly */ + if (!cusparseTriFactors->hasTranspose) { + ierr = MatSeqAIJCUSPARSEAnalyzeTransposeForSolve(A);CHKERRQ(ierr); + cusparseTriFactors->hasTranspose=PETSC_TRUE; + } + + /* Get the GPU pointers */ + ierr = VecCUSPGetArrayWrite(xx,&xGPU);CHKERRQ(ierr); + ierr = VecCUSPGetArrayRead(bb,&bGPU);CHKERRQ(ierr); + + /* solve */ + stat = cusparseMatUp->solve(bGPU, tempGPU, TRANSPOSE);CHKERRCUSP(stat); + stat = cusparseMatLo->solve(tempGPU, xGPU, TRANSPOSE);CHKERRCUSP(stat); + + /* restore */ + ierr = VecCUSPRestoreArrayRead(bb,&bGPU);CHKERRQ(ierr); + ierr = VecCUSPRestoreArrayWrite(xx,&xGPU);CHKERRQ(ierr); + ierr = WaitForGPU();CHKERRCUSP(ierr); + ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); + PetscFunctionReturn(0); +} + + +#undef __FUNCT__ #define __FUNCT__ "MatSolve_SeqAIJCUSPARSE" PetscErrorCode MatSolve_SeqAIJCUSPARSE(Mat A,Vec bb,Vec xx) { Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; PetscErrorCode ierr; - CUSPARRAY *xGPU=PETSC_NULL, *bGPU=PETSC_NULL; + CUSPARRAY *xGPU, *bGPU; cusparseStatus_t stat; Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; GPU_Matrix_Ifc *cusparseMatLo = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr; @@ -421,7 +542,7 @@ { Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; PetscErrorCode ierr; - CUSPARRAY *xGPU=PETSC_NULL, *bGPU=PETSC_NULL; + CUSPARRAY *xGPU, *bGPU; cusparseStatus_t stat; Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; GPU_Matrix_Ifc *cusparseMatLo = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr; @@ -508,7 +629,7 @@ ierr = cusparseMat->mat->buildStreamsAndEvents(size, &theBodyStream);CHKERRCUSP(ierr); /* FILL MODE UPPER is irrelevant */ - cusparseStatus_t stat = cusparseMat->mat->initializeCusparse(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_GENERAL, CUSPARSE_FILL_MODE_UPPER);CHKERRCUSP(stat); + cusparseStatus_t stat = cusparseMat->mat->initializeCusparseMat(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_GENERAL, CUSPARSE_FILL_MODE_UPPER);CHKERRCUSP(stat); /* lastly, build the matrix */ ierr = cusparseMat->mat->setMatrix(m, A->cmap->n, a->nz, ii, a->j, a->a);CHKERRCUSP(ierr); @@ -560,7 +681,7 @@ Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; PetscErrorCode ierr; Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE *)A->spptr; - CUSPARRAY *xarray=PETSC_NULL,*yarray=PETSC_NULL; + CUSPARRAY *xarray,*yarray; PetscFunctionBegin; /* The line below should not be necessary as it has been moved to MatAssemblyEnd_SeqAIJCUSPARSE @@ -589,19 +710,19 @@ Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; PetscErrorCode ierr; Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE *)A->spptr; - CUSPARRAY *xarray=PETSC_NULL,*yarray=PETSC_NULL; + CUSPARRAY *xarray,*yarray; PetscFunctionBegin; /* The line below should not be necessary as it has been moved to MatAssemblyEnd_SeqAIJCUSPARSE ierr = MatSeqAIJCUSPARSECopyToGPU(A);CHKERRQ(ierr); */ + if (!cusparseMat->hasTranspose) { + ierr = MatSeqAIJCUSPARSEGenerateTransposeForMult(A);CHKERRQ(ierr); + cusparseMat->hasTranspose=PETSC_TRUE; + } ierr = VecCUSPGetArrayRead(xx,&xarray);CHKERRQ(ierr); ierr = VecCUSPGetArrayWrite(yy,&yarray);CHKERRQ(ierr); try { -#if !defined(PETSC_USE_COMPLEX) ierr = cusparseMat->mat->multiply(xarray, yarray, TRANSPOSE);CHKERRCUSP(ierr); -#else - ierr = cusparseMat->mat->multiply(xarray, yarray, HERMITIAN);CHKERRCUSP(ierr); -#endif } catch (char* ex) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); } @@ -621,7 +742,7 @@ Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; PetscErrorCode ierr; Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE *)A->spptr; - CUSPARRAY *xarray=PETSC_NULL,*yarray=PETSC_NULL,*zarray=PETSC_NULL; + CUSPARRAY *xarray,*yarray,*zarray; PetscFunctionBegin; /* The line below should not be necessary as it has been moved to MatAssemblyEnd_SeqAIJCUSPARSE @@ -659,6 +780,11 @@ PetscFunctionBegin; /* The line below should not be necessary as it has been moved to MatAssemblyEnd_SeqAIJCUSPARSE ierr = MatSeqAIJCUSPARSECopyToGPU(A);CHKERRQ(ierr); */ + if (!cusparseMat->hasTranspose) { + ierr = MatSeqAIJCUSPARSEGenerateTransposeForMult(A);CHKERRQ(ierr); + cusparseMat->hasTranspose=PETSC_TRUE; + } + try { ierr = VecCopy_SeqCUSP(yy,zz);CHKERRQ(ierr); ierr = VecCUSPGetArrayRead(xx,&xarray);CHKERRQ(ierr); @@ -666,11 +792,7 @@ ierr = VecCUSPGetArrayWrite(zz,&zarray);CHKERRQ(ierr); /* multiply add with matrix transpose */ -#if !defined(PETSC_USE_COMPLEX) ierr = cusparseMat->mat->multiplyAdd(xarray, yarray, TRANSPOSE);CHKERRCUSP(ierr); -#else - ierr = cusparseMat->mat->multiplyAdd(xarray, yarray, HERMITIAN);CHKERRCUSP(ierr); -#endif ierr = VecCUSPRestoreArrayRead(xx,&xarray);CHKERRQ(ierr); ierr = VecCUSPRestoreArrayRead(yy,&yarray);CHKERRQ(ierr); @@ -825,6 +947,7 @@ ((Mat_SeqAIJCUSPARSE *)B->spptr)->mat = 0; ((Mat_SeqAIJCUSPARSE *)B->spptr)->tempvec = 0; ((Mat_SeqAIJCUSPARSE *)B->spptr)->format = MAT_CUSPARSE_CSR; + ((Mat_SeqAIJCUSPARSE *)B->spptr)->hasTranspose = PETSC_FALSE; } else { /* NEXT, set the pointers to the triangular factors */ B->spptr = new Mat_SeqAIJCUSPARSETriFactors; @@ -832,12 +955,14 @@ ((Mat_SeqAIJCUSPARSETriFactors *)B->spptr)->upTriFactorPtr = 0; ((Mat_SeqAIJCUSPARSETriFactors *)B->spptr)->tempvec = 0; ((Mat_SeqAIJCUSPARSETriFactors *)B->spptr)->format = cusparseMatSolveStorageFormat; + ((Mat_SeqAIJCUSPARSETriFactors *)B->spptr)->hasTranspose = PETSC_FALSE; } /* Create a single instance of the MAT_cusparseHandle for any matrix (matMult, TriSolve, ...) */ if (!MAT_cusparseHandle) { cusparseStatus_t stat; stat = cusparseCreate(&MAT_cusparseHandle);CHKERRCUSP(stat); } + /* Here we overload MatGetFactor_petsc_C which enables -mat_type aijcusparse to use the default cusparse tri solve. Note the difference with the implementation in MatCreate_SeqAIJCUSP in ../seqcusp/aijcusp.cu */ diff -r 45ae9050b6b2 -r 56912710a81a src/mat/impls/aij/seq/seqcusparse/cusparsematimpl.h --- a/src/mat/impls/aij/seq/seqcusparse/cusparsematimpl.h Fri Feb 01 10:59:57 2013 -0600 +++ b/src/mat/impls/aij/seq/seqcusparse/cusparsematimpl.h Fri Feb 01 16:51:42 2013 -0700 @@ -24,6 +24,7 @@ GPU_Matrix_Ifc* upTriFactorPtr; /* pointer for upper triangular (factored matrix) on GPU */ CUSPARRAY* tempvec; MatCUSPARSEStorageFormat format; /* the storage format for the matrix on the device */ + PetscBool hasTranspose; /* boolean describing whether a transpose has been built or not */ }; struct Mat_SeqAIJCUSPARSE { @@ -31,6 +32,7 @@ CUSPARRAY* tempvec; /*pointer to a workvector to which we can copy the relevant indices of a vector we want to multiply */ PetscInt nonzerorow; /* number of nonzero rows ... used in the flop calculations */ MatCUSPARSEStorageFormat format; /* the storage format for the matrix on the device */ + PetscBool hasTranspose; /* boolean describing whether a transpose has been built or not */ }; extern PetscErrorCode MatCUSPARSECopyToGPU(Mat); diff -r 45ae9050b6b2 -r 56912710a81a src/vec/vec/impls/seq/seqcusp/veccusp.cu --- a/src/vec/vec/impls/seq/seqcusp/veccusp.cu Fri Feb 01 10:59:57 2013 -0600 +++ b/src/vec/vec/impls/seq/seqcusp/veccusp.cu Fri Feb 01 16:51:42 2013 -0700 @@ -1907,6 +1907,35 @@ PetscFunctionReturn(0); } + +#if defined(PETSC_USE_COMPLEX) +struct conjugate +{ + __host__ __device__ + PetscScalar operator()(PetscScalar x) + { + return cusp::conj(x); + } +}; +#endif + + +#undef __FUNCT__ +#define __FUNCT__ "VecConjugate_SeqCUSP" +PetscErrorCode VecConjugate_SeqCUSP(Vec xin) +{ + PetscErrorCode ierr; + CUSPARRAY *xarray; + + PetscFunctionBegin; + ierr = VecCUSPGetArrayReadWrite(xin,&xarray);CHKERRQ(ierr); +#if defined(PETSC_USE_COMPLEX) + thrust::transform(xarray->begin(), xarray->end(), xarray->begin(), conjugate()); +#endif + ierr = VecCUSPRestoreArrayReadWrite(xin,&xarray);CHKERRQ(ierr); + PetscFunctionReturn(0); +} + EXTERN_C_BEGIN #undef __FUNCT__ #define __FUNCT__ "VecCreate_SeqCUSP" @@ -1948,6 +1977,7 @@ V->ops->resetarray = VecResetArray_SeqCUSP; V->ops->destroy = VecDestroy_SeqCUSP; V->ops->duplicate = VecDuplicate_SeqCUSP; + V->ops->conjugate = VecConjugate_SeqCUSP; ierr = VecCUSPAllocateCheck(V);CHKERRQ(ierr); V->valid_GPU_array = PETSC_CUSP_GPU; # HG changeset patch # User Paul Mullowney # Date 1359764049 25200 # Node ID a83c33ec5ec96d6518f1fb9255a6e2ac980b11ed # Parent 56912710a81a5dddbb66dfe3a21cb8ddd32d6555 A fix for the unitialized variable compiler warnings when compiling cusp classes in double complex. diff -r 56912710a81a -r a83c33ec5ec9 src/vec/vec/impls/seq/seqcusp/cuspvecimpl.h --- a/src/vec/vec/impls/seq/seqcusp/cuspvecimpl.h Fri Feb 01 16:51:42 2013 -0700 +++ b/src/vec/vec/impls/seq/seqcusp/cuspvecimpl.h Fri Feb 01 17:14:09 2013 -0700 @@ -111,7 +111,7 @@ PetscFunctionBegin; v->valid_GPU_array = PETSC_CUSP_GPU; - + ierr = PetscObjectStateIncrease((PetscObject)v);CHKERRQ(ierr); PetscFunctionReturn(0); } @@ -144,6 +144,7 @@ PetscErrorCode ierr; PetscFunctionBegin; + *a = 0; ierr = VecCUSPAllocateCheck(v);CHKERRQ(ierr); *a = ((Vec_CUSP*)v->spptr)->GPUarray; PetscFunctionReturn(0);