exporting patch: # HG changeset patch # User Paul Mullowney # Date 1357069160 25200 # Node ID acedecab2d8bc327ace3b7012d29d1b81e92cbb5 # Parent 29860ecac7a53df5f615e4e5d2dad88d9afd9d72 Adding changes to enable double complex bicg to work on GPUs via cusparse class. This includes a VecConjugate implementation in veccusp.cu and implementations of TransposeSolve in aijcusparse.cu. Lastly, some cleanup of warnings when doing complex builds with GPUs. diff -r 29860ecac7a5 -r acedecab2d8b src/mat/impls/aij/seq/seqcusp/aijcusp.cu --- a/src/mat/impls/aij/seq/seqcusp/aijcusp.cu Tue Jan 01 10:16:09 2013 -0600 +++ b/src/mat/impls/aij/seq/seqcusp/aijcusp.cu Tue Jan 01 12:39:20 2013 -0700 @@ -354,7 +354,7 @@ #ifndef PETSC_HAVE_TXPETSCGPU PetscBool usecprow = a->compressedrow.use; #endif - CUSPARRAY *xarray,*yarray; + CUSPARRAY *xarray,*yarray=PETSC_NULL; PetscFunctionBegin; /* The line below should not be necessary as it has been moved to MatAssemblyEnd_SeqAIJCUSP @@ -407,7 +407,7 @@ Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; PetscErrorCode ierr; Mat_SeqAIJCUSP *cuspstruct = (Mat_SeqAIJCUSP *)A->spptr; - CUSPARRAY *xarray,*yarray,*zarray; + CUSPARRAY *xarray,*yarray,*zarray=PETSC_NULL; PetscFunctionBegin; /* The line below should not be necessary as it has been moved to MatAssemblyEnd_SeqAIJCUSP ierr = MatCUSPCopyToGPU(A);CHKERRQ(ierr); */ diff -r 29860ecac7a5 -r acedecab2d8b src/mat/impls/aij/seq/seqcusparse/aijcusparse.cu --- a/src/mat/impls/aij/seq/seqcusparse/aijcusparse.cu Tue Jan 01 10:16:09 2013 -0600 +++ b/src/mat/impls/aij/seq/seqcusparse/aijcusparse.cu Tue Jan 01 12:39:20 2013 -0700 @@ -26,6 +26,8 @@ PetscErrorCode MatLUFactorNumeric_SeqAIJCUSPARSE(Mat,Mat,const MatFactorInfo *); PetscErrorCode MatSolve_SeqAIJCUSPARSE(Mat,Vec,Vec); PetscErrorCode MatSolve_SeqAIJCUSPARSE_NaturalOrdering(Mat,Vec,Vec); +PetscErrorCode MatSolveTranspose_SeqAIJCUSPARSE(Mat,Vec,Vec); +PetscErrorCode MatSolveTranspose_SeqAIJCUSPARSE_NaturalOrdering(Mat,Vec,Vec); PetscErrorCode MatSetFromOptions_SeqAIJCUSPARSE(Mat); PetscErrorCode MatSeqAIJCUSPARSEAnalysisAndCopyToGPU(Mat); PetscErrorCode MatMult_SeqAIJCUSPARSE(Mat,Vec,Vec); @@ -204,12 +206,12 @@ PetscInt i,nz, nzLower, offset, rowOffset; PetscFunctionBegin; - if (A->valid_GPU_matrix == PETSC_CUSP_UNALLOCATED || A->valid_GPU_matrix == PETSC_CUSP_CPU){ - try { + if (A->valid_GPU_matrix == PETSC_CUSP_UNALLOCATED || A->valid_GPU_matrix == PETSC_CUSP_CPU){ + try { /* first figure out the number of nonzeros in the lower triangular matrix including 1's on the diagonal. */ nzLower=n+ai[n]-ai[1]; - /* Allocate Space for the lower triangular matrix */ + /* Allocate Space for the lower triangular matrix */ ierr = cudaMallocHost((void **) &AiLo, (n+1)*sizeof(PetscInt));CHKERRCUSP(ierr); ierr = cudaMallocHost((void **) &AjLo, nzLower*sizeof(PetscInt));CHKERRCUSP(ierr); ierr = cudaMallocHost((void **) &AALo, nzLower*sizeof(PetscScalar));CHKERRCUSP(ierr); @@ -231,17 +233,17 @@ ierr = PetscMemcpy(&(AjLo[offset]), vi, nz*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMemcpy(&(AALo[offset]), v, nz*sizeof(PetscScalar));CHKERRQ(ierr); - + offset+=nz; AjLo[offset]=(PetscInt) i; AALo[offset]=(MatScalar) 1.0; offset+=1; - + v += nz; vi += nz; } cusparseMat = GPU_Matrix_Factory::getNew(MatCUSPARSEStorageFormats[cusparseTriFactors->format]); - stat = cusparseMat->initializeCusparse(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_TRIANGULAR, CUSPARSE_FILL_MODE_LOWER);CHKERRCUSP(stat); + stat = cusparseMat->initializeCusparseMat(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_TRIANGULAR, CUSPARSE_FILL_MODE_LOWER);CHKERRCUSP(stat); ierr = cusparseMat->setMatrix(n, n, nzLower, AiLo, AjLo, AALo);CHKERRCUSP(ierr); stat = cusparseMat->solveAnalysis();CHKERRCUSP(stat); ((Mat_SeqAIJCUSPARSETriFactors*)A->spptr)->loTriFactorPtr = cusparseMat; @@ -252,7 +254,7 @@ SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); } } - PetscFunctionReturn(0); + PetscFunctionReturn(0); } #undef __FUNCT__ @@ -273,8 +275,8 @@ PetscFunctionBegin; - if (A->valid_GPU_matrix == PETSC_CUSP_UNALLOCATED || A->valid_GPU_matrix == PETSC_CUSP_CPU){ - try { + if (A->valid_GPU_matrix == PETSC_CUSP_UNALLOCATED || A->valid_GPU_matrix == PETSC_CUSP_CPU){ + try { /* next, figure out the number of nonzeros in the upper triangular matrix. */ nzUpper = adiag[0]-adiag[n]; @@ -290,23 +292,23 @@ for (i=n-1; i>=0; i--){ v = aa + adiag[i+1] + 1; vi = aj + adiag[i+1] + 1; - + /* number of elements NOT on the diagonal */ nz = adiag[i] - adiag[i+1]-1; - + /* decrement the offset */ offset -= (nz+1); - + /* first, set the diagonal elements */ AjUp[offset] = (PetscInt) i; AAUp[offset] = 1./v[nz]; AiUp[i] = AiUp[i+1] - (nz+1); - + ierr = PetscMemcpy(&(AjUp[offset+1]), vi, nz*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMemcpy(&(AAUp[offset+1]), v, nz*sizeof(PetscScalar));CHKERRQ(ierr); } cusparseMat = GPU_Matrix_Factory::getNew(MatCUSPARSEStorageFormats[cusparseTriFactors->format]); - stat = cusparseMat->initializeCusparse(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_TRIANGULAR, CUSPARSE_FILL_MODE_UPPER);CHKERRCUSP(stat); + stat = cusparseMat->initializeCusparseMat(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_TRIANGULAR, CUSPARSE_FILL_MODE_UPPER);CHKERRCUSP(stat); ierr = cusparseMat->setMatrix(n, n, nzUpper, AiUp, AjUp, AAUp);CHKERRCUSP(ierr); stat = cusparseMat->solveAnalysis();CHKERRCUSP(stat); ((Mat_SeqAIJCUSPARSETriFactors*)A->spptr)->upTriFactorPtr = cusparseMat; @@ -317,7 +319,45 @@ SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); } } - PetscFunctionReturn(0); + PetscFunctionReturn(0); +} + + +#undef __FUNCT__ +#define __FUNCT__ "MatSeqAIJCUSPARSEAnalyzeTransposeForSolve" +PetscErrorCode MatSeqAIJCUSPARSEAnalyzeTransposeForSolve(Mat A) +{ + Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; + GPU_Matrix_Ifc* cusparseMatLo = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr; + GPU_Matrix_Ifc* cusparseMatUp = (GPU_Matrix_Ifc*)cusparseTriFactors->upTriFactorPtr; + cusparseStatus_t stat; + PetscFunctionBegin; + stat = cusparseMatLo->initializeCusparseMat(MAT_cusparseHandle, + CUSPARSE_MATRIX_TYPE_TRIANGULAR, + CUSPARSE_FILL_MODE_UPPER, + TRANSPOSE);CHKERRCUSP(stat); + stat = cusparseMatLo->solveAnalysis(TRANSPOSE);CHKERRCUSP(stat); + + stat = cusparseMatUp->initializeCusparseMat(MAT_cusparseHandle, + CUSPARSE_MATRIX_TYPE_TRIANGULAR, + CUSPARSE_FILL_MODE_LOWER, + TRANSPOSE);CHKERRCUSP(stat); + stat = cusparseMatUp->solveAnalysis(TRANSPOSE);CHKERRCUSP(stat); + PetscFunctionReturn(0); +} + +#undef __FUNCT__ +#define __FUNCT__ "MatSeqAIJCUSPARSEGenerateTransposeForMult" +PetscErrorCode MatSeqAIJCUSPARSEGenerateTransposeForMult(Mat A) +{ + PetscErrorCode ierr; + Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE*)A->spptr; + Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; + cusparseStatus_t stat; + PetscFunctionBegin; + stat = cusparseMat->mat->initializeCusparseMat(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_GENERAL, CUSPARSE_FILL_MODE_UPPER, TRANSPOSE);CHKERRCUSP(stat); + ierr = cusparseMat->mat->setMatrix(A->rmap->n, A->cmap->n, a->nz, a->i, a->j, a->a, TRANSPOSE);CHKERRCUSP(ierr); + PetscFunctionReturn(0); } #undef __FUNCT__ @@ -352,7 +392,7 @@ if (!col_identity) ierr = cusparseTriFactors->upTriFactorPtr->setOrdIndices(c, n);CHKERRCUSP(ierr); ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); - PetscFunctionReturn(0); + PetscFunctionReturn(0); } #undef __FUNCT__ @@ -369,14 +409,87 @@ /* determine which version of MatSolve needs to be used. */ ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); - if (row_identity && col_identity) B->ops->solve = MatSolve_SeqAIJCUSPARSE_NaturalOrdering; - else B->ops->solve = MatSolve_SeqAIJCUSPARSE; - + if (row_identity && col_identity) { + B->ops->solve = MatSolve_SeqAIJCUSPARSE_NaturalOrdering; + B->ops->solvetranspose = MatSolveTranspose_SeqAIJCUSPARSE_NaturalOrdering; + } + else { + B->ops->solve = MatSolve_SeqAIJCUSPARSE; + B->ops->solvetranspose = MatSolveTranspose_SeqAIJCUSPARSE; + } /* get the triangular factors */ ierr = MatSeqAIJCUSPARSEAnalysisAndCopyToGPU(B);CHKERRQ(ierr); PetscFunctionReturn(0); } + +#undef __FUNCT__ +#define __FUNCT__ "MatSolveTranspose_SeqAIJCUSPARSE" +PetscErrorCode MatSolveTranspose_SeqAIJCUSPARSE(Mat A,Vec bb,Vec xx) +{ + Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; + PetscErrorCode ierr; + CUSPARRAY *xGPU=PETSC_NULL, *bGPU; + cusparseStatus_t stat; + Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; + GPU_Matrix_Ifc *cusparseMatLo = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr; + GPU_Matrix_Ifc *cusparseMatUp = (GPU_Matrix_Ifc*)cusparseTriFactors->upTriFactorPtr; + CUSPARRAY * tempGPU = (CUSPARRAY*) cusparseTriFactors->tempvec; + + PetscFunctionBegin; + /* Analyze the matrix ... on the fly */ + ierr = MatSeqAIJCUSPARSEAnalyzeTransposeForSolve(A);CHKERRQ(ierr); + + /* Get the GPU pointers */ + ierr = VecCUSPGetArrayWrite(xx,&xGPU);CHKERRQ(ierr); + ierr = VecCUSPGetArrayRead(bb,&bGPU);CHKERRQ(ierr); + + /* solve with reordering */ + ierr = cusparseMatUp->reorderIn(xGPU, bGPU);CHKERRCUSP(ierr); + stat = cusparseMatUp->solve(xGPU, tempGPU, TRANSPOSE);CHKERRCUSP(stat); + stat = cusparseMatLo->solve(tempGPU, xGPU, TRANSPOSE);CHKERRCUSP(stat); + ierr = cusparseMatLo->reorderOut(xGPU);CHKERRCUSP(ierr); + + /* restore */ + ierr = VecCUSPRestoreArrayRead(bb,&bGPU);CHKERRQ(ierr); + ierr = VecCUSPRestoreArrayWrite(xx,&xGPU);CHKERRQ(ierr); + ierr = WaitForGPU();CHKERRCUSP(ierr); + ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); + PetscFunctionReturn(0); +} + +#undef __FUNCT__ +#define __FUNCT__ "MatSolveTranspose_SeqAIJCUSPARSE_NaturalOrdering" +PetscErrorCode MatSolveTranspose_SeqAIJCUSPARSE_NaturalOrdering(Mat A,Vec bb,Vec xx) +{ + Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; + PetscErrorCode ierr; + CUSPARRAY *xGPU=PETSC_NULL, *bGPU; + cusparseStatus_t stat; + Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; + GPU_Matrix_Ifc *cusparseMatLo = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr; + GPU_Matrix_Ifc *cusparseMatUp = (GPU_Matrix_Ifc*)cusparseTriFactors->upTriFactorPtr; + CUSPARRAY * tempGPU = (CUSPARRAY*) cusparseTriFactors->tempvec; + + PetscFunctionBegin; + /* Analyze the matrix ... on the fly */ + ierr = MatSeqAIJCUSPARSEAnalyzeTransposeForSolve(A);CHKERRQ(ierr); + + /* Get the GPU pointers */ + ierr = VecCUSPGetArrayWrite(xx,&xGPU);CHKERRQ(ierr); + ierr = VecCUSPGetArrayRead(bb,&bGPU);CHKERRQ(ierr); + + /* solve */ + stat = cusparseMatUp->solve(bGPU, tempGPU, TRANSPOSE);CHKERRCUSP(stat); + stat = cusparseMatLo->solve(tempGPU, xGPU, TRANSPOSE);CHKERRCUSP(stat); + + /* restore */ + ierr = VecCUSPRestoreArrayRead(bb,&bGPU);CHKERRQ(ierr); + ierr = VecCUSPRestoreArrayWrite(xx,&xGPU);CHKERRQ(ierr); + ierr = WaitForGPU();CHKERRCUSP(ierr); + ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); + PetscFunctionReturn(0); +} #undef __FUNCT__ @@ -385,7 +498,7 @@ { Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; PetscErrorCode ierr; - CUSPARRAY *xGPU, *bGPU; + CUSPARRAY *xGPU=PETSC_NULL, *bGPU; cusparseStatus_t stat; Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; GPU_Matrix_Ifc *cusparseMatLo = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr; @@ -402,7 +515,7 @@ stat = cusparseMatLo->solve(xGPU, tempGPU);CHKERRCUSP(stat); stat = cusparseMatUp->solve(tempGPU, xGPU);CHKERRCUSP(stat); ierr = cusparseMatUp->reorderOut(xGPU);CHKERRCUSP(ierr); - + ierr = VecCUSPRestoreArrayRead(bb,&bGPU);CHKERRQ(ierr); ierr = VecCUSPRestoreArrayWrite(xx,&xGPU);CHKERRQ(ierr); ierr = WaitForGPU();CHKERRCUSP(ierr); @@ -419,7 +532,7 @@ { Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; PetscErrorCode ierr; - CUSPARRAY *xGPU, *bGPU; + CUSPARRAY *xGPU=PETSC_NULL, *bGPU; cusparseStatus_t stat; Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr; GPU_Matrix_Ifc *cusparseMatLo = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr; @@ -466,7 +579,6 @@ delete cusparseMat->mat; if (cusparseMat->tempvec) delete cusparseMat->tempvec; - } catch(char* ex) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); } @@ -476,7 +588,7 @@ for (int j = 0; jnonzerorow += ((a->i[j+1]-a->i[j])>0); - if (a->compressedrow.use) { + if (a->compressedrow.use) { m = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; @@ -499,19 +611,18 @@ /* Build our matrix ... first determine the GPU storage type */ cusparseMat->mat = GPU_Matrix_Factory::getNew(MatCUSPARSEStorageFormats[cusparseMat->format]); - /* Create the streams and events (if desired). */ PetscMPIInt size; ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); - ierr = cusparseMat->mat->buildStreamsAndEvents(size, &theBodyStream);CHKERRCUSP(ierr); + ierr = cusparseMat->mat->buildStreamsAndEvents(size, &theBodyStream);CHKERRCUSP(ierr); /* FILL MODE UPPER is irrelevant */ - cusparseStatus_t stat = cusparseMat->mat->initializeCusparse(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_GENERAL, CUSPARSE_FILL_MODE_UPPER);CHKERRCUSP(stat); + cusparseStatus_t stat = cusparseMat->mat->initializeCusparseMat(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_GENERAL, CUSPARSE_FILL_MODE_UPPER);CHKERRCUSP(stat); /* lastly, build the matrix */ ierr = cusparseMat->mat->setMatrix(m, A->cmap->n, a->nz, ii, a->j, a->a);CHKERRCUSP(ierr); cusparseMat->mat->setCPRowIndices(ridx, m); - if (!a->compressedrow.use) { + if (!a->compressedrow.use) { ierr = PetscFree(ii);CHKERRQ(ierr); ierr = PetscFree(ridx);CHKERRQ(ierr); } @@ -559,7 +670,7 @@ Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; PetscErrorCode ierr; Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE *)A->spptr; - CUSPARRAY *xarray,*yarray; + CUSPARRAY *xarray,*yarray=PETSC_NULL; PetscFunctionBegin; /* The line below should not be necessary as it has been moved to MatAssemblyEnd_SeqAIJCUSPARSE @@ -588,19 +699,14 @@ Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; PetscErrorCode ierr; Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE *)A->spptr; - CUSPARRAY *xarray,*yarray; + CUSPARRAY *xarray,*yarray=PETSC_NULL; PetscFunctionBegin; - /* The line below should not be necessary as it has been moved to MatAssemblyEnd_SeqAIJCUSPARSE - ierr = MatSeqAIJCUSPARSECopyToGPU(A);CHKERRQ(ierr); */ + ierr = MatSeqAIJCUSPARSEGenerateTransposeForMult(A);CHKERRQ(ierr); ierr = VecCUSPGetArrayRead(xx,&xarray);CHKERRQ(ierr); ierr = VecCUSPGetArrayWrite(yy,&yarray);CHKERRQ(ierr); try { -#if !defined(PETSC_USE_COMPLEX) ierr = cusparseMat->mat->multiply(xarray, yarray, TRANSPOSE);CHKERRCUSP(ierr); -#else - ierr = cusparseMat->mat->multiply(xarray, yarray, HERMITIAN);CHKERRCUSP(ierr); -#endif } catch (char* ex) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex); } @@ -620,10 +726,8 @@ Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; PetscErrorCode ierr; Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE *)A->spptr; - CUSPARRAY *xarray,*yarray,*zarray; + CUSPARRAY *xarray,*yarray,*zarray=PETSC_NULL; PetscFunctionBegin; - /* The line below should not be necessary as it has been moved to MatAssemblyEnd_SeqAIJCUSPARSE - ierr = MatSeqAIJCUSPARSECopyToGPU(A);CHKERRQ(ierr); */ try { ierr = VecCopy_SeqCUSP(yy,zz);CHKERRQ(ierr); ierr = VecCUSPGetArrayRead(xx,&xarray);CHKERRQ(ierr); @@ -652,10 +756,9 @@ Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; PetscErrorCode ierr; Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE *)A->spptr; - CUSPARRAY *xarray,*yarray,*zarray; + CUSPARRAY *xarray,*yarray,*zarray=PETSC_NULL; PetscFunctionBegin; - /* The line below should not be necessary as it has been moved to MatAssemblyEnd_SeqAIJCUSPARSE - ierr = MatSeqAIJCUSPARSECopyToGPU(A);CHKERRQ(ierr); */ + ierr = MatSeqAIJCUSPARSEGenerateTransposeForMult(A);CHKERRQ(ierr); try { ierr = VecCopy_SeqCUSP(yy,zz);CHKERRQ(ierr); ierr = VecCUSPGetArrayRead(xx,&xarray);CHKERRQ(ierr); @@ -663,11 +766,7 @@ ierr = VecCUSPGetArrayWrite(zz,&zarray);CHKERRQ(ierr); /* multiply add with matrix transpose */ -#if !defined(PETSC_USE_COMPLEX) ierr = cusparseMat->mat->multiplyAdd(xarray, yarray, TRANSPOSE);CHKERRCUSP(ierr); -#else - ierr = cusparseMat->mat->multiplyAdd(xarray, yarray, HERMITIAN);CHKERRCUSP(ierr); -#endif ierr = VecCUSPRestoreArrayRead(xx,&xarray);CHKERRQ(ierr); ierr = VecCUSPRestoreArrayRead(yy,&yarray);CHKERRQ(ierr); @@ -786,7 +885,7 @@ GPU_Matrix_Ifc *cusparseMatLo = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr; GPU_Matrix_Ifc *cusparseMatUp = (GPU_Matrix_Ifc*)cusparseTriFactors->upTriFactorPtr; delete (GPU_Matrix_Ifc *)cusparseMatLo; - delete (GPU_Matrix_Ifc *)cusparseMatUp; + delete (GPU_Matrix_Ifc *)cusparseMatUp; delete (CUSPARRAY*) cusparseTriFactors->tempvec; delete cusparseTriFactors; } catch(char* ex) { diff -r 29860ecac7a5 -r acedecab2d8b src/vec/vec/impls/seq/seqcusp/veccusp.cu --- a/src/vec/vec/impls/seq/seqcusp/veccusp.cu Tue Jan 01 10:16:09 2013 -0600 +++ b/src/vec/vec/impls/seq/seqcusp/veccusp.cu Tue Jan 01 12:39:20 2013 -0700 @@ -634,7 +634,7 @@ #define __FUNCT__ "VecPointwiseDivide_SeqCUSP" PetscErrorCode VecPointwiseDivide_SeqCUSP(Vec win, Vec xin, Vec yin) { - CUSPARRAY *warray,*xarray,*yarray; + CUSPARRAY *warray=PETSC_NULL,*xarray,*yarray; PetscErrorCode ierr; PetscFunctionBegin; @@ -700,7 +700,7 @@ #define __FUNCT__ "VecWAXPY_SeqCUSP" PetscErrorCode VecWAXPY_SeqCUSP(Vec win,PetscScalar alpha,Vec xin, Vec yin) { - CUSPARRAY *xarray,*yarray,*warray; + CUSPARRAY *xarray,*yarray,*warray=PETSC_NULL; PetscErrorCode ierr; PetscFunctionBegin; @@ -1288,7 +1288,7 @@ #define __FUNCT__ "VecSet_SeqCUSP" PetscErrorCode VecSet_SeqCUSP(Vec xin,PetscScalar alpha) { - CUSPARRAY *xarray; + CUSPARRAY *xarray=PETSC_NULL; PetscErrorCode ierr; PetscFunctionBegin; @@ -1899,6 +1899,35 @@ PetscFunctionReturn(0); } +#if defined(PETSC_USE_COMPLEX) +struct conjugate +{ + __host__ __device__ + PetscScalar operator()(PetscScalar x) + { + return cusp::conj(x); + } +}; +#endif + + +#undef __FUNCT__ +#define __FUNCT__ "VecConjugate_SeqCUSP" +PetscErrorCode VecConjugate_SeqCUSP(Vec xin) +{ + PetscErrorCode ierr; + CUSPARRAY *xarray; + + PetscFunctionBegin; + ierr = VecCUSPGetArrayRead(xin,&xarray);CHKERRQ(ierr); +#if defined(PETSC_USE_COMPLEX) + thrust::transform(xarray->begin(), xarray->end(), xarray->begin(), conjugate()); +#endif + ierr = VecCUSPRestoreArrayRead(xin,&xarray);CHKERRQ(ierr); + PetscFunctionReturn(0); +} + + EXTERN_C_BEGIN #undef __FUNCT__ #define __FUNCT__ "VecCreate_SeqCUSP" @@ -1939,7 +1968,7 @@ V->ops->resetarray = VecResetArray_SeqCUSP; V->ops->destroy = VecDestroy_SeqCUSP; V->ops->duplicate = VecDuplicate_SeqCUSP; - + V->ops->conjugate = VecConjugate_SeqCUSP; ierr = VecCUSPAllocateCheck(V);CHKERRQ(ierr); V->valid_GPU_array = PETSC_CUSP_GPU; ierr = VecSet(V,0.0);CHKERRQ(ierr);