diff --git a/include/finclude/petscmatdef.h b/include/finclude/petscmatdef.h index 6b5394e..fa8503a 100644 --- a/include/finclude/petscmatdef.h +++ b/include/finclude/petscmatdef.h @@ -73,6 +73,9 @@ #define MATSEQAIJ 'seqaij' #define MATMPIAIJ 'mpiaij' #define MATAIJ 'aij' +#define MATSEQFAIJ 'seqfaij' +#define MATMPIFAIJ 'mpifaij' +#define MATFAIJ 'faij' #define MATSHELL 'shell' #define MATSEQDENSE 'seqdense' #define MATMPIDENSE 'mpidense' diff --git a/include/petscmat.h b/include/petscmat.h index eff535a..676a47a 100644 --- a/include/petscmat.h +++ b/include/petscmat.h @@ -35,6 +35,9 @@ E*/ #define MATSEQAIJ "seqaij" #define MATMPIAIJ "mpiaij" #define MATAIJ "aij" +#define MATSEQFAIJ "seqfaij" +#define MATMPIFAIJ "mpifaij" +#define MATFAIJ "faij" #define MATSHELL "shell" #define MATSEQDENSE "seqdense" #define MATMPIDENSE "mpidense" diff --git a/src/mat/impls/aij/mpi/faij/makefile b/src/mat/impls/aij/mpi/faij/makefile new file mode 100644 index 0000000..43c79ec --- /dev/null +++ b/src/mat/impls/aij/mpi/faij/makefile @@ -0,0 +1,17 @@ +ALL: lib + +CFLAGS = +FFLAGS = +SOURCEC = mpifaij.c +SOURCEF = +SOURCEH = +OBJSC = mpifaij.o +OBJSF = +LIBBASE = libpetscmat +DIRS = +MANSEC = Mat +LOCDIR = src/mat/impls/aij/mpi/faij/ + +include ${PETSC_DIR}/conf/variables +include ${PETSC_DIR}/conf/rules +include ${PETSC_DIR}/conf/test diff --git a/src/mat/impls/aij/mpi/faij/mpifaij.c b/src/mat/impls/aij/mpi/faij/mpifaij.c new file mode 100644 index 0000000..0fe5dea --- /dev/null +++ b/src/mat/impls/aij/mpi/faij/mpifaij.c @@ -0,0 +1,577 @@ +#define PETSCMAT_DLL + +#include "../src/mat/impls/aij/mpi/mpiaij.h" +#undef __FUNCT__ +#define __FUNCT__ "MatCreateMPIFAIJ" +/*@C + MatCreateMPIFAIJ - Creates a sparse parallel matrix whose local + portions are stored as SEQFAIJ matrices (a matrix class that inherits + from SEQAIJ but with dynamic array for flexible value insert). + + Collective on MPI_Comm + + Input Parameters: ++ comm - MPI communicator +. m - number of local rows (or PETSC_DECIDE to have calculated if M is given) + This value should be the same as the local size used in creating the + y vector for the matrix-vector product y = Ax. +. n - This value should be the same as the local size used in creating the + x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have + calculated if N is given) For square matrices n is almost always m. +. M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) +. N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) +. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix + (same value is used for all local rows) +. d_nnz - array containing the number of nonzeros in the various rows of the + DIAGONAL portion of the local submatrix (possibly different for each row) + or PETSC_NULL, if d_nz is used to specify the nonzero structure. + The size of this array is equal to the number of local rows, i.e 'm'. + You must leave room for the diagonal entry even if it is zero. +. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local + submatrix (same value is used for all local rows). +- o_nnz - array containing the number of nonzeros in the various rows of the + OFF-DIAGONAL portion of the local submatrix (possibly different for + each row) or PETSC_NULL, if o_nz is used to specify the nonzero + structure. The size of this array is equal to the number + of local rows, i.e 'm'. + + Output Parameter: +. A - the matrix + + Notes: + If the *_nnz parameter is given then the *_nz parameter is ignored + + m,n,M,N parameters specify the size of the matrix, and its partitioning across + processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate + storage requirements for this matrix. + + If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one + processor than it must be used on all processors that share the object for + that argument. + + The user MUST specify either the local or global matrix dimensions + (possibly both). + + The parallel matrix is partitioned such that the first m0 rows belong to + process 0, the next m1 rows belong to process 1, the next m2 rows belong + to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. + + The DIAGONAL portion of the local submatrix of a processor can be defined + as the submatrix which is obtained by extraction the part corresponding + to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the + first row that belongs to the processor, and r2 is the last row belonging + to the this processor. This is a square mxm matrix. The remaining portion + of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. + + If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. + + When calling this routine with a single process communicator, a matrix of + type SEQFAIJ is returned. If a matrix of type MPIFAIJ is desired + for this type of communicator, use the construction mechanism: + MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); + + By default, this format uses inodes (identical nodes) when possible. + We search for consecutive rows with the same nonzero structure, thereby + reusing matrix information to achieve increased efficiency. + + Options Database Keys: ++ -mat_no_inode - Do not use inodes +. -mat_inode_limit - Sets inode limit (max limit=5) +- -mat_aij_oneindex - Internally use indexing starting at 1 + rather than 0. Note that when calling MatSetValues(), + the user still MUST index entries starting at 0! + + Level: intermediate + +.keywords: matrix, cray, sparse, parallel + +.seealso: MatCreate(), MatCreateSeqFAIJ(), MatSetValues() +@*/ +PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIFAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) { + PetscErrorCode ierr; + PetscMPIInt size; + + PetscFunctionBegin; + ierr = MatCreate(comm,A); + CHKERRQ(ierr); + ierr = MatSetSizes(*A,m,n,M,N); + CHKERRQ(ierr); + ierr = MPI_Comm_size(comm,&size); + CHKERRQ(ierr); + if (size > 1) { + ierr = MatSetType(*A,MATMPIFAIJ); + CHKERRQ(ierr); + ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz); + CHKERRQ(ierr); + } else { + ierr = MatSetType(*A,MATSEQFAIJ); + CHKERRQ(ierr); + ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz); + CHKERRQ(ierr); + } + PetscFunctionReturn(0); +} + +EXTERN_C_BEGIN +extern PetscErrorCode MatConvert_SeqAIJ_SeqFAIJ(Mat,const MatType,MatReuse,Mat*); +extern PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); +EXTERN_C_END + +EXTERN_C_BEGIN +#undef __FUNCT__ +#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIFAIJ" +PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIFAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) { + Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; + PetscErrorCode ierr; + + PetscFunctionBegin; + ierr = MatMPIAIJSetPreallocation_MPIAIJ(B,d_nz,d_nnz,o_nz,o_nnz); + CHKERRQ(ierr); + ierr = MatConvert_SeqAIJ_SeqFAIJ(b->A, MATSEQFAIJ, MAT_REUSE_MATRIX, &b->A); + CHKERRQ(ierr); + ierr = MatConvert_SeqAIJ_SeqFAIJ(b->B, MATSEQFAIJ, MAT_REUSE_MATRIX, &b->B); + CHKERRQ(ierr); + PetscFunctionReturn(0); +} +EXTERN_C_END + + +extern PetscErrorCode MatSetValues_SeqFAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is); +extern PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat); +extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx); + +#undef __FUNCT__ +#define __FUNCT__ "MatSetValues_MPIFAIJ" +PetscErrorCode MatSetValues_MPIFAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) { + Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; + PetscScalar value; + PetscErrorCode ierr; + PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; + PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; + PetscTruth roworiented = aij->roworiented; + + /* Some Variables required in the macro */ + Mat A = aij->A; + Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; + PetscTruth ignorezeroentries = a->ignorezeroentries; + Mat B = aij->B; + + PetscFunctionBegin; + if (v) + PetscValidScalarPointer(v,6); + for (i=0; i= mat->rmap->N) + SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); +#endif + + if (im[i] >= rstart && im[i] < rend) { + row = im[i] - rstart; + + for (j=0; j= cstart && in[j] < cend) { + col = in[j] - cstart; + MatSetValues_SeqFAIJ(A,1,&row,1,&col,&value,addv); + } else if (in[j] < 0) + continue; +#if defined(PETSC_USE_DEBUG) + + else if (in[j] >= mat->cmap->N) { + SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); + } +#endif + else { + if (mat->was_assembled) { + if (!aij->colmap) { + ierr = CreateColmap_MPIAIJ_Private(mat); + CHKERRQ(ierr); + } +#if defined (PETSC_USE_CTABLE) + ierr = PetscTableFind(aij->colmap,in[j]+1,&col); + CHKERRQ(ierr); + col--; +#else + + col = aij->colmap[in[j]] - 1; +#endif + + if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { + ierr = DisAssemble_MPIAIJ(mat); + CHKERRQ(ierr); + col = in[j]; + /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ + B = aij->B; + } + } else + col = in[j]; + MatSetValues_SeqFAIJ(B,1,&row,1,&col,&value,addv); + } + } + } else { + if (!aij->donotstash) { + if (roworiented) { + ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES))); + CHKERRQ(ierr); + } else { + ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES))); + CHKERRQ(ierr); + } + } + } + } + PetscFunctionReturn(0); +} + + + +#undef __FUNCT__ +#define __FUNCT__ "MatAssemblyBegin_MPIFAIJ" +PetscErrorCode MatAssemblyBegin_MPIFAIJ(Mat mat,MatAssemblyType mode) { + Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; + PetscErrorCode ierr; + PetscInt nstash,reallocs; + InsertMode addv; + + PetscFunctionBegin; + if (aij->donotstash) { + PetscFunctionReturn(0); + } + + /* make sure all processors are either in INSERTMODE or ADDMODE */ + ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm); + CHKERRQ(ierr); + if (addv == (ADD_VALUES|INSERT_VALUES)) { + SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); + } + mat->insertmode = addv; /* in case this processor had no cache */ + + ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range); + CHKERRQ(ierr); + ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs); + CHKERRQ(ierr); + ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); + CHKERRQ(ierr); + PetscFunctionReturn(0); +} + + +extern PetscErrorCode MatFlushBuffer_SeqFAIJ(Mat mat); + + +#undef __FUNCT__ +#define __FUNCT__ "MatAssemblyEnd_MPIFAIJ" +PetscErrorCode MatAssemblyEnd_MPIFAIJ(Mat mat,MatAssemblyType mode) { + Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; + Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; + PetscErrorCode ierr; + PetscMPIInt n; + PetscInt i,j,rstart,ncols,flg; + PetscInt *row,*col; + PetscTruth other_disassembled; + PetscScalar *val; + InsertMode addv = mat->insertmode; + + /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ + PetscFunctionBegin; + if (!aij->donotstash) { + while (1) { + ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg); + CHKERRQ(ierr); + if (!flg) + break; + + for (i=0; istash); + CHKERRQ(ierr); + } + + a->compressedrow.use = PETSC_FALSE; + ierr = MatAssemblyBegin(aij->A,mode); + CHKERRQ(ierr); + ierr = MatAssemblyEnd(aij->A,mode); + CHKERRQ(ierr); + + /* determine if any processor has disassembled, if so we must + also disassemble ourselfs, in order that we may reassemble. */ + /* + if nonzero structure of submatrix B cannot change then we know that + no processor disassembled thus we can skip this stuff + */ + if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { + ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm); + CHKERRQ(ierr); + if (mat->was_assembled && !other_disassembled) { + ierr = DisAssemble_MPIAIJ(mat); + CHKERRQ(ierr); + } + } + + if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { + /* must flush matrix entries in the hash table to aij array */ + MatFlushBuffer_SeqFAIJ(aij->B); + ierr = MatSetUpMultiply_MPIAIJ(mat); + CHKERRQ(ierr); + } + + ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE); + CHKERRQ(ierr); + ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_FALSE; /* b->compressedrow.use */ + ierr = MatAssemblyBegin(aij->B,mode); + CHKERRQ(ierr); + ierr = MatAssemblyEnd(aij->B,mode); + CHKERRQ(ierr); + + ierr = PetscFree2(aij->rowvalues,aij->rowindices); + CHKERRQ(ierr); + aij->rowvalues = 0; + + /* used by MatAXPY() */ + a->xtoy = 0; + ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ + a->XtoY = 0; + ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ + + if (aij->diag) { + ierr = VecDestroy(aij->diag); + CHKERRQ(ierr); + aij->diag = 0; + } + if (a->inode.size) + mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; + + PetscFunctionReturn(0); +} + + +EXTERN_C_BEGIN +#undef __FUNCT__ +#define __FUNCT__ "MatConvert_MPIAIJ_MPIFAIJ" +PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIFAIJ(Mat A,const MatType type,MatReuse reuse,Mat *newmat) { + PetscErrorCode ierr; + Mat B = *newmat; + + PetscFunctionBegin; + if (reuse == MAT_INITIAL_MATRIX) { + ierr = MatDuplicate(A,MAT_COPY_VALUES,&B); + CHKERRQ(ierr); + } + + ierr = PetscObjectChangeTypeName( (PetscObject) B, MATMPIFAIJ); + CHKERRQ(ierr); + ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", + "MatMPIAIJSetPreallocation_MPIFAIJ", + MatMPIAIJSetPreallocation_MPIFAIJ); + CHKERRQ(ierr); + B->ops->setvalues = MatSetValues_MPIFAIJ; + B->ops->assemblybegin = MatAssemblyBegin_MPIFAIJ; + B->ops->assemblyend = MatAssemblyEnd_MPIFAIJ; + *newmat = B; + PetscFunctionReturn(0); +} +EXTERN_C_END + + + +EXTERN_C_BEGIN +#undef __FUNCT__ +#define __FUNCT__ "MatCreate_MPIFAIJ" +PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIFAIJ(Mat A) { + PetscErrorCode ierr; + + PetscFunctionBegin; + ierr = MatSetType(A,MATMPIAIJ); + CHKERRQ(ierr); + ierr = MatConvert_MPIAIJ_MPIFAIJ(A,MATMPIFAIJ,MAT_REUSE_MATRIX,&A); + CHKERRQ(ierr); + PetscFunctionReturn(0); +} +EXTERN_C_END + +/*MC + MATFAIJ - MATFAIJ = "FAIJ" - A matrix type to be used for sparse matrices. + + This matrix type is identical to MATSEQFAIJ when constructed with a single process communicator, + and MATMPIFAIJ otherwise. As a result, for single process communicators, + MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported + for communicators controlling multiple processes. It is recommended that you call both of + the above preallocation routines for simplicity. + + Options Database Keys: +. -mat_type faij - sets the matrix type to "FAIJ" during a call to MatSetFromOptions() + + Level: beginner + +.seealso: MatCreateMPIFAIJ(), MATSEQFAIJ, MATMPIFAIJ +M*/ + +EXTERN_C_BEGIN +#undef __FUNCT__ +#define __FUNCT__ "MatCreate_FAIJ" +PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_FAIJ(Mat A) { + PetscErrorCode ierr; + PetscMPIInt size; + + PetscFunctionBegin; + ierr = MPI_Comm_size(((PetscObject)A)->comm,&size); + CHKERRQ(ierr); + if (size == 1) { + ierr = MatSetType(A,MATSEQFAIJ); + CHKERRQ(ierr); + } else { + ierr = MatSetType(A,MATMPIFAIJ); + CHKERRQ(ierr); + } + PetscFunctionReturn(0); +} +EXTERN_C_END + + + +/* + Special version for direct calls from Fortran +*/ +#if defined(PETSC_HAVE_FORTRAN_CAPS) +#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ +#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) +#define matsetvaluesmpiaij_ matsetvaluesmpiaij +#endif + +/* Change these macros so can be used in void function */ +#undef CHKERRQ +#define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) +#undef SETERRQ2 +#define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) +#undef SETERRQ +#define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) + +EXTERN_C_BEGIN +#undef __FUNCT__ +#define __FUNCT__ "matsetvaluesmpifaij_" +void PETSC_STDCALL matsetvaluesmpifaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) { + Mat mat = *mmat; + PetscInt m = *mm, n = *mn; + InsertMode addv = *maddv; + Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; + PetscScalar value; + PetscErrorCode ierr; + + ierr = MatPreallocated(mat); + CHKERRQ(ierr); + if (mat->insertmode == NOT_SET_VALUES) { + mat->insertmode = addv; + } +#if defined(PETSC_USE_DEBUG) + else if (mat->insertmode != addv) { + SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); + } +#endif + { + PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; + PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; + PetscTruth roworiented = aij->roworiented; + + /* Some Variables required in the macro */ + Mat A = aij->A; + Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; + PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); + Mat B = aij->B; + + PetscFunctionBegin; + for (i=0; i= mat->rmap->N) + SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); +#endif + + if (im[i] >= rstart && im[i] < rend) { + row = im[i] - rstart; + for (j=0; j= cstart && in[j] < cend) { + col = in[j] - cstart; + MatSetValues_SeqFAIJ(A,1,&row,1,&col,&value,addv); + } else if (in[j] < 0) + continue; +#if defined(PETSC_USE_DEBUG) + + else if (in[j] >= mat->cmap->N) { + SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); + } +#endif + else { + if (mat->was_assembled) { + if (!aij->colmap) { + ierr = CreateColmap_MPIAIJ_Private(mat); + CHKERRQ(ierr); + } +#if defined (PETSC_USE_CTABLE) + ierr = PetscTableFind(aij->colmap,in[j]+1,&col); + CHKERRQ(ierr); + col--; +#else + + col = aij->colmap[in[j]] - 1; +#endif + + if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { + ierr = DisAssemble_MPIAIJ(mat); + CHKERRQ(ierr); + col = in[j]; + /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ + B = aij->B; + } + } else + col = in[j]; + MatSetValues_SeqFAIJ(B,1,&row,1,&col,&value,addv); + } + } + } else { + if (!aij->donotstash) { + if (roworiented) { + ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES))); + CHKERRQ(ierr); + } else { + ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES))); + CHKERRQ(ierr); + } + } + } + }} + PetscFunctionReturnVoid(); +} +EXTERN_C_END diff --git a/src/mat/impls/aij/mpi/makefile b/src/mat/impls/aij/mpi/makefile index 7fec838..378697f 100644 --- a/src/mat/impls/aij/mpi/makefile +++ b/src/mat/impls/aij/mpi/makefile @@ -9,7 +9,7 @@ SOURCEH = mpiaij.h OBJSC = mpiaij.o mmaij.o mpiaijpc.o mpiov.o fdmpiaij.o mpiptap.o mpimatmatmult.o mpb_aij.o OBJSF = LIBBASE = libpetscmat -DIRS = spooles superlu_dist mumps csrperm crl pastix +DIRS = spooles superlu_dist mumps csrperm crl faij pastix MANSEC = Mat LOCDIR = src/mat/impls/aij/mpi/ diff --git a/src/mat/impls/aij/mpi/mpiaij.c b/src/mat/impls/aij/mpi/mpiaij.c index a5ade13..0370683 100644 --- a/src/mat/impls/aij/mpi/mpiaij.c +++ b/src/mat/impls/aij/mpi/mpiaij.c @@ -920,6 +920,7 @@ PetscErrorCode MatDestroy_MPIAIJ(Mat mat) ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); + ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpifaij_C","",PETSC_NULL);CHKERRQ(ierr); PetscFunctionReturn(0); } @@ -4928,6 +4929,7 @@ PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, P EXTERN_C_BEGIN extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); +extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIFAIJ(Mat,const MatType,MatReuse,Mat*); extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); EXTERN_C_END @@ -5102,6 +5104,9 @@ PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", "MatConvert_MPIAIJ_MPICSRPERM", MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); + ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpifaij_C", + "MatConvert_MPIAIJ_MPIFAIJ", + MatConvert_MPIAIJ_MPIFAIJ);CHKERRQ(ierr); ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", "MatConvert_MPIAIJ_MPICRL", MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); diff --git a/src/mat/impls/aij/seq/aij.c b/src/mat/impls/aij/seq/aij.c index 4550da2..4d3b4b7 100644 --- a/src/mat/impls/aij/seq/aij.c +++ b/src/mat/impls/aij/seq/aij.c @@ -818,6 +818,7 @@ PetscErrorCode MatDestroy_SeqAIJ(Mat A) ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatConvert_seqaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatConvert_seqaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatConvert_seqaij_seqcsrperm_C","",PETSC_NULL);CHKERRQ(ierr); + ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatConvert_seqaij_seqfaij_C","",PETSC_NULL);CHKERRQ(ierr); ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatSeqAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatSeqAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); @@ -3384,6 +3385,9 @@ PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqAIJ(Mat B) ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_seqcsrperm_C", "MatConvert_SeqAIJ_SeqCSRPERM", MatConvert_SeqAIJ_SeqCSRPERM);CHKERRQ(ierr); + ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_seqfaij_C", + "MatConvert_SeqAIJ_SeqFAIJ", + MatConvert_SeqAIJ_SeqFAIJ);CHKERRQ(ierr); ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_seqcrl_C", "MatConvert_SeqAIJ_SeqCRL", MatConvert_SeqAIJ_SeqCRL);CHKERRQ(ierr); diff --git a/src/mat/impls/aij/seq/aij.h b/src/mat/impls/aij/seq/aij.h index f8a7a7f..0738922 100644 --- a/src/mat/impls/aij/seq/aij.h +++ b/src/mat/impls/aij/seq/aij.h @@ -220,6 +220,7 @@ EXTERN_C_BEGIN EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqAIJ_SeqSBAIJ(Mat,const MatType,MatReuse,Mat*); EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqAIJ_SeqBAIJ(Mat,const MatType,MatReuse,Mat*); EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqAIJ_SeqCSRPERM(Mat,const MatType,MatReuse,Mat*); +EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqAIJ_SeqFAIJ(Mat,const MatType,MatReuse,Mat*); EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatReorderForNonzeroDiagonal_SeqAIJ(Mat,PetscReal,IS,IS); EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatMatMult_SeqAIJ_SeqAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); EXTERN_C_END diff --git a/src/mat/impls/aij/seq/faij/faij.c b/src/mat/impls/aij/seq/faij/faij.c new file mode 100644 index 0000000..54bc15e --- /dev/null +++ b/src/mat/impls/aij/seq/faij/faij.c @@ -0,0 +1,764 @@ +#define PETSCMAT_DLL + +/* + Defines basic operations for the MATSEQFAIJ matrix class. + This class is derived from the MATSEQAIJ class and retains the + compressed row storage (aka Yale sparse matrix format). + However, FAIJ has an extra dynamic array for flexible value insert. +*/ + +#include "../src/mat/impls/aij/seq/aij.h" +#include "uthash.h" + +typedef struct { + PetscInt row; + PetscInt col; +} +EntryKey; + + +typedef struct { + EntryKey key; + PetscScalar value; + UT_hash_handle hh; +} +Entry; + + +typedef struct { + Entry * entry_ht; /* hash table for matrix entry*/ +} +Mat_SeqFAIJ; + + +extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); + + +EXTERN_C_BEGIN +#undef __FUNCT__ +#define __FUNCT__ "MatConvert_SeqFAIJ_SeqAIJ" +PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqFAIJ_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) { + /* This routine is only called to convert a MATFAIJ to its base PETSc type, */ + /* so we will ignore 'MatType type'. */ + PetscErrorCode ierr; + Mat B = *newmat; + Mat_SeqFAIJ *faij=(Mat_SeqFAIJ*)A->spptr; + Entry *current_entry, *tmp_entry; + + PetscFunctionBegin; + if (reuse == MAT_INITIAL_MATRIX) { + ierr = MatDuplicate(A,MAT_COPY_VALUES,&B); + CHKERRQ(ierr); + } + + /* Reset the original function pointers. */ + B->ops->duplicate = MatDuplicate_SeqAIJ; + B->ops->setvalues = MatSetValues_SeqAIJ; + B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; + B->ops->destroy = MatDestroy_SeqAIJ; + + /* Free everything in the Mat_SeqFAIJ data structure. + * We don't free the Mat_SeqFAIJ struct itself, as this will + * cause problems later when MatDestroy() tries to free it. */ + HASH_ITER(hh, faij->entry_ht, current_entry, tmp_entry) { + HASH_DEL(faij->entry_ht, current_entry); /* delete; users advances to next */ + free(current_entry); /* optional- if you want to free */ + } + + /* Change the type of B to MATSEQAIJ. */ + ierr = PetscObjectChangeTypeName( (PetscObject)B, MATSEQAIJ); + CHKERRQ(ierr); + + *newmat = B; + PetscFunctionReturn(0); +} +EXTERN_C_END + + + + + +#undef __FUNCT__ +#define __FUNCT__ "MatDestroy_SeqFAIJ" +PetscErrorCode MatDestroy_SeqFAIJ(Mat A) { + PetscErrorCode ierr; + Mat_SeqFAIJ *faij = (Mat_SeqFAIJ *) A->spptr; + Entry *current_entry, *tmp_entry; + + PetscFunctionBegin; + + /* Free everything in the Mat_SeqFAIJ data structure. + * Note that we don't need to free the Mat_SeqFAIJ struct + * itself, as MatDestroy() will do so. */ + HASH_ITER(hh, faij->entry_ht, current_entry, tmp_entry) { + HASH_DEL(faij->entry_ht, current_entry); /* delete; users advances to next */ + free(current_entry); /* optional- if you want to free */ + } + faij->entry_ht = 0; + + /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() + * to destroy everything that remains. */ + ierr = PetscObjectChangeTypeName( (PetscObject)A, MATSEQAIJ); + CHKERRQ(ierr); + /* Note that I don't call MatSetType(). I believe this is because that + * is only to be called when *building* a matrix. I could be wrong, but + * that is how things work for the SuperLU matrix class. */ + ierr = MatDestroy_SeqAIJ(A); + CHKERRQ(ierr); + PetscFunctionReturn(0); +} + + + +PetscErrorCode MatDuplicate_SeqFAIJ(Mat A, MatDuplicateOption op, Mat *M) { + PetscErrorCode ierr; + Mat_SeqFAIJ *faij = (Mat_SeqFAIJ *) A->spptr; + Mat_SeqFAIJ *faij_dest = (Mat_SeqFAIJ *) (*M)->spptr; + + PetscFunctionBegin; + ierr = MatDuplicate_SeqAIJ(A,op,M); + CHKERRQ(ierr); + ierr = PetscMemcpy(faij_dest,faij,sizeof(Mat_SeqFAIJ)); + CHKERRQ(ierr); + + PetscFunctionReturn(0); +} + + +#undef __FUNCT__ +#define __FUNCT__ "MatSetValues_SeqFAIJ" +PetscErrorCode MatSetValues_SeqFAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) { + Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; + PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N; + PetscInt *imax = a->imax,*ai = a->i,*ailen = a->ilen; + PetscInt *aj = a->j,nonew = a->nonew,lastcol = -1; + MatScalar *ap,value,*aa = a->a; + PetscTruth ignorezeroentries = a->ignorezeroentries; + PetscTruth roworiented = a->roworiented; + + Mat_SeqFAIJ *faij=(Mat_SeqFAIJ*)A->spptr; + EntryKey entry_key; + Entry *mat_entry; + + PetscFunctionBegin; + + if (v) + PetscValidScalarPointer(v,6); + for (k=0; k= A->rmap->n) + SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); +#endif + + rp = aj + ai[row]; + ap = aa + ai[row]; + rmax = imax[row]; + nrow = ailen[row]; + low = 0; + high = nrow; + for (l=0; l= A->cmap->n) + SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); +#endif + + col = in[l]; + if (v) { + if (roworiented) { + value = v[l + k*n]; + } else { + value = v[k + l*m]; + } + } else { + value = 0.; + } + if (value == 0.0 && ignorezeroentries && (is == ADD_VALUES)) + continue; + + if (col <= lastcol) + low = 0; + else + high = nrow; + lastcol = col; + while (high-low > 5) { + t = (low+high)/2; + if (rp[t] > col) + high = t; + else + low = t; + } + + + for (i=low; i col) + break; + if (rp[i] == col) { + if (is == ADD_VALUES) + ap[i] += value; + else + ap[i] = value; + low = i + 1; + goto noinsert; + } + } + if (value == 0.0 && ignorezeroentries) + goto noinsert; + if (nonew == 1) + goto noinsert; + if (nonew == -1) + SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at (%D,%D) in the matrix",row,col); + + if(nrow >= rmax) { + /* find if the entry in hash table */ + entry_key.row = row; + entry_key.col = col; + HASH_FIND (hh, faij->entry_ht, &entry_key, sizeof(EntryKey), mat_entry); + if(mat_entry) { + if (is == ADD_VALUES) + mat_entry->value += value; + else + mat_entry->value = value; + } else { + /* add a new entry to hash table */ + mat_entry = calloc(1, sizeof(Entry)); + mat_entry->key.row = row; + mat_entry->key.col = col; + mat_entry->value = value; + HASH_ADD (hh, faij->entry_ht, key, sizeof(EntryKey), mat_entry); + } + goto noinsert; + } + + N = nrow++ - 1; + a->nz++; + high++; + /* shift up all the later entries in this row */ + for (ii=N; ii>=i; ii--) { + rp[ii+1] = rp[ii]; + ap[ii+1] = ap[ii]; + } + rp[i] = col; + ap[i] = value; + low = i + 1; + +noinsert: + ; + } + ailen[row] = nrow; + } + A->same_nonzero = PETSC_FALSE; + + PetscFunctionReturn(0); +} + + +#undef __FUNCT__ +#define __FUNCT__ "MatInsertValue_SeqFAIJ_Private" +PetscErrorCode MatInsertValue_SeqFAIJ_Private(Mat A,PetscInt row,PetscInt col,const PetscScalar value) { + Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; + PetscInt *rp,low,high,t,nrow,i,ii; + PetscInt *ai = a->i,*ailen = a->ilen; + PetscInt *aj = a->j; + MatScalar *ap,*aa = a->a; + + PetscFunctionBegin; + + if (row < 0) + PetscFunctionReturn(0); +#if defined(PETSC_USE_DEBUG) + + if (row >= A->rmap->n) + SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); +#endif + + rp = aj + ai[row]; + ap = aa + ai[row]; + nrow = ailen[row]; + + + if (col < 0) + PetscFunctionReturn(0); +#if defined(PETSC_USE_DEBUG) + + if (col >= A->cmap->n) + SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,A->cmap->n-1); +#endif + + low = 0; + high = nrow; + while (high-low > 5) { + t = (low+high)/2; + if (rp[t] > col) + high = t; + else + low = t; + } + + for (i=low; i col) + break; + if (rp[i] == col) { + ap[i] = value; + PetscFunctionReturn(0); + } + } + + if(nrow >= a->imax[row]) + SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at (%D,%D) in the matrix",row,col); + + /* shift up all the later entries in this row */ + for (ii=nrow-1; ii>=i; ii--) { + rp[ii+1] = rp[ii]; + ap[ii+1] = ap[ii]; + } + rp[i] = col; + ap[i] = value; + + ailen[row]++; + a->nz++; + + PetscFunctionReturn(0); +} + + + +#undef __FUNCT__ +#define __FUNCT__ "MatFlushBuffer_SeqFAIJ" +PetscErrorCode MatFlushBuffer_SeqFAIJ(Mat A) { + Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; + PetscErrorCode ierr; + PetscInt i,j,*ai = a->i,*aj = a->j,*imax = a->imax; + PetscInt m = A->rmap->n, N, *ailen = a->ilen; + MatScalar *aa = a->a; + + PetscInt *new_ilen, *new_i, *new_j; + MatScalar *new_a; + Mat_SeqFAIJ *faij=(Mat_SeqFAIJ*)A->spptr; + Entry *mat_entry, *tmp_entry; + PetscInt entries = HASH_COUNT(faij->entry_ht); + + PetscFunctionBegin; + + /* process values in the hash table */ + if(entries > 0) { + N=0; + PetscMalloc(m*sizeof(PetscInt), &new_ilen); + + /* statistic the mat entries in aij matrix*/ + for (i=0; ientry_ht; mat_entry != NULL; mat_entry=mat_entry->hh.next) { + new_ilen[mat_entry->key.row]++; + } + /* realloc memory fit nz exactly*/ + a->maxnz = N; + ierr = PetscMalloc3(N,MatScalar,&new_a,N,PetscInt,&new_j,A->rmap->n+1,PetscInt,&new_i); + CHKERRQ(ierr); + + /* set row index */ + new_i[0]=0; + for (i=1; i<=m; i++) { + new_i[i] = new_i[i-1] + new_ilen[i-1]; + } + /* copy values to new location */ + for (i=0; ia,&a->j,&a->i); + CHKERRQ(ierr); + + a->a = new_a; + a->i = new_i; + a->j = new_j; + + /* process values in hash table and then free it */ + HASH_ITER(hh, faij->entry_ht, mat_entry, tmp_entry) { + MatInsertValue_SeqFAIJ_Private(A, mat_entry->key.row, mat_entry->key.col, mat_entry->value); + HASH_DEL(faij->entry_ht, mat_entry); /* delete; users advances to next */ + free(mat_entry); /* must free the entry */ + } + faij->entry_ht = 0; + + PetscFree(new_ilen); + } + + PetscFunctionReturn(0); +} + + + + +#undef __FUNCT__ +#define __FUNCT__ "MatAssemblyEnd_SeqFAIJ" +PetscErrorCode MatAssemblyEnd_SeqFAIJ(Mat A, MatAssemblyType mode) { + Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; + PetscErrorCode ierr; + PetscInt fshift = 0,i,j,*ai,*aj,*imax = a->imax; + PetscInt m = A->rmap->n,*ip,N,*ailen = a->ilen,rmax = 0; + MatScalar *aa,*ap; + PetscReal ratio=0.6; + + PetscInt *new_i, *new_j; + MatScalar *new_a; + Mat_SeqFAIJ *faij=(Mat_SeqFAIJ*)A->spptr; + PetscInt entries = HASH_COUNT(faij->entry_ht); + + PetscFunctionBegin; + if (mode == MAT_FLUSH_ASSEMBLY) + PetscFunctionReturn(0); + + /* process values in the hash table */ + MatFlushBuffer_SeqFAIJ(A); + + /* set mat array */ + aa = a->a; + ai = a->i; + aj = a->j; + + if (m) + rmax = ailen[0]; /* determine row with most nonzeros */ + for (i=1; inz = ai[m]; + if (fshift && a->nounused == -1) { + SETERRQ3(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D, %D unneeded", m, A->cmap->n, fshift); + } + + if(a->maxnz > a->nz) { + ierr = PetscMalloc3(a->nz,MatScalar,&new_a,a->nz,PetscInt,&new_j,A->rmap->n+1,PetscInt,&new_i); + CHKERRQ(ierr); + ierr = PetscMemcpy(new_a,a->a,a->nz*sizeof(MatScalar)); + CHKERRQ(ierr); + ierr = PetscMemcpy(new_i,a->i,(A->rmap->n+1)*sizeof(PetscInt)); + CHKERRQ(ierr); + ierr = PetscMemcpy(new_j,a->j,a->nz*sizeof(PetscInt)); + CHKERRQ(ierr); + ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i); + CHKERRQ(ierr); + aa = a->a = new_a; + ai = a->i = new_i; + aj = a->j = new_j; + a->maxnz = a->nz; + } + + ierr = MatMarkDiagonal_SeqAIJ(A); + CHKERRQ(ierr); + ierr = PetscInfo4(A,"Matrix size: %D X %D; storage space: %D unneeded,%D used\n",m,A->cmap->n,fshift,a->nz); + CHKERRQ(ierr); + ierr = PetscInfo1(A,"Nonzeros in hash table is %D\n",entries); + CHKERRQ(ierr); + ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",rmax); + CHKERRQ(ierr); + + + a->reallocs = 0; + A->info.nz_unneeded = (double)fshift; + a->rmax = rmax; + + /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ + ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,m,ratio); + CHKERRQ(ierr); + A->same_nonzero = PETSC_TRUE; + + ierr = MatAssemblyEnd_SeqAIJ_Inode(A,mode); + CHKERRQ(ierr); + + a->idiagvalid = PETSC_FALSE; + PetscFunctionReturn(0); +} + + + + + +/* MatConvert_SeqAIJ_SeqCSRPERM converts a SeqAIJ matrix into a + * SeqFAIJ matrix. This routine is called by the MatCreate_SeqFAIJ() + * routine, but can also be used to convert an assembled SeqAIJ matrix + * into a SeqFAIJ one. */ +EXTERN_C_BEGIN +#undef __FUNCT__ +#define __FUNCT__ "MatConvert_SeqAIJ_SeqFAIJ" +PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqAIJ_SeqFAIJ(Mat A,const MatType type,MatReuse reuse,Mat *newmat) { + PetscErrorCode ierr; + Mat B = *newmat; + Mat_SeqFAIJ *faij; + + PetscFunctionBegin; + if (reuse == MAT_INITIAL_MATRIX) { + ierr = MatDuplicate(A,MAT_COPY_VALUES,&B); + CHKERRQ(ierr); + } + + ierr = PetscNewLog(B,Mat_SeqFAIJ,&faij); + CHKERRQ(ierr); + B->spptr = (void *) faij; + + /* Set function pointers for methods that we inherit from AIJ but override. */ + B->ops->duplicate = MatDuplicate_SeqFAIJ; + B->ops->setvalues = MatSetValues_SeqFAIJ; + B->ops->assemblyend = MatAssemblyEnd_SeqFAIJ; + B->ops->destroy = MatDestroy_SeqFAIJ; + + ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqfaij_seqaij_C", + "MatConvert_SeqFAIJ_SeqAIJ",MatConvert_SeqFAIJ_SeqAIJ); + CHKERRQ(ierr); + + ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQFAIJ); + CHKERRQ(ierr); + *newmat = B; + PetscFunctionReturn(0); +} +EXTERN_C_END + + +#undef __FUNCT__ +#define __FUNCT__ "MatCreateSeqFAIJ" +/*@C + MatCreateSeqFAIJ - Creates a sparse matrix of type SEQFAIJ. + This type inherits from AIJ, but with extra dynamic array for flexible + value insert. An approximate nonzero pattern will not cause serious performance + lose. + + Collective on MPI_Comm + + Input Parameters: ++ comm - MPI communicator, set to PETSC_COMM_SELF +. m - number of rows +. n - number of columns +. nz - number of nonzeros per row (same for all rows) +- nnz - array containing the number of nonzeros in the various rows + (possibly different for each row) or PETSC_NULL + + Output Parameter: +. A - the matrix + + Notes: + If nnz is given then nz is ignored + + Level: intermediate + +.keywords: matrix, cray, sparse, parallel + +.seealso: MatCreate(), MatCreateMPIFAIJ(), MatSetValues() +@*/ +PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqFAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) { + PetscErrorCode ierr; + + PetscFunctionBegin; + ierr = MatCreate(comm,A); + CHKERRQ(ierr); + ierr = MatSetSizes(*A,m,n,m,n); + CHKERRQ(ierr); + ierr = MatSetType(*A,MATSEQFAIJ); + CHKERRQ(ierr); + ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz); + CHKERRQ(ierr); + PetscFunctionReturn(0); +} + +EXTERN_C_BEGIN +#undef __FUNCT__ +#define __FUNCT__ "MatCreate_SeqFAIJ" +PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqFAIJ(Mat A) { + PetscErrorCode ierr; + + PetscFunctionBegin; + ierr = MatSetType(A,MATSEQAIJ); + CHKERRQ(ierr); + ierr = MatConvert_SeqAIJ_SeqFAIJ(A,MATSEQFAIJ,MAT_REUSE_MATRIX,&A); + CHKERRQ(ierr); + PetscFunctionReturn(0); +} +EXTERN_C_END + + + + +/* + Special version for direct calls from Fortran +*/ +#if defined(PETSC_HAVE_FORTRAN_CAPS) +#define matsetvaluesseqaij_ MATSETVALUESSEQAIJ +#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) +#define matsetvaluesseqaij_ matsetvaluesseqaij +#endif + +/* Change these macros so can be used in void function */ +#undef CHKERRQ +#define CHKERRQ(ierr) CHKERRABORT(((PetscObject)A)->comm,ierr) +#undef SETERRQ2 +#define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)A)->comm,ierr) + +EXTERN_C_BEGIN +#undef __FUNCT__ +#define __FUNCT__ "matsetvaluesseqfaij_" +void PETSC_STDCALL matsetvaluesseqfaij_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[],InsertMode *isis, PetscErrorCode *_ierr) { + Mat A = *AA; + PetscInt m = *mm, n = *nn; + InsertMode is = *isis; + Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; + PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N; + PetscInt *imax,*ai,*ailen; + PetscErrorCode ierr; + PetscInt *aj,nonew = a->nonew,lastcol = -1; + MatScalar *ap,value,*aa; + PetscTruth ignorezeroentries = a->ignorezeroentries; + PetscTruth roworiented = a->roworiented; + + Mat_SeqFAIJ *faij=(Mat_SeqFAIJ*)A->spptr; + EntryKey entry_key; + Entry *mat_entry; + + PetscFunctionBegin; + ierr = MatPreallocated(A); + CHKERRQ(ierr); + imax = a->imax; + ai = a->i; + ailen = a->ilen; + aj = a->j; + aa = a->a; + + for (k=0; k= A->rmap->n) + SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); +#endif + + rp = aj + ai[row]; + ap = aa + ai[row]; + rmax = imax[row]; + nrow = ailen[row]; + low = 0; + high = nrow; + for (l=0; l= A->cmap->n) + SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); +#endif + + col = in[l]; + if (roworiented) { + value = v[l + k*n]; + } else { + value = v[k + l*m]; + } + if (value == 0.0 && ignorezeroentries && (is == ADD_VALUES)) + continue; + + if (col <= lastcol) + low = 0; + else + high = nrow; + lastcol = col; + while (high-low > 5) { + t = (low+high)/2; + if (rp[t] > col) + high = t; + else + low = t; + } + for (i=low; i col) + break; + if (rp[i] == col) { + if (is == ADD_VALUES) + ap[i] += value; + else + ap[i] = value; + goto noinsert; + } + } + if (value == 0.0 && ignorezeroentries) + goto noinsert; + if (nonew == 1) + goto noinsert; + if (nonew == -1) + SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); + + if(nrow >= rmax) { + /* find if the entry in hash table */ + entry_key.row = row; + entry_key.col = col; + HASH_FIND (hh, faij->entry_ht, &entry_key, sizeof(EntryKey), mat_entry); + if(mat_entry) { + if (is == ADD_VALUES) + mat_entry->value += value; + else + mat_entry->value = value; + } else { + /* add a new entry to hash table */ + mat_entry = calloc(1, sizeof(Entry)); + mat_entry->key.row = row; + mat_entry->key.col = col; + mat_entry->value = value; + HASH_ADD (hh, faij->entry_ht, key, sizeof(EntryKey), mat_entry); + } + goto noinsert; + } + + N = nrow++ - 1; + a->nz++; + high++; + /* shift up all the later entries in this row */ + for (ii=N; ii>=i; ii--) { + rp[ii+1] = rp[ii]; + ap[ii+1] = ap[ii]; + } + rp[i] = col; + ap[i] = value; +noinsert: + ; + low = i + 1; + } + ailen[row] = nrow; + } + A->same_nonzero = PETSC_FALSE; + PetscFunctionReturnVoid(); +} +EXTERN_C_END diff --git a/src/mat/impls/aij/seq/faij/makefile b/src/mat/impls/aij/seq/faij/makefile new file mode 100644 index 0000000..1d3d544 --- /dev/null +++ b/src/mat/impls/aij/seq/faij/makefile @@ -0,0 +1,17 @@ +ALL: lib + +CFLAGS = +FFLAGS = +SOURCEC = faij.c +SOURCEF = +SOURCEH = +OBJSC = faij.o +OBJSF = +LIBBASE = libpetscmat +DIRS = +MANSEC = Mat +LOCDIR = src/mat/impls/aij/seq/faij/ + +include ${PETSC_DIR}/conf/variables +include ${PETSC_DIR}/conf/rules +include ${PETSC_DIR}/conf/test diff --git a/src/mat/impls/aij/seq/faij/uthash.h b/src/mat/impls/aij/seq/faij/uthash.h new file mode 100644 index 0000000..a4bdc18 --- /dev/null +++ b/src/mat/impls/aij/seq/faij/uthash.h @@ -0,0 +1,972 @@ +/* +Copyright (c) 2003-2010, Troy D. Hanson http://uthash.sourceforge.net +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef UTHASH_H +#define UTHASH_H + +#include /* memcmp,strlen */ +#include /* ptrdiff_t */ + +/* These macros use decltype or the earlier __typeof GNU extension. + As decltype is only available in newer compilers (VS2010 or gcc 4.3+ + when compiling c++ source) this code uses whatever method is needed + or, for VS2008 where neither is available, uses casting workarounds. */ +#ifdef _MSC_VER /* MS compiler */ +#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ +#define DECLTYPE(x) (decltype(x)) +#else /* VS2008 or older (or VS2010 in C mode) */ +#define NO_DECLTYPE +#define DECLTYPE(x) +#endif +#else /* GNU, Sun and other compilers */ +#define DECLTYPE(x) (__typeof(x)) +#endif + +#ifdef NO_DECLTYPE +#define DECLTYPE_ASSIGN(dst,src) \ +do { \ + char **_da_dst = (char**)(&(dst)); \ + *_da_dst = (char*)(src); \ +} while(0) +#else +#define DECLTYPE_ASSIGN(dst,src) \ +do { \ + (dst) = DECLTYPE(dst)(src); \ +} while(0) +#endif + +/* a number of the hash function use uint32_t which isn't defined on win32 */ +#ifdef _MSC_VER +typedef unsigned int uint32_t; +#else +#include /* uint32_t */ +#endif + +#define UTHASH_VERSION 1.9.3 + +#define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ +#define uthash_malloc(sz) malloc(sz) /* malloc fcn */ +#define uthash_free(ptr,sz) free(ptr) /* free fcn */ + +#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ +#define uthash_expand_fyi(tbl) /* can be defined to log expands */ + +/* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */ +#define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */ + +/* calculate the element whose hash handle address is hhe */ +#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) + +#define HASH_FIND(hh,head,keyptr,keylen,out) \ +do { \ + unsigned _hf_bkt,_hf_hashv; \ + out=NULL; \ + if (head) { \ + HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \ + if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \ + HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \ + keyptr,keylen,out); \ + } \ + } \ +} while (0) + +#ifdef HASH_BLOOM +#define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM) +#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0) +#define HASH_BLOOM_MAKE(tbl) \ +do { \ + (tbl)->bloom_nbits = HASH_BLOOM; \ + (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ + if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ + memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ + (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ +} while (0); + +#define HASH_BLOOM_FREE(tbl) \ +do { \ + uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ +} while (0); + +#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8))) +#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8))) + +#define HASH_BLOOM_ADD(tbl,hashv) \ + HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) + +#define HASH_BLOOM_TEST(tbl,hashv) \ + HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) + +#else +#define HASH_BLOOM_MAKE(tbl) +#define HASH_BLOOM_FREE(tbl) +#define HASH_BLOOM_ADD(tbl,hashv) +#define HASH_BLOOM_TEST(tbl,hashv) (1) +#endif + +#define HASH_MAKE_TABLE(hh,head) \ +do { \ + (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ + sizeof(UT_hash_table)); \ + if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ + memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ + (head)->hh.tbl->tail = &((head)->hh); \ + (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ + (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ + (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ + (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ + memset((head)->hh.tbl->buckets, 0, \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_MAKE((head)->hh.tbl); \ + (head)->hh.tbl->signature = HASH_SIGNATURE; \ +} while(0) + +#define HASH_ADD(hh,head,fieldname,keylen_in,add) \ + HASH_ADD_KEYPTR(hh,head,&add->fieldname,keylen_in,add) + +#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ +do { \ + unsigned _ha_bkt; \ + (add)->hh.next = NULL; \ + (add)->hh.key = (char*)keyptr; \ + (add)->hh.keylen = keylen_in; \ + if (!(head)) { \ + head = (add); \ + (head)->hh.prev = NULL; \ + HASH_MAKE_TABLE(hh,head); \ + } else { \ + (head)->hh.tbl->tail->next = (add); \ + (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ + (head)->hh.tbl->tail = &((add)->hh); \ + } \ + (head)->hh.tbl->num_items++; \ + (add)->hh.tbl = (head)->hh.tbl; \ + HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \ + (add)->hh.hashv, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \ + HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \ + HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \ + HASH_FSCK(hh,head); \ +} while(0) + +#define HASH_TO_BKT( hashv, num_bkts, bkt ) \ +do { \ + bkt = ((hashv) & ((num_bkts) - 1)); \ +} while(0) + +/* delete "delptr" from the hash table. + * "the usual" patch-up process for the app-order doubly-linked-list. + * The use of _hd_hh_del below deserves special explanation. + * These used to be expressed using (delptr) but that led to a bug + * if someone used the same symbol for the head and deletee, like + * HASH_DELETE(hh,users,users); + * We want that to work, but by changing the head (users) below + * we were forfeiting our ability to further refer to the deletee (users) + * in the patch-up process. Solution: use scratch space to + * copy the deletee pointer, then the latter references are via that + * scratch pointer rather than through the repointed (users) symbol. + */ +#define HASH_DELETE(hh,head,delptr) \ +do { \ + unsigned _hd_bkt; \ + struct UT_hash_handle *_hd_hh_del; \ + if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + head = NULL; \ + } else { \ + _hd_hh_del = &((delptr)->hh); \ + if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \ + (head)->hh.tbl->tail = \ + (UT_hash_handle*)((char*)((delptr)->hh.prev) + \ + (head)->hh.tbl->hho); \ + } \ + if ((delptr)->hh.prev) { \ + ((UT_hash_handle*)((char*)((delptr)->hh.prev) + \ + (head)->hh.tbl->hho))->next = (delptr)->hh.next; \ + } else { \ + DECLTYPE_ASSIGN(head,(delptr)->hh.next); \ + } \ + if (_hd_hh_del->next) { \ + ((UT_hash_handle*)((char*)_hd_hh_del->next + \ + (head)->hh.tbl->hho))->prev = \ + _hd_hh_del->prev; \ + } \ + HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ + HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ + (head)->hh.tbl->num_items--; \ + } \ + HASH_FSCK(hh,head); \ +} while (0) + + +/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ +#define HASH_FIND_STR(head,findstr,out) \ + HASH_FIND(hh,head,findstr,strlen(findstr),out) +#define HASH_ADD_STR(head,strfield,add) \ + HASH_ADD(hh,head,strfield,strlen(add->strfield),add) +#define HASH_FIND_INT(head,findint,out) \ + HASH_FIND(hh,head,findint,sizeof(int),out) +#define HASH_ADD_INT(head,intfield,add) \ + HASH_ADD(hh,head,intfield,sizeof(int),add) +#define HASH_FIND_PTR(head,findptr,out) \ + HASH_FIND(hh,head,findptr,sizeof(void *),out) +#define HASH_ADD_PTR(head,ptrfield,add) \ + HASH_ADD(hh,head,ptrfield,sizeof(void *),add) +#define HASH_DEL(head,delptr) \ + HASH_DELETE(hh,head,delptr) + +/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. + * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. + */ +#ifdef HASH_DEBUG +#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) +#define HASH_FSCK(hh,head) \ +do { \ + unsigned _bkt_i; \ + unsigned _count, _bkt_count; \ + char *_prev; \ + struct UT_hash_handle *_thh; \ + if (head) { \ + _count = 0; \ + for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \ + _bkt_count = 0; \ + _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ + _prev = NULL; \ + while (_thh) { \ + if (_prev != (char*)(_thh->hh_prev)) { \ + HASH_OOPS("invalid hh_prev %p, actual %p\n", \ + _thh->hh_prev, _prev ); \ + } \ + _bkt_count++; \ + _prev = (char*)(_thh); \ + _thh = _thh->hh_next; \ + } \ + _count += _bkt_count; \ + if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ + HASH_OOPS("invalid bucket count %d, actual %d\n", \ + (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ + } \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("invalid hh item count %d, actual %d\n", \ + (head)->hh.tbl->num_items, _count ); \ + } \ + /* traverse hh in app order; check next/prev integrity, count */ \ + _count = 0; \ + _prev = NULL; \ + _thh = &(head)->hh; \ + while (_thh) { \ + _count++; \ + if (_prev !=(char*)(_thh->prev)) { \ + HASH_OOPS("invalid prev %p, actual %p\n", \ + _thh->prev, _prev ); \ + } \ + _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ + _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \ + (head)->hh.tbl->hho) : NULL ); \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("invalid app item count %d, actual %d\n", \ + (head)->hh.tbl->num_items, _count ); \ + } \ + } \ +} while (0) +#else +#define HASH_FSCK(hh,head) +#endif + +/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to + * the descriptor to which this macro is defined for tuning the hash function. + * The app can #include to get the prototype for write(2). */ +#ifdef HASH_EMIT_KEYS +#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ +do { \ + unsigned _klen = fieldlen; \ + write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ + write(HASH_EMIT_KEYS, keyptr, fieldlen); \ +} while (0) +#else +#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) +#endif + +/* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ +#ifdef HASH_FUNCTION +#define HASH_FCN HASH_FUNCTION +#else +#define HASH_FCN HASH_JEN +#endif + +/* The Bernstein hash function, used in Perl prior to v5.6 */ +#define HASH_BER(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned _hb_keylen=keylen; \ + char *_hb_key=(char*)(key); \ + (hashv) = 0; \ + while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \ + bkt = (hashv) & (num_bkts-1); \ +} while (0) + + +/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at + * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ +#define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned _sx_i; \ + char *_hs_key=(char*)(key); \ + hashv = 0; \ + for(_sx_i=0; _sx_i < keylen; _sx_i++) \ + hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ + bkt = hashv & (num_bkts-1); \ +} while (0) + +#define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned _fn_i; \ + char *_hf_key=(char*)(key); \ + hashv = 2166136261UL; \ + for(_fn_i=0; _fn_i < keylen; _fn_i++) \ + hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \ + bkt = hashv & (num_bkts-1); \ +} while(0); + +#define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned _ho_i; \ + char *_ho_key=(char*)(key); \ + hashv = 0; \ + for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ + hashv += _ho_key[_ho_i]; \ + hashv += (hashv << 10); \ + hashv ^= (hashv >> 6); \ + } \ + hashv += (hashv << 3); \ + hashv ^= (hashv >> 11); \ + hashv += (hashv << 15); \ + bkt = hashv & (num_bkts-1); \ +} while(0) + +#define HASH_JEN_MIX(a,b,c) \ +do { \ + a -= b; a -= c; a ^= ( c >> 13 ); \ + b -= c; b -= a; b ^= ( a << 8 ); \ + c -= a; c -= b; c ^= ( b >> 13 ); \ + a -= b; a -= c; a ^= ( c >> 12 ); \ + b -= c; b -= a; b ^= ( a << 16 ); \ + c -= a; c -= b; c ^= ( b >> 5 ); \ + a -= b; a -= c; a ^= ( c >> 3 ); \ + b -= c; b -= a; b ^= ( a << 10 ); \ + c -= a; c -= b; c ^= ( b >> 15 ); \ +} while (0) + +#define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned _hj_i,_hj_j,_hj_k; \ + char *_hj_key=(char*)(key); \ + hashv = 0xfeedbeef; \ + _hj_i = _hj_j = 0x9e3779b9; \ + _hj_k = keylen; \ + while (_hj_k >= 12) { \ + _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ + + ( (unsigned)_hj_key[2] << 16 ) \ + + ( (unsigned)_hj_key[3] << 24 ) ); \ + _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ + + ( (unsigned)_hj_key[6] << 16 ) \ + + ( (unsigned)_hj_key[7] << 24 ) ); \ + hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ + + ( (unsigned)_hj_key[10] << 16 ) \ + + ( (unsigned)_hj_key[11] << 24 ) ); \ + \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + \ + _hj_key += 12; \ + _hj_k -= 12; \ + } \ + hashv += keylen; \ + switch ( _hj_k ) { \ + case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \ + case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \ + case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \ + case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \ + case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \ + case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \ + case 5: _hj_j += _hj_key[4]; \ + case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \ + case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \ + case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \ + case 1: _hj_i += _hj_key[0]; \ + } \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + bkt = hashv & (num_bkts-1); \ +} while(0) + +/* The Paul Hsieh hash function */ +#undef get16bits +#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ + || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) +#define get16bits(d) (*((const uint16_t *) (d))) +#endif + +#if !defined (get16bits) +#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ + +(uint32_t)(((const uint8_t *)(d))[0]) ) +#endif +#define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \ +do { \ + char *_sfh_key=(char*)(key); \ + uint32_t _sfh_tmp, _sfh_len = keylen; \ + \ + int _sfh_rem = _sfh_len & 3; \ + _sfh_len >>= 2; \ + hashv = 0xcafebabe; \ + \ + /* Main loop */ \ + for (;_sfh_len > 0; _sfh_len--) { \ + hashv += get16bits (_sfh_key); \ + _sfh_tmp = (get16bits (_sfh_key+2) << 11) ^ hashv; \ + hashv = (hashv << 16) ^ _sfh_tmp; \ + _sfh_key += 2*sizeof (uint16_t); \ + hashv += hashv >> 11; \ + } \ + \ + /* Handle end cases */ \ + switch (_sfh_rem) { \ + case 3: hashv += get16bits (_sfh_key); \ + hashv ^= hashv << 16; \ + hashv ^= _sfh_key[sizeof (uint16_t)] << 18; \ + hashv += hashv >> 11; \ + break; \ + case 2: hashv += get16bits (_sfh_key); \ + hashv ^= hashv << 11; \ + hashv += hashv >> 17; \ + break; \ + case 1: hashv += *_sfh_key; \ + hashv ^= hashv << 10; \ + hashv += hashv >> 1; \ + } \ + \ + /* Force "avalanching" of final 127 bits */ \ + hashv ^= hashv << 3; \ + hashv += hashv >> 5; \ + hashv ^= hashv << 4; \ + hashv += hashv >> 17; \ + hashv ^= hashv << 25; \ + hashv += hashv >> 6; \ + bkt = hashv & (num_bkts-1); \ +} while(0); + +#ifdef HASH_USING_NO_STRICT_ALIASING +/* The MurmurHash exploits some CPU's (e.g. x86) tolerance for unaligned reads. + * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. + * So MurmurHash comes in two versions, the faster unaligned one and the slower + * aligned one. We only use the faster one on CPU's where we know it's safe. + * + * Note the preprocessor built-in defines can be emitted using: + * + * gcc -m64 -dM -E - < /dev/null (on gcc) + * cc -## a.c (where a.c is a simple test file) (Sun Studio) + */ +#if (defined(__i386__) || defined(__x86_64__)) +#define HASH_MUR HASH_MUR_UNALIGNED +#else +#define HASH_MUR HASH_MUR_ALIGNED +#endif + +/* Appleby's MurmurHash fast version for unaligned-tolerant archs like i386 */ +#define HASH_MUR_UNALIGNED(key,keylen,num_bkts,hashv,bkt) \ +do { \ + const unsigned int _mur_m = 0x5bd1e995; \ + const int _mur_r = 24; \ + hashv = 0xcafebabe ^ keylen; \ + char *_mur_key = (char *)(key); \ + uint32_t _mur_tmp, _mur_len = keylen; \ + \ + for (;_mur_len >= 4; _mur_len-=4) { \ + _mur_tmp = *(uint32_t *)_mur_key; \ + _mur_tmp *= _mur_m; \ + _mur_tmp ^= _mur_tmp >> _mur_r; \ + _mur_tmp *= _mur_m; \ + hashv *= _mur_m; \ + hashv ^= _mur_tmp; \ + _mur_key += 4; \ + } \ + \ + switch(_mur_len) \ + { \ + case 3: hashv ^= _mur_key[2] << 16; \ + case 2: hashv ^= _mur_key[1] << 8; \ + case 1: hashv ^= _mur_key[0]; \ + hashv *= _mur_m; \ + }; \ + \ + hashv ^= hashv >> 13; \ + hashv *= _mur_m; \ + hashv ^= hashv >> 15; \ + \ + bkt = hashv & (num_bkts-1); \ +} while(0) + +/* Appleby's MurmurHash version for alignment-sensitive archs like Sparc */ +#define HASH_MUR_ALIGNED(key,keylen,num_bkts,hashv,bkt) \ +do { \ + const unsigned int _mur_m = 0x5bd1e995; \ + const int _mur_r = 24; \ + hashv = 0xcafebabe ^ (keylen); \ + char *_mur_key = (char *)(key); \ + uint32_t _mur_len = keylen; \ + int _mur_align = (int)_mur_key & 3; \ + \ + if (_mur_align && (_mur_len >= 4)) { \ + unsigned _mur_t = 0, _mur_d = 0; \ + switch(_mur_align) { \ + case 1: _mur_t |= _mur_key[2] << 16; \ + case 2: _mur_t |= _mur_key[1] << 8; \ + case 3: _mur_t |= _mur_key[0]; \ + } \ + _mur_t <<= (8 * _mur_align); \ + _mur_key += 4-_mur_align; \ + _mur_len -= 4-_mur_align; \ + int _mur_sl = 8 * (4-_mur_align); \ + int _mur_sr = 8 * _mur_align; \ + \ + for (;_mur_len >= 4; _mur_len-=4) { \ + _mur_d = *(unsigned *)_mur_key; \ + _mur_t = (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \ + unsigned _mur_k = _mur_t; \ + _mur_k *= _mur_m; \ + _mur_k ^= _mur_k >> _mur_r; \ + _mur_k *= _mur_m; \ + hashv *= _mur_m; \ + hashv ^= _mur_k; \ + _mur_t = _mur_d; \ + _mur_key += 4; \ + } \ + _mur_d = 0; \ + if(_mur_len >= _mur_align) { \ + switch(_mur_align) { \ + case 3: _mur_d |= _mur_key[2] << 16; \ + case 2: _mur_d |= _mur_key[1] << 8; \ + case 1: _mur_d |= _mur_key[0]; \ + } \ + unsigned _mur_k = (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \ + _mur_k *= _mur_m; \ + _mur_k ^= _mur_k >> _mur_r; \ + _mur_k *= _mur_m; \ + hashv *= _mur_m; \ + hashv ^= _mur_k; \ + _mur_k += _mur_align; \ + _mur_len -= _mur_align; \ + \ + switch(_mur_len) \ + { \ + case 3: hashv ^= _mur_key[2] << 16; \ + case 2: hashv ^= _mur_key[1] << 8; \ + case 1: hashv ^= _mur_key[0]; \ + hashv *= _mur_m; \ + } \ + } else { \ + switch(_mur_len) \ + { \ + case 3: _mur_d ^= _mur_key[2] << 16; \ + case 2: _mur_d ^= _mur_key[1] << 8; \ + case 1: _mur_d ^= _mur_key[0]; \ + case 0: hashv ^= (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \ + hashv *= _mur_m; \ + } \ + } \ + \ + hashv ^= hashv >> 13; \ + hashv *= _mur_m; \ + hashv ^= hashv >> 15; \ + } else { \ + for (;_mur_len >= 4; _mur_len-=4) { \ + unsigned _mur_k = *(unsigned*)_mur_key; \ + _mur_k *= _mur_m; \ + _mur_k ^= _mur_k >> _mur_r; \ + _mur_k *= _mur_m; \ + hashv *= _mur_m; \ + hashv ^= _mur_k; \ + _mur_key += 4; \ + } \ + switch(_mur_len) \ + { \ + case 3: hashv ^= _mur_key[2] << 16; \ + case 2: hashv ^= _mur_key[1] << 8; \ + case 1: hashv ^= _mur_key[0]; \ + hashv *= _mur_m; \ + } \ + \ + hashv ^= hashv >> 13; \ + hashv *= _mur_m; \ + hashv ^= hashv >> 15; \ + } \ + bkt = hashv & (num_bkts-1); \ +} while(0) +#endif /* HASH_USING_NO_STRICT_ALIASING */ + +/* key comparison function; return 0 if keys equal */ +#define HASH_KEYCMP(a,b,len) memcmp(a,b,len) + +/* iterate over items in a known bucket to find desired item */ +#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \ +do { \ + if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \ + else out=NULL; \ + while (out) { \ + if (out->hh.keylen == keylen_in) { \ + if ((HASH_KEYCMP(out->hh.key,keyptr,keylen_in)) == 0) break; \ + } \ + if (out->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,out->hh.hh_next)); \ + else out = NULL; \ + } \ +} while(0) + +/* add an item to a bucket */ +#define HASH_ADD_TO_BKT(head,addhh) \ +do { \ + head.count++; \ + (addhh)->hh_next = head.hh_head; \ + (addhh)->hh_prev = NULL; \ + if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \ + (head).hh_head=addhh; \ + if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \ + && (addhh)->tbl->noexpand != 1) { \ + HASH_EXPAND_BUCKETS((addhh)->tbl); \ + } \ +} while(0) + +/* remove an item from a given bucket */ +#define HASH_DEL_IN_BKT(hh,head,hh_del) \ + (head).count--; \ + if ((head).hh_head == hh_del) { \ + (head).hh_head = hh_del->hh_next; \ + } \ + if (hh_del->hh_prev) { \ + hh_del->hh_prev->hh_next = hh_del->hh_next; \ + } \ + if (hh_del->hh_next) { \ + hh_del->hh_next->hh_prev = hh_del->hh_prev; \ + } + +/* Bucket expansion has the effect of doubling the number of buckets + * and redistributing the items into the new buckets. Ideally the + * items will distribute more or less evenly into the new buckets + * (the extent to which this is true is a measure of the quality of + * the hash function as it applies to the key domain). + * + * With the items distributed into more buckets, the chain length + * (item count) in each bucket is reduced. Thus by expanding buckets + * the hash keeps a bound on the chain length. This bounded chain + * length is the essence of how a hash provides constant time lookup. + * + * The calculation of tbl->ideal_chain_maxlen below deserves some + * explanation. First, keep in mind that we're calculating the ideal + * maximum chain length based on the *new* (doubled) bucket count. + * In fractions this is just n/b (n=number of items,b=new num buckets). + * Since the ideal chain length is an integer, we want to calculate + * ceil(n/b). We don't depend on floating point arithmetic in this + * hash, so to calculate ceil(n/b) with integers we could write + * + * ceil(n/b) = (n/b) + ((n%b)?1:0) + * + * and in fact a previous version of this hash did just that. + * But now we have improved things a bit by recognizing that b is + * always a power of two. We keep its base 2 log handy (call it lb), + * so now we can write this with a bit shift and logical AND: + * + * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) + * + */ +#define HASH_EXPAND_BUCKETS(tbl) \ +do { \ + unsigned _he_bkt; \ + unsigned _he_bkt_i; \ + struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ + UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ + _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ + 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \ + memset(_he_new_buckets, 0, \ + 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + tbl->ideal_chain_maxlen = \ + (tbl->num_items >> (tbl->log2_num_buckets+1)) + \ + ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0); \ + tbl->nonideal_items = 0; \ + for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \ + { \ + _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \ + while (_he_thh) { \ + _he_hh_nxt = _he_thh->hh_next; \ + HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \ + _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \ + if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \ + tbl->nonideal_items++; \ + _he_newbkt->expand_mult = _he_newbkt->count / \ + tbl->ideal_chain_maxlen; \ + } \ + _he_thh->hh_prev = NULL; \ + _he_thh->hh_next = _he_newbkt->hh_head; \ + if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \ + _he_thh; \ + _he_newbkt->hh_head = _he_thh; \ + _he_thh = _he_hh_nxt; \ + } \ + } \ + uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ + tbl->num_buckets *= 2; \ + tbl->log2_num_buckets++; \ + tbl->buckets = _he_new_buckets; \ + tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \ + (tbl->ineff_expands+1) : 0; \ + if (tbl->ineff_expands > 1) { \ + tbl->noexpand=1; \ + uthash_noexpand_fyi(tbl); \ + } \ + uthash_expand_fyi(tbl); \ +} while(0) + + +/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ +/* Note that HASH_SORT assumes the hash handle name to be hh. + * HASH_SRT was added to allow the hash handle name to be passed in. */ +#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) +#define HASH_SRT(hh,head,cmpfcn) \ +do { \ + unsigned _hs_i; \ + unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ + struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ + if (head) { \ + _hs_insize = 1; \ + _hs_looping = 1; \ + _hs_list = &((head)->hh); \ + while (_hs_looping) { \ + _hs_p = _hs_list; \ + _hs_list = NULL; \ + _hs_tail = NULL; \ + _hs_nmerges = 0; \ + while (_hs_p) { \ + _hs_nmerges++; \ + _hs_q = _hs_p; \ + _hs_psize = 0; \ + for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \ + _hs_psize++; \ + _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + if (! (_hs_q) ) break; \ + } \ + _hs_qsize = _hs_insize; \ + while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \ + if (_hs_psize == 0) { \ + _hs_e = _hs_q; \ + _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_qsize--; \ + } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \ + _hs_e = _hs_p; \ + _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ + ((void*)((char*)(_hs_p->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_psize--; \ + } else if (( \ + cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \ + DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \ + ) <= 0) { \ + _hs_e = _hs_p; \ + _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ + ((void*)((char*)(_hs_p->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_psize--; \ + } else { \ + _hs_e = _hs_q; \ + _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_qsize--; \ + } \ + if ( _hs_tail ) { \ + _hs_tail->next = ((_hs_e) ? \ + ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \ + } else { \ + _hs_list = _hs_e; \ + } \ + _hs_e->prev = ((_hs_tail) ? \ + ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \ + _hs_tail = _hs_e; \ + } \ + _hs_p = _hs_q; \ + } \ + _hs_tail->next = NULL; \ + if ( _hs_nmerges <= 1 ) { \ + _hs_looping=0; \ + (head)->hh.tbl->tail = _hs_tail; \ + DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ + } \ + _hs_insize *= 2; \ + } \ + HASH_FSCK(hh,head); \ + } \ +} while (0) + +/* This function selects items from one hash into another hash. + * The end result is that the selected items have dual presence + * in both hashes. There is no copy of the items made; rather + * they are added into the new hash through a secondary hash + * hash handle that must be present in the structure. */ +#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ +do { \ + unsigned _src_bkt, _dst_bkt; \ + void *_last_elt=NULL, *_elt; \ + UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ + ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ + if (src) { \ + for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ + for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ + _src_hh; \ + _src_hh = _src_hh->hh_next) { \ + _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ + if (cond(_elt)) { \ + _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \ + _dst_hh->key = _src_hh->key; \ + _dst_hh->keylen = _src_hh->keylen; \ + _dst_hh->hashv = _src_hh->hashv; \ + _dst_hh->prev = _last_elt; \ + _dst_hh->next = NULL; \ + if (_last_elt_hh) { _last_elt_hh->next = _elt; } \ + if (!dst) { \ + DECLTYPE_ASSIGN(dst,_elt); \ + HASH_MAKE_TABLE(hh_dst,dst); \ + } else { \ + _dst_hh->tbl = (dst)->hh_dst.tbl; \ + } \ + HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ + HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \ + (dst)->hh_dst.tbl->num_items++; \ + _last_elt = _elt; \ + _last_elt_hh = _dst_hh; \ + } \ + } \ + } \ + } \ + HASH_FSCK(hh_dst,dst); \ +} while (0) + +#define HASH_CLEAR(hh,head) \ +do { \ + if (head) { \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head)=NULL; \ + } \ +} while(0) + +#ifdef NO_DECLTYPE +#define HASH_ITER(hh,head,el,tmp) \ +for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \ + el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL)) +#else +#define HASH_ITER(hh,head,el,tmp) \ +for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \ + el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL)) +#endif + +/* obtain a count of items in the hash */ +#define HASH_COUNT(head) HASH_CNT(hh,head) +#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0) + +typedef struct UT_hash_bucket { + struct UT_hash_handle *hh_head; + unsigned count; + + /* expand_mult is normally set to 0. In this situation, the max chain length + * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If + * the bucket's chain exceeds this length, bucket expansion is triggered). + * However, setting expand_mult to a non-zero value delays bucket expansion + * (that would be triggered by additions to this particular bucket) + * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. + * (The multiplier is simply expand_mult+1). The whole idea of this + * multiplier is to reduce bucket expansions, since they are expensive, in + * situations where we know that a particular bucket tends to be overused. + * It is better to let its chain length grow to a longer yet-still-bounded + * value, than to do an O(n) bucket expansion too often. + */ + unsigned expand_mult; + +} UT_hash_bucket; + +/* random signature used only to find hash tables in external analysis */ +#define HASH_SIGNATURE 0xa0111fe1 +#define HASH_BLOOM_SIGNATURE 0xb12220f2 + +typedef struct UT_hash_table { + UT_hash_bucket *buckets; + unsigned num_buckets, log2_num_buckets; + unsigned num_items; + struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ + ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ + + /* in an ideal situation (all buckets used equally), no bucket would have + * more than ceil(#items/#buckets) items. that's the ideal chain length. */ + unsigned ideal_chain_maxlen; + + /* nonideal_items is the number of items in the hash whose chain position + * exceeds the ideal chain maxlen. these items pay the penalty for an uneven + * hash distribution; reaching them in a chain traversal takes >ideal steps */ + unsigned nonideal_items; + + /* ineffective expands occur when a bucket doubling was performed, but + * afterward, more than half the items in the hash had nonideal chain + * positions. If this happens on two consecutive expansions we inhibit any + * further expansion, as it's not helping; this happens when the hash + * function isn't a good fit for the key domain. When expansion is inhibited + * the hash will still work, albeit no longer in constant time. */ + unsigned ineff_expands, noexpand; + + uint32_t signature; /* used only to find hash tables in external analysis */ +#ifdef HASH_BLOOM + uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ + uint8_t *bloom_bv; + char bloom_nbits; +#endif + +} UT_hash_table; + +typedef struct UT_hash_handle { + struct UT_hash_table *tbl; + void *prev; /* prev element in app order */ + void *next; /* next element in app order */ + struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ + struct UT_hash_handle *hh_next; /* next hh in bucket order */ + void *key; /* ptr to enclosing struct's key */ + unsigned keylen; /* enclosing struct's key len */ + unsigned hashv; /* result of hash-fcn(key) */ +} UT_hash_handle; + +#endif /* UTHASH_H */ diff --git a/src/mat/impls/aij/seq/makefile b/src/mat/impls/aij/seq/makefile index fa4a102..8806c9c 100644 --- a/src/mat/impls/aij/seq/makefile +++ b/src/mat/impls/aij/seq/makefile @@ -11,7 +11,7 @@ OBJSC = aij.o aijfact.o ij.o fdaij.o \ matmatmult.o symtranspose.o matptap.o matpapt.o inode.o inode2.o OBJSF = LIBBASE = libpetscmat -DIRS = spooles superlu umfpack essl lusol matlab csrperm crl bas ftn-kernels +DIRS = spooles superlu umfpack essl lusol matlab csrperm crl bas faij ftn-kernels MANSEC = Mat LOCDIR = src/mat/impls/aij/seq/ diff --git a/src/mat/interface/matregis.c b/src/mat/interface/matregis.c index 420162b..0cf6ff9 100644 --- a/src/mat/interface/matregis.c +++ b/src/mat/interface/matregis.c @@ -31,6 +31,10 @@ EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_CSRPERM(Mat); EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqCSRPERM(Mat); EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPICSRPERM(Mat); +EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_FAIJ(Mat); +EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqFAIJ(Mat); +EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIFAIJ(Mat); + EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_CRL(Mat); EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqCRL(Mat); EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPICRL(Mat); @@ -40,15 +44,15 @@ EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BlockMat(Mat); EXTERN_C_END - + /* - This is used by MatSetType() to make sure that at least one + This is used by MatSetType() to make sure that at least one MatRegisterAll() is called. In general, if there is more than one DLL, then MatRegisterAll() may be called several times. */ EXTERN PetscTruth MatRegisterAllCalled; -#undef __FUNCT__ +#undef __FUNCT__ #define __FUNCT__ "MatRegisterAll" /*@C MatRegisterAll - Registers all of the matrix types in PETSc @@ -85,6 +89,10 @@ PetscErrorCode PETSCMAT_DLLEXPORT MatRegisterAll(const char path[]) ierr = MatRegisterDynamic(MATMPICSRPERM, path,"MatCreate_MPICSRPERM", MatCreate_MPICSRPERM);CHKERRQ(ierr); ierr = MatRegisterDynamic(MATSEQCSRPERM, path,"MatCreate_SeqCSRPERM", MatCreate_SeqCSRPERM);CHKERRQ(ierr); + ierr = MatRegisterDynamic(MATFAIJ, path,"MatCreate_FAIJ", MatCreate_FAIJ);CHKERRQ(ierr); + ierr = MatRegisterDynamic(MATMPIFAIJ, path,"MatCreate_MPIFAIJ", MatCreate_MPIFAIJ);CHKERRQ(ierr); + ierr = MatRegisterDynamic(MATSEQFAIJ, path,"MatCreate_SeqFAIJ", MatCreate_SeqFAIJ);CHKERRQ(ierr); + ierr = MatRegisterDynamic(MATCRL, path,"MatCreate_CRL", MatCreate_CRL);CHKERRQ(ierr); ierr = MatRegisterDynamic(MATSEQCRL, path,"MatCreate_SeqCRL", MatCreate_SeqCRL);CHKERRQ(ierr); ierr = MatRegisterDynamic(MATMPICRL, path,"MatCreate_MPICRL", MatCreate_MPICRL);CHKERRQ(ierr);