# HG changeset patch
# User gdiso@hydrogen.localdomain
# Date 1306237762 -28800
# Node ID 0e71694ab3ed4407ea79d06301e628b99c55b47c
# Parent  0d15701e97c548d2383326121c6e93db7179a502
Aij matrix now use hash table for extra entries when preallcation is not exactly fitted.

diff -r 0d15701e97c5 -r 0e71694ab3ed src/mat/impls/aij/mpi/mpiaij.c
--- a/src/mat/impls/aij/mpi/mpiaij.c	Mon May 23 16:55:28 2011 -0500
+++ b/src/mat/impls/aij/mpi/mpiaij.c	Tue May 24 19:49:22 2011 +0800
@@ -460,94 +460,80 @@
 
   /* Some Variables required in the macro */
   Mat            A = aij->A;
-  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data; 
-  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
-  MatScalar      *aa = a->a;
+  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
   PetscBool      ignorezeroentries = a->ignorezeroentries;
   Mat            B = aij->B;
-  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data; 
-  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
-  MatScalar      *ba = b->a;
-
-  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 
-  PetscInt       nonew = a->nonew; 
-  MatScalar      *ap1,*ap2;
-
-  PetscFunctionBegin;
-  if (v) PetscValidScalarPointer(v,6);
-  for (i=0; i<m; i++) {
-    if (im[i] < 0) continue;
+
+  PetscFunctionBegin;
+  if (v)
+    PetscValidScalarPointer(v,6);
+  for (i=0; i<m; i++) {
+    if (im[i] < 0)
+      continue;
 #if defined(PETSC_USE_DEBUG)
-    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
-#endif
-    if (im[i] >= rstart && im[i] < rend) {
-      row      = im[i] - rstart;
-      lastcol1 = -1;
-      rp1      = aj + ai[row]; 
-      ap1      = aa + ai[row];
-      rmax1    = aimax[row]; 
-      nrow1    = ailen[row];  
-      low1     = 0; 
-      high1    = nrow1;
-      lastcol2 = -1;
-      rp2      = bj + bi[row]; 
-      ap2      = ba + bi[row]; 
-      rmax2    = bimax[row]; 
-      nrow2    = bilen[row];  
-      low2     = 0; 
-      high2    = nrow2;
-
-      for (j=0; j<n; j++) {
-        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
-        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
-        if (in[j] >= cstart && in[j] < cend){
-          col = in[j] - cstart;
-          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
-        } else if (in[j] < 0) continue;
+        if (im[i] >= mat->rmap->N)
+            SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
+#endif
+        if (im[i] >= rstart && im[i] < rend) {
+  row      = im[i] - rstart;
+
+  for (j=0; j<n; j++) {
+    if (v) {
+      if (roworiented)
+        value = v[i*n+j];
+      else
+        value = v[i+j*m];
+    } else
+      value = 0.0;
+      if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES))
+        continue;
+      if (in[j] >= cstart && in[j] < cend) {
+        col = in[j] - cstart;
+        MatSetValues_SeqAIJ(A,1,&row,1,&col,&value,addv);
+      } else if (in[j] < 0)
+        continue;
 #if defined(PETSC_USE_DEBUG)
-        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
-#endif
-        else {
-          if (mat->was_assembled) {
-            if (!aij->colmap) {
-              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
+      else if (in[j] >= mat->cmap->N) {
+  SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
+      }
+#endif
+      else {
+        if (mat->was_assembled) {
+          if (!aij->colmap) {
+            ierr = CreateColmap_MPIAIJ_Private(mat);
+            CHKERRQ(ierr);
+          }
+#if defined (PETSC_USE_CTABLE)
+          ierr = PetscTableFind(aij->colmap,in[j]+1,&col);
+          CHKERRQ(ierr);
+          col--;
+#else
+          col = aij->colmap[in[j]] - 1;
+#endif
+
+        if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
+           ierr = DisAssemble_MPIAIJ(mat);
+           CHKERRQ(ierr);
+           col =  in[j];
+           /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
+           B = aij->B;
+        }
+  } else
+    col = in[j];
+    MatSetValues_SeqAIJ(B,1,&row,1,&col,&value,addv);
+  }
+  }
+        } else {
+          if (!aij->donotstash) {
+            if (roworiented) {
+              ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
+              CHKERRQ(ierr);
+            } else {
+              ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
+              CHKERRQ(ierr);
             }
-#if defined (PETSC_USE_CTABLE)
-            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
-	    col--;
-#else
-            col = aij->colmap[in[j]] - 1;
-#endif
-            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
-              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
-              col =  in[j];
-              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
-              B = aij->B;
-              b = (Mat_SeqAIJ*)B->data; 
-              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
-              rp2      = bj + bi[row]; 
-              ap2      = ba + bi[row]; 
-              rmax2    = bimax[row]; 
-              nrow2    = bilen[row];  
-              low2     = 0; 
-              high2    = nrow2;
-              bm       = aij->B->rmap->n;
-              ba = b->a;
-            }
-          } else col = in[j];
-          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
-        }
-      }
-    } else {
-      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
-      if (!aij->donotstash) {
-        if (roworiented) {
-          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
-        } else {
-          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
-        }
-      }
-    }
+          }
+        }
   }
   PetscFunctionReturn(0);
 }
@@ -623,6 +609,8 @@
   PetscFunctionReturn(0);
 }
 
+extern PetscErrorCode MatFlushBuffer_SeqAIJ(Mat mat);
+
 #undef __FUNCT__  
 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
@@ -672,6 +660,8 @@
     }
   }
   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
+    /* must flush matrix entries in the hash table to aij array */
+    MatFlushBuffer_SeqAIJ(aij->B);
     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
   }
   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
@@ -5670,99 +5660,78 @@
   PetscScalar     value;
   PetscErrorCode  ierr;
 
+  PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
+  PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
+  PetscBool       roworiented = aij->roworiented;
+
+  /* Some Variables required in the macro */
+  Mat             A = aij->A;
+  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data; 
+  PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 
+  Mat             B = aij->B;
+
+  PetscFunctionBegin;
+
   ierr = MatPreallocated(mat);CHKERRQ(ierr);
   if (mat->insertmode == NOT_SET_VALUES) {
     mat->insertmode = addv;
   }
 #if defined(PETSC_USE_DEBUG)
   else if (mat->insertmode != addv) {
-    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
-  }
-#endif
-  { 
-  PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
-  PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
-  PetscBool       roworiented = aij->roworiented;
-
-  /* Some Variables required in the macro */
-  Mat             A = aij->A;
-  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data; 
-  PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
-  MatScalar       *aa = a->a;
-  PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 
-  Mat             B = aij->B;
-  Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data; 
-  PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
-  MatScalar       *ba = b->a;
-
-  PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 
-  PetscInt        nonew = a->nonew; 
-  MatScalar       *ap1,*ap2;
-
-  PetscFunctionBegin;
+  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
+  }
+#endif
+
   for (i=0; i<m; i++) {
     if (im[i] < 0) continue;
 #if defined(PETSC_USE_DEBUG)
     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
 #endif
     if (im[i] >= rstart && im[i] < rend) {
-      row      = im[i] - rstart;
-      lastcol1 = -1;
-      rp1      = aj + ai[row]; 
-      ap1      = aa + ai[row];
-      rmax1    = aimax[row]; 
-      nrow1    = ailen[row];  
-      low1     = 0; 
-      high1    = nrow1;
-      lastcol2 = -1;
-      rp2      = bj + bi[row]; 
-      ap2      = ba + bi[row]; 
-      rmax2    = bimax[row]; 
-      nrow2    = bilen[row];  
-      low2     = 0; 
-      high2    = nrow2;
-
-      for (j=0; j<n; j++) {
-        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
-        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
-        if (in[j] >= cstart && in[j] < cend){
-          col = in[j] - cstart;
-          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
-        } else if (in[j] < 0) continue;
+  row      = im[i] - rstart;
+  for (j=0; j<n; j++) {
+    if (roworiented)
+      value = v[i*n+j];
+    else
+      value = v[i+j*m];
+    if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES))
+      continue;
+    if (in[j] >= cstart && in[j] < cend) {
+      col = in[j] - cstart;
+      MatSetValues_SeqAIJ(A,1,&row,1,&col,&value,addv);
+    } else if (in[j] < 0)
+      continue;
 #if defined(PETSC_USE_DEBUG)
-        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
-#endif
-        else {
-          if (mat->was_assembled) {
-            if (!aij->colmap) {
-              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
-            }
-#if defined (PETSC_USE_CTABLE)
-            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
-	    col--;
-#else
-            col = aij->colmap[in[j]] - 1;
-#endif
-            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
-              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
-              col =  in[j];
-              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
-              B = aij->B;
-              b = (Mat_SeqAIJ*)B->data; 
-              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
-              rp2      = bj + bi[row]; 
-              ap2      = ba + bi[row]; 
-              rmax2    = bimax[row]; 
-              nrow2    = bilen[row];  
-              low2     = 0; 
-              high2    = nrow2;
-              bm       = aij->B->rmap->n;
-              ba = b->a;
-            }
-          } else col = in[j];
-          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
-        }
-      }
+    else if (in[j] >= mat->cmap->N) {
+       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
+    }
+#endif
+    else {
+  if (mat->was_assembled) {
+    if (!aij->colmap) {
+      ierr = CreateColmap_MPIAIJ_Private(mat);
+      CHKERRQ(ierr);
+    }
+#if defined (PETSC_USE_CTABLE)
+    ierr = PetscTableFind(aij->colmap,in[j]+1,&col);
+    CHKERRQ(ierr);
+    col--;
+#else
+    col = aij->colmap[in[j]] - 1;
+#endif
+
+  if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
+  ierr = DisAssemble_MPIAIJ(mat);
+  CHKERRQ(ierr);
+  col =  in[j];
+  /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
+  B = aij->B;
+                            }
+  } else
+    col = in[j];
+    MatSetValues_SeqAIJ(B,1,&row,1,&col,&value,addv);
+                    }
+  }
     } else {
       if (!aij->donotstash) {
         if (roworiented) {
@@ -5772,7 +5741,7 @@
         }
       }
     }
-  }}
+  }
   PetscFunctionReturnVoid();
 }
 EXTERN_C_END
diff -r 0d15701e97c5 -r 0e71694ab3ed src/mat/impls/aij/seq/aij.c
--- a/src/mat/impls/aij/seq/aij.c	Mon May 23 16:55:28 2011 -0500
+++ b/src/mat/impls/aij/seq/aij.c	Tue May 24 19:49:22 2011 +0800
@@ -272,77 +272,277 @@
   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N;
   PetscInt       *imax = a->imax,*ai = a->i,*ailen = a->ilen;
-  PetscErrorCode ierr;
   PetscInt       *aj = a->j,nonew = a->nonew,lastcol = -1;
   MatScalar      *ap,value,*aa = a->a;
   PetscBool      ignorezeroentries = a->ignorezeroentries;
   PetscBool      roworiented = a->roworiented;
 
-  PetscFunctionBegin;  
-  if (v) PetscValidScalarPointer(v,6);
+  Mat_SeqAIJ_Entry_Key       entry_key;
+  Mat_SeqAIJ_Entry          *mat_entry;
+
+  PetscFunctionBegin;  
+
+  if (v)
+    PetscValidScalarPointer(v,6);
   for (k=0; k<m; k++) { /* loop over added rows */
-    row  = im[k]; 
-    if (row < 0) continue;
-#if defined(PETSC_USE_DEBUG)  
-    if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
-#endif
-    rp   = aj + ai[row]; ap = aa + ai[row];
-    rmax = imax[row]; nrow = ailen[row]; 
+    row  = im[k];
+    if (row < 0)
+      continue;
+#if defined(PETSC_USE_DEBUG)
+
+        if (row >= A->rmap->n)
+            SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
+#endif
+
+        rp   = aj + ai[row];
+        ap = aa + ai[row];
+        rmax = imax[row];
+        nrow = ailen[row];
+        low  = 0;
+        high = nrow;
+        for (l=0; l<n; l++) { /* loop over added columns */
+          if (in[l] < 0)
+            continue;
+#if defined(PETSC_USE_DEBUG)
+
+            if (in[l] >= A->cmap->n)
+                SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
+#endif
+
+            col = in[l];
+            if (v) {
+              if (roworiented) {
+                value = v[l + k*n];
+              } else {
+                value = v[k + l*m];
+              }
+            } else {
+              value = 0.;
+            }
+            if (value == 0.0 && ignorezeroentries && (is == ADD_VALUES))
+              continue;
+
+            if (col <= lastcol)
+              low = 0;
+            else
+              high = nrow;
+            lastcol = col;
+            while (high-low > 5) {
+              t = (low+high)/2;
+              if (rp[t] > col)
+                high = t;
+              else
+                low  = t;
+            }
+
+
+            for (i=low; i<high; i++) {
+              if (rp[i] > col)
+                break;
+              if (rp[i] == col) {
+                if (is == ADD_VALUES)
+                  ap[i] += value;
+                else
+                  ap[i] = value;
+                low = i + 1;
+                goto noinsert;
+              }
+            }
+            if (value == 0.0 && ignorezeroentries)
+              goto noinsert;
+            if (nonew == 1)
+              goto noinsert;
+            if (nonew == -1)
+              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at (%D,%D) in the matrix",row,col);
+
+            if(nrow >= rmax) {
+              /* find if the entry in hash table */
+              entry_key.row = row;
+              entry_key.col = col;
+              HASH_FIND (hh, a->entry_ht, &entry_key, sizeof(Mat_SeqAIJ_Entry_Key), mat_entry);
+              if(mat_entry) {
+                if (is == ADD_VALUES)
+                  mat_entry->value += value;
+                else
+                  mat_entry->value = value;
+              } else {
+                /* add a new entry to hash table */
+                mat_entry = calloc(1, sizeof(Mat_SeqAIJ_Entry));
+                mat_entry->key.row = row;
+                mat_entry->key.col = col;
+                mat_entry->value   = value;
+                HASH_ADD (hh, a->entry_ht, key, sizeof(Mat_SeqAIJ_Entry_Key), mat_entry);
+              }
+              goto noinsert;
+            }
+
+            N = nrow++ - 1;
+            a->nz++;
+            high++;
+            /* shift up all the later entries in this row */
+            for (ii=N; ii>=i; ii--) {
+              rp[ii+1] = rp[ii];
+              ap[ii+1] = ap[ii];
+            }
+            rp[i] = col;
+            ap[i] = value;
+            low   = i + 1;
+
+noinsert:
+    ;
+        }
+        ailen[row] = nrow;
+  }
+  A->same_nonzero = PETSC_FALSE;
+  PetscFunctionReturn(0);
+} 
+
+
+#undef __FUNCT__
+#define __FUNCT__ "MatInsertValue_SeqAIJ_Private"
+PetscErrorCode MatInsertValue_SeqAIJ_Private(Mat A,PetscInt row,PetscInt col,const PetscScalar value) 
+{
+  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
+  PetscInt       *rp,low,high,t,nrow,i,ii;
+  PetscInt       *ai = a->i,*ailen = a->ilen;
+  PetscInt       *aj = a->j;
+  MatScalar      *ap,*aa = a->a;
+
+  PetscFunctionBegin;
+
+  if (row < 0)
+    PetscFunctionReturn(0);
+#if defined(PETSC_USE_DEBUG)
+
+    if (row >= A->rmap->n)
+        SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
+#endif
+
+    rp = aj + ai[row];
+    ap = aa + ai[row];
+    nrow = ailen[row];
+
+
+    if (col < 0)
+      PetscFunctionReturn(0);
+#if defined(PETSC_USE_DEBUG)
+
+    if (col >= A->cmap->n)
+        SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,A->cmap->n-1);
+#endif
+
     low  = 0;
     high = nrow;
-    for (l=0; l<n; l++) { /* loop over added columns */
-      if (in[l] < 0) continue;
-#if defined(PETSC_USE_DEBUG)  
-      if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
-#endif
-      col = in[l];
-      if (v) {
-	if (roworiented) {
-	  value = v[l + k*n]; 
-	} else {
-	  value = v[k + l*m];
-	}
-      } else {
-        value = 0.;
-      }
-      if (value == 0.0 && ignorezeroentries && (is == ADD_VALUES)) continue;
-
-      if (col <= lastcol) low = 0; else high = nrow;
-      lastcol = col;
-      while (high-low > 5) {
-        t = (low+high)/2;
-        if (rp[t] > col) high = t;
-        else             low  = t;
-      }
-      for (i=low; i<high; i++) {
-        if (rp[i] > col) break;
-        if (rp[i] == col) {
-          if (is == ADD_VALUES) ap[i] += value;  
-          else                  ap[i] = value;
-          low = i + 1;
-          goto noinsert;
-        }
-      } 
-      if (value == 0.0 && ignorezeroentries) goto noinsert;
-      if (nonew == 1) goto noinsert;
-      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at (%D,%D) in the matrix",row,col);
-      MatSeqXAIJReallocateAIJ(A,A->rmap->n,1,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
-      N = nrow++ - 1; a->nz++; high++;
-      /* shift up all the later entries in this row */
-      for (ii=N; ii>=i; ii--) {
-        rp[ii+1] = rp[ii];
-        ap[ii+1] = ap[ii];
-      }
-      rp[i] = col; 
-      ap[i] = value; 
-      low   = i + 1;
-      noinsert:;
-    }
-    ailen[row] = nrow;
-  }
-  A->same_nonzero = PETSC_FALSE;
-  PetscFunctionReturn(0);
-} 
+    while (high-low > 5) {
+      t = (low+high)/2;
+      if (rp[t] > col)
+        high = t;
+      else
+        low  = t;
+    }
+
+    for (i=low; i<high; i++) {
+      if (rp[i] > col)
+        break;
+      if (rp[i] == col) {
+        ap[i] = value;
+        PetscFunctionReturn(0);
+      }
+    }
+
+    if(nrow >= a->imax[row])
+      SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at (%D,%D) in the matrix",row,col);
+
+    /* shift up all the later entries in this row */
+    for (ii=nrow-1; ii>=i; ii--) {
+      rp[ii+1] = rp[ii];
+      ap[ii+1] = ap[ii];
+    }
+    rp[i] = col;
+    ap[i] = value;
+
+    ailen[row]++;
+    a->nz++;
+
+    PetscFunctionReturn(0);
+}
+
+
+
+#undef __FUNCT__
+#define __FUNCT__ "MatFlushBuffer_SeqAIJ"
+PetscErrorCode MatFlushBuffer_SeqAIJ(Mat A) 
+{
+  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
+  PetscErrorCode ierr;
+  PetscInt       i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
+  PetscInt       m = A->rmap->n, N, *ailen = a->ilen;
+  MatScalar      *aa = a->a;
+
+  PetscInt         *new_ilen, *new_i, *new_j;
+  MatScalar        *new_a;
+  Mat_SeqAIJ_Entry *mat_entry, *tmp_entry;
+  PetscInt         entries = HASH_COUNT(a->entry_ht);
+
+  PetscFunctionBegin;
+
+  /* process values in the hash table */
+  if(entries > 0) {
+    N=0;
+    PetscMalloc(m*sizeof(PetscInt), &new_ilen);
+
+    /* statistic the mat entries in aij matrix*/
+    for (i=0; i<m; i++) {
+      new_ilen[i] = ailen[i];
+      N += ailen[i];
+    }
+
+    /* statistic the mat entries in hash table*/
+    N += entries;
+    for(mat_entry=a->entry_ht; mat_entry != NULL; mat_entry=mat_entry->hh.next) {
+      new_ilen[mat_entry->key.row]++;
+    }
+    /* realloc memory fit nz exactly*/
+    a->maxnz = N;
+    ierr = PetscMalloc3(N,MatScalar,&new_a,N,PetscInt,&new_j,A->rmap->n+1,PetscInt,&new_i);
+    CHKERRQ(ierr);
+
+    /* set row index */
+    new_i[0]=0;
+    for (i=1; i<=m; i++) {
+      new_i[i] = new_i[i-1] + new_ilen[i-1];
+    }
+    /* copy values to new location */
+    for (i=0; i<m; i++) {
+      for(j=0; j<ailen[i]; j++) {
+        new_a[new_i[i]+j] = aa[ai[i]+j];
+        new_j[new_i[i]+j] = aj[ai[i]+j];
+      }
+      imax[i] = new_ilen[i];
+    }
+
+    /* free old array */
+    ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
+    CHKERRQ(ierr);
+
+    a->a = new_a;
+    a->i = new_i;
+    a->j = new_j;
+
+    /* process values in hash table and then free it */
+    HASH_ITER(hh, a->entry_ht, mat_entry, tmp_entry) {
+      MatInsertValue_SeqAIJ_Private(A, mat_entry->key.row, mat_entry->key.col, mat_entry->value);
+      HASH_DEL(a->entry_ht, mat_entry); /* delete; users advances to next */
+      free(mat_entry); /* must free the entry */
+    }
+    a->entry_ht = 0;
+
+    PetscFree(new_ilen);
+  }
+
+  PetscFunctionReturn(0);
+}
+
 
 
 #undef __FUNCT__  
@@ -792,28 +992,42 @@
 {
   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
   PetscErrorCode ierr;
-  PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
+  PetscInt       fshift = 0,i,j,*ai,*aj,*imax = a->imax;
   PetscInt       m = A->rmap->n,*ip,N,*ailen = a->ilen,rmax = 0;
-  MatScalar      *aa = a->a,*ap;
+  MatScalar      *aa,*ap;
   PetscReal      ratio=0.6;
 
-  PetscFunctionBegin;  
-  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
-
-  if (m) rmax = ailen[0]; /* determine row with most nonzeros */
+  PetscInt       *new_i, *new_j;
+  MatScalar      *new_a;
+  PetscInt       entries = HASH_COUNT(a->entry_ht);
+
+  PetscFunctionBegin;
+  if (mode == MAT_FLUSH_ASSEMBLY)
+    PetscFunctionReturn(0);
+
+  /* process values in the hash table */
+  MatFlushBuffer_SeqAIJ(A);
+
+  /* set mat array */
+  aa = a->a;
+  ai = a->i;
+  aj = a->j;
+
+  if (m)
+    rmax = ailen[0]; /* determine row with most nonzeros */
   for (i=1; i<m; i++) {
     /* move each row back by the amount of empty slots (fshift) before it*/
     fshift += imax[i-1] - ailen[i-1];
     rmax   = PetscMax(rmax,ailen[i]);
     if (fshift) {
-      ip = aj + ai[i] ; 
+      ip = aj + ai[i] ;
       ap = aa + ai[i] ;
       N  = ailen[i];
       for (j=0; j<N; j++) {
         ip[j-fshift] = ip[j];
-        ap[j-fshift] = ap[j]; 
-      }
-    } 
+        ap[j-fshift] = ap[j];
+      }
+    }
     ai[i] = ai[i-1] + ailen[i-1];
   }
   if (m) {
@@ -824,22 +1038,48 @@
   for (i=0; i<m; i++) {
     ailen[i] = imax[i] = ai[i+1] - ai[i];
   }
-  a->nz = ai[m]; 
-  if (fshift && a->nounused == -1) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D, %D unneeded", m, A->cmap->n, fshift);
-
-  ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
-  ierr = PetscInfo4(A,"Matrix size: %D X %D; storage space: %D unneeded,%D used\n",m,A->cmap->n,fshift,a->nz);CHKERRQ(ierr);
-  ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr);
-  ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",rmax);CHKERRQ(ierr);
-  A->info.mallocs     += a->reallocs;
+  a->nz = ai[m];
+  if (fshift && a->nounused == -1) {
+    SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D, %D unneeded", m, A->cmap->n, fshift);
+  }
+
+  if(a->maxnz > a->nz) {
+    ierr = PetscMalloc3(a->nz,MatScalar,&new_a,a->nz,PetscInt,&new_j,A->rmap->n+1,PetscInt,&new_i);
+    CHKERRQ(ierr);
+    ierr = PetscMemcpy(new_a,a->a,a->nz*sizeof(MatScalar));
+    CHKERRQ(ierr);
+    ierr = PetscMemcpy(new_i,a->i,(A->rmap->n+1)*sizeof(PetscInt));
+    CHKERRQ(ierr);
+    ierr = PetscMemcpy(new_j,a->j,a->nz*sizeof(PetscInt));
+    CHKERRQ(ierr);
+    ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
+    CHKERRQ(ierr);
+    aa = a->a = new_a;
+    ai = a->i = new_i;
+    aj = a->j = new_j;
+    a->maxnz = a->nz;
+  }
+
+  ierr = MatMarkDiagonal_SeqAIJ(A);
+  CHKERRQ(ierr);
+  ierr = PetscInfo4(A,"Matrix size: %D X %D; storage space: %D unneeded,%D used\n",m,A->cmap->n,fshift,a->nz);
+  CHKERRQ(ierr);
+  ierr = PetscInfo1(A,"Nonzeros in hash table is %D\n",entries);
+  CHKERRQ(ierr);
+  ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",rmax);
+  CHKERRQ(ierr);
+
+
   a->reallocs          = 0;
   A->info.nz_unneeded  = (double)fshift;
   a->rmax              = rmax;
 
+  /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,m,ratio);CHKERRQ(ierr); 
   A->same_nonzero = PETSC_TRUE;
 
-  ierr = MatAssemblyEnd_SeqAIJ_Inode(A,mode);CHKERRQ(ierr);
+  ierr = MatAssemblyEnd_SeqAIJ_Inode(A,mode);
+  CHKERRQ(ierr);
 
   a->idiagvalid = PETSC_FALSE;
   PetscFunctionReturn(0);
@@ -888,12 +1128,20 @@
 PetscErrorCode MatDestroy_SeqAIJ(Mat A)
 {
   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
+  Mat_SeqAIJ_Entry *current_entry, *tmp_entry;
   PetscErrorCode ierr;
 
   PetscFunctionBegin;
 #if defined(PETSC_USE_LOG)
   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz);
 #endif
+
+
+  HASH_ITER(hh, a->entry_ht, current_entry, tmp_entry)  {
+    HASH_DEL(a->entry_ht, current_entry); /* delete; users advances to next */
+    free(current_entry); /* optional- if you want to free */
+  }
+
   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
@@ -3494,6 +3742,7 @@
   b->roworiented       = PETSC_TRUE;
   b->nonew             = 0;
   b->diag              = 0;
+  b->entry_ht          = 0;
   b->solve_work        = 0;
   B->spptr             = 0;
   b->saved_values      = 0;
@@ -3988,8 +4237,12 @@
   PetscBool      ignorezeroentries = a->ignorezeroentries;
   PetscBool      roworiented = a->roworiented;
 
-  PetscFunctionBegin;  
-  ierr = MatPreallocated(A);CHKERRQ(ierr);
+  Mat_SeqAIJ_Entry_Key      entry_key;
+  Mat_SeqAIJ_Entry          *mat_entry;
+
+  PetscFunctionBegin;
+  ierr = MatPreallocated(A);
+  CHKERRQ(ierr);
   imax = a->imax;
   ai = a->i;
   ailen = a->ilen;
@@ -3997,59 +4250,105 @@
   aa = a->a;
 
   for (k=0; k<m; k++) { /* loop over added rows */
-    row  = im[k]; 
-    if (row < 0) continue;
-#if defined(PETSC_USE_DEBUG)  
-    if (row >= A->rmap->n) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
-#endif
-    rp   = aj + ai[row]; ap = aa + ai[row];
-    rmax = imax[row]; nrow = ailen[row]; 
-    low  = 0;
-    high = nrow;
-    for (l=0; l<n; l++) { /* loop over added columns */
-      if (in[l] < 0) continue;
-#if defined(PETSC_USE_DEBUG)  
-      if (in[l] >= A->cmap->n) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
-#endif
-      col = in[l];
-      if (roworiented) {
-        value = v[l + k*n]; 
-      } else {
-        value = v[k + l*m];
-      }
-      if (value == 0.0 && ignorezeroentries && (is == ADD_VALUES)) continue;
-
-      if (col <= lastcol) low = 0; else high = nrow;
-      lastcol = col;
-      while (high-low > 5) {
-        t = (low+high)/2;
-        if (rp[t] > col) high = t;
-        else             low  = t;
-      }
-      for (i=low; i<high; i++) {
-        if (rp[i] > col) break;
-        if (rp[i] == col) {
-          if (is == ADD_VALUES) ap[i] += value;  
-          else                  ap[i] = value;
-          goto noinsert;
-        }
-      } 
-      if (value == 0.0 && ignorezeroentries) goto noinsert;
-      if (nonew == 1) goto noinsert;
-      if (nonew == -1) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
-      MatSeqXAIJReallocateAIJ(A,A->rmap->n,1,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
-      N = nrow++ - 1; a->nz++; high++;
-      /* shift up all the later entries in this row */
-      for (ii=N; ii>=i; ii--) {
-        rp[ii+1] = rp[ii];
-        ap[ii+1] = ap[ii];
-      }
-      rp[i] = col; 
-      ap[i] = value; 
-      noinsert:;
-      low = i + 1;
-    }
-    ailen[row] = nrow;
+    row  = im[k];
+    if (row < 0)
+      continue;
+#if defined(PETSC_USE_DEBUG)
+
+        if (row >= A->rmap->n)
+            SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
+#endif
+
+        rp   = aj + ai[row];
+        ap = aa + ai[row];
+        rmax = imax[row];
+        nrow = ailen[row];
+        low  = 0;
+        high = nrow;
+        for (l=0; l<n; l++) { /* loop over added columns */
+          if (in[l] < 0)
+            continue;
+#if defined(PETSC_USE_DEBUG)
+
+            if (in[l] >= A->cmap->n)
+                SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
+#endif
+
+            col = in[l];
+            if (roworiented) {
+              value = v[l + k*n];
+            } else {
+              value = v[k + l*m];
+            }
+            if (value == 0.0 && ignorezeroentries && (is == ADD_VALUES))
+              continue;
+
+            if (col <= lastcol)
+              low = 0;
+            else
+              high = nrow;
+            lastcol = col;
+            while (high-low > 5) {
+              t = (low+high)/2;
+              if (rp[t] > col)
+                high = t;
+              else
+                low  = t;
+            }
+            for (i=low; i<high; i++) {
+              if (rp[i] > col)
+                break;
+              if (rp[i] == col) {
+                if (is == ADD_VALUES)
+                  ap[i] += value;
+                else
+                  ap[i] = value;
+                goto noinsert;
+              }
+            }
+            if (value == 0.0 && ignorezeroentries)
+              goto noinsert;
+            if (nonew == 1)
+              goto noinsert;
+            if (nonew == -1)
+              SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
+
+            if(nrow >= rmax) {
+              /* find if the entry in hash table */
+              entry_key.row = row;
+              entry_key.col = col;
+              HASH_FIND (hh, a->entry_ht, &entry_key, sizeof(Mat_SeqAIJ_Entry_Key), mat_entry);
+              if(mat_entry) {
+                if (is == ADD_VALUES)
+                  mat_entry->value += value;
+                else
+                  mat_entry->value = value;
+              } else {
+                /* add a new entry to hash table */
+                mat_entry = calloc(1, sizeof(Mat_SeqAIJ_Entry_Key));
+                mat_entry->key.row = row;
+                mat_entry->key.col = col;
+                mat_entry->value   = value;
+                HASH_ADD (hh, a->entry_ht, key, sizeof(Mat_SeqAIJ_Entry_Key), mat_entry);
+              }
+              goto noinsert;
+            }
+
+            N = nrow++ - 1;
+            a->nz++;
+            high++;
+            /* shift up all the later entries in this row */
+            for (ii=N; ii>=i; ii--) {
+              rp[ii+1] = rp[ii];
+              ap[ii+1] = ap[ii];
+            }
+            rp[i] = col;
+            ap[i] = value;
+noinsert:
+    ;
+            low = i + 1;
+        }
+        ailen[row] = nrow;
   }
   A->same_nonzero = PETSC_FALSE;
   PetscFunctionReturnVoid();
diff -r 0d15701e97c5 -r 0e71694ab3ed src/mat/impls/aij/seq/aij.h
--- a/src/mat/impls/aij/seq/aij.h	Mon May 23 16:55:28 2011 -0500
+++ b/src/mat/impls/aij/seq/aij.h	Tue May 24 19:49:22 2011 +0800
@@ -3,6 +3,21 @@
 #define __AIJ_H
 
 #include <private/matimpl.h>
+#include "uthash.h"
+
+
+typedef struct {
+    PetscInt row;
+    PetscInt col;
+}Mat_SeqAIJ_Entry_Key;
+
+
+typedef struct {
+  Mat_SeqAIJ_Entry_Key key;
+  PetscScalar          value;
+  UT_hash_handle       hh;
+}Mat_SeqAIJ_Entry;
+
 
 /*  
     Struct header shared by SeqAIJ, SeqBAIJ and SeqSBAIJ matrix formats
@@ -30,6 +45,7 @@
   PetscInt          *i;               /* pointer to beginning of each row */               \
   PetscInt          *j;               /* column values: j + i[k] - 1 is start of row k */  \
   PetscInt          *diag;            /* pointers to diagonal elements */                  \
+  Mat_SeqAIJ_Entry  *entry_ht;        /* hash table for matrix entry */                    \
   PetscBool         free_diag;         \
   datatype          *a;               /* nonzero elements */                               \
   PetscScalar       *solve_work;      /* work space used in MatSolve */                    \
diff -r 0d15701e97c5 -r 0e71694ab3ed src/mat/impls/aij/seq/uthash.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/mat/impls/aij/seq/uthash.h	Tue May 24 19:49:22 2011 +0800
@@ -0,0 +1,972 @@
+/*
+Copyright (c) 2003-2010, Troy D. Hanson     http://uthash.sourceforge.net
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef UTHASH_H
+#define UTHASH_H 
+
+#include <string.h>   /* memcmp,strlen */
+#include <stddef.h>   /* ptrdiff_t */
+
+/* These macros use decltype or the earlier __typeof GNU extension.
+   As decltype is only available in newer compilers (VS2010 or gcc 4.3+
+   when compiling c++ source) this code uses whatever method is needed
+   or, for VS2008 where neither is available, uses casting workarounds. */
+#ifdef _MSC_VER         /* MS compiler */
+#if _MSC_VER >= 1600 && defined(__cplusplus)  /* VS2010 or newer in C++ mode */
+#define DECLTYPE(x) (decltype(x))
+#else                   /* VS2008 or older (or VS2010 in C mode) */
+#define NO_DECLTYPE
+#define DECLTYPE(x)
+#endif
+#else                   /* GNU, Sun and other compilers */
+#define DECLTYPE(x) (__typeof(x))
+#endif
+
+#ifdef NO_DECLTYPE
+#define DECLTYPE_ASSIGN(dst,src)                                                 \
+do {                                                                             \
+  char **_da_dst = (char**)(&(dst));                                             \
+  *_da_dst = (char*)(src);                                                       \
+} while(0)
+#else 
+#define DECLTYPE_ASSIGN(dst,src)                                                 \
+do {                                                                             \
+  (dst) = DECLTYPE(dst)(src);                                                    \
+} while(0)
+#endif
+
+/* a number of the hash function use uint32_t which isn't defined on win32 */
+#ifdef _MSC_VER
+typedef unsigned int uint32_t;
+#else
+#include <inttypes.h>   /* uint32_t */
+#endif
+
+#define UTHASH_VERSION 1.9.3
+
+#define uthash_fatal(msg) exit(-1)        /* fatal error (out of memory,etc) */
+#define uthash_malloc(sz) malloc(sz)      /* malloc fcn                      */
+#define uthash_free(ptr,sz) free(ptr)     /* free fcn                        */
+
+#define uthash_noexpand_fyi(tbl)          /* can be defined to log noexpand  */
+#define uthash_expand_fyi(tbl)            /* can be defined to log expands   */
+
+/* initial number of buckets */
+#define HASH_INITIAL_NUM_BUCKETS 32      /* initial number of buckets        */
+#define HASH_INITIAL_NUM_BUCKETS_LOG2 5  /* lg2 of initial number of buckets */
+#define HASH_BKT_CAPACITY_THRESH 10      /* expand when bucket count reaches */
+
+/* calculate the element whose hash handle address is hhe */
+#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))
+
+#define HASH_FIND(hh,head,keyptr,keylen,out)                                     \
+do {                                                                             \
+  unsigned _hf_bkt,_hf_hashv;                                                    \
+  out=NULL;                                                                      \
+  if (head) {                                                                    \
+     HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt);   \
+     if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) {                           \
+       HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ],  \
+                        keyptr,keylen,out);                                      \
+     }                                                                           \
+  }                                                                              \
+} while (0)
+
+#ifdef HASH_BLOOM
+#define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM)
+#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0)
+#define HASH_BLOOM_MAKE(tbl)                                                     \
+do {                                                                             \
+  (tbl)->bloom_nbits = HASH_BLOOM;                                               \
+  (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN);                 \
+  if (!((tbl)->bloom_bv))  { uthash_fatal( "out of memory"); }                   \
+  memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN);                                \
+  (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE;                                       \
+} while (0);
+
+#define HASH_BLOOM_FREE(tbl)                                                     \
+do {                                                                             \
+  uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN);                              \
+} while (0);
+
+#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8)))
+#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8)))
+
+#define HASH_BLOOM_ADD(tbl,hashv)                                                \
+  HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
+
+#define HASH_BLOOM_TEST(tbl,hashv)                                               \
+  HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
+
+#else
+#define HASH_BLOOM_MAKE(tbl) 
+#define HASH_BLOOM_FREE(tbl) 
+#define HASH_BLOOM_ADD(tbl,hashv) 
+#define HASH_BLOOM_TEST(tbl,hashv) (1)
+#endif
+
+#define HASH_MAKE_TABLE(hh,head)                                                 \
+do {                                                                             \
+  (head)->hh.tbl = (UT_hash_table*)uthash_malloc(                                \
+                  sizeof(UT_hash_table));                                        \
+  if (!((head)->hh.tbl))  { uthash_fatal( "out of memory"); }                    \
+  memset((head)->hh.tbl, 0, sizeof(UT_hash_table));                              \
+  (head)->hh.tbl->tail = &((head)->hh);                                          \
+  (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS;                        \
+  (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2;              \
+  (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head);                    \
+  (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc(                      \
+          HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket));               \
+  if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); }             \
+  memset((head)->hh.tbl->buckets, 0,                                             \
+          HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket));               \
+  HASH_BLOOM_MAKE((head)->hh.tbl);                                               \
+  (head)->hh.tbl->signature = HASH_SIGNATURE;                                    \
+} while(0)
+
+#define HASH_ADD(hh,head,fieldname,keylen_in,add)                                \
+        HASH_ADD_KEYPTR(hh,head,&add->fieldname,keylen_in,add)
+ 
+#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add)                            \
+do {                                                                             \
+ unsigned _ha_bkt;                                                               \
+ (add)->hh.next = NULL;                                                          \
+ (add)->hh.key = (char*)keyptr;                                                  \
+ (add)->hh.keylen = keylen_in;                                                   \
+ if (!(head)) {                                                                  \
+    head = (add);                                                                \
+    (head)->hh.prev = NULL;                                                      \
+    HASH_MAKE_TABLE(hh,head);                                                    \
+ } else {                                                                        \
+    (head)->hh.tbl->tail->next = (add);                                          \
+    (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail);         \
+    (head)->hh.tbl->tail = &((add)->hh);                                         \
+ }                                                                               \
+ (head)->hh.tbl->num_items++;                                                    \
+ (add)->hh.tbl = (head)->hh.tbl;                                                 \
+ HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets,                         \
+         (add)->hh.hashv, _ha_bkt);                                              \
+ HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh);                   \
+ HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv);                                 \
+ HASH_EMIT_KEY(hh,head,keyptr,keylen_in);                                        \
+ HASH_FSCK(hh,head);                                                             \
+} while(0)
+
+#define HASH_TO_BKT( hashv, num_bkts, bkt )                                      \
+do {                                                                             \
+  bkt = ((hashv) & ((num_bkts) - 1));                                            \
+} while(0)
+
+/* delete "delptr" from the hash table.
+ * "the usual" patch-up process for the app-order doubly-linked-list.
+ * The use of _hd_hh_del below deserves special explanation.
+ * These used to be expressed using (delptr) but that led to a bug
+ * if someone used the same symbol for the head and deletee, like
+ *  HASH_DELETE(hh,users,users);
+ * We want that to work, but by changing the head (users) below
+ * we were forfeiting our ability to further refer to the deletee (users)
+ * in the patch-up process. Solution: use scratch space to
+ * copy the deletee pointer, then the latter references are via that
+ * scratch pointer rather than through the repointed (users) symbol.
+ */
+#define HASH_DELETE(hh,head,delptr)                                              \
+do {                                                                             \
+    unsigned _hd_bkt;                                                            \
+    struct UT_hash_handle *_hd_hh_del;                                           \
+    if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) )  {         \
+        uthash_free((head)->hh.tbl->buckets,                                     \
+                    (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
+        HASH_BLOOM_FREE((head)->hh.tbl);                                         \
+        uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                      \
+        head = NULL;                                                             \
+    } else {                                                                     \
+        _hd_hh_del = &((delptr)->hh);                                            \
+        if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) {     \
+            (head)->hh.tbl->tail =                                               \
+                (UT_hash_handle*)((char*)((delptr)->hh.prev) +                   \
+                (head)->hh.tbl->hho);                                            \
+        }                                                                        \
+        if ((delptr)->hh.prev) {                                                 \
+            ((UT_hash_handle*)((char*)((delptr)->hh.prev) +                      \
+                    (head)->hh.tbl->hho))->next = (delptr)->hh.next;             \
+        } else {                                                                 \
+            DECLTYPE_ASSIGN(head,(delptr)->hh.next);                             \
+        }                                                                        \
+        if (_hd_hh_del->next) {                                                  \
+            ((UT_hash_handle*)((char*)_hd_hh_del->next +                         \
+                    (head)->hh.tbl->hho))->prev =                                \
+                    _hd_hh_del->prev;                                            \
+        }                                                                        \
+        HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt);   \
+        HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del);        \
+        (head)->hh.tbl->num_items--;                                             \
+    }                                                                            \
+    HASH_FSCK(hh,head);                                                          \
+} while (0)
+
+
+/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */
+#define HASH_FIND_STR(head,findstr,out)                                          \
+    HASH_FIND(hh,head,findstr,strlen(findstr),out)
+#define HASH_ADD_STR(head,strfield,add)                                          \
+    HASH_ADD(hh,head,strfield,strlen(add->strfield),add)
+#define HASH_FIND_INT(head,findint,out)                                          \
+    HASH_FIND(hh,head,findint,sizeof(int),out)
+#define HASH_ADD_INT(head,intfield,add)                                          \
+    HASH_ADD(hh,head,intfield,sizeof(int),add)
+#define HASH_FIND_PTR(head,findptr,out)                                          \
+    HASH_FIND(hh,head,findptr,sizeof(void *),out)
+#define HASH_ADD_PTR(head,ptrfield,add)                                          \
+    HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
+#define HASH_DEL(head,delptr)                                                    \
+    HASH_DELETE(hh,head,delptr)
+
+/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
+ * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined.
+ */
+#ifdef HASH_DEBUG
+#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0)
+#define HASH_FSCK(hh,head)                                                       \
+do {                                                                             \
+    unsigned _bkt_i;                                                             \
+    unsigned _count, _bkt_count;                                                 \
+    char *_prev;                                                                 \
+    struct UT_hash_handle *_thh;                                                 \
+    if (head) {                                                                  \
+        _count = 0;                                                              \
+        for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) {       \
+            _bkt_count = 0;                                                      \
+            _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head;                      \
+            _prev = NULL;                                                        \
+            while (_thh) {                                                       \
+               if (_prev != (char*)(_thh->hh_prev)) {                            \
+                   HASH_OOPS("invalid hh_prev %p, actual %p\n",                  \
+                    _thh->hh_prev, _prev );                                      \
+               }                                                                 \
+               _bkt_count++;                                                     \
+               _prev = (char*)(_thh);                                            \
+               _thh = _thh->hh_next;                                             \
+            }                                                                    \
+            _count += _bkt_count;                                                \
+            if ((head)->hh.tbl->buckets[_bkt_i].count !=  _bkt_count) {          \
+               HASH_OOPS("invalid bucket count %d, actual %d\n",                 \
+                (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count);              \
+            }                                                                    \
+        }                                                                        \
+        if (_count != (head)->hh.tbl->num_items) {                               \
+            HASH_OOPS("invalid hh item count %d, actual %d\n",                   \
+                (head)->hh.tbl->num_items, _count );                             \
+        }                                                                        \
+        /* traverse hh in app order; check next/prev integrity, count */         \
+        _count = 0;                                                              \
+        _prev = NULL;                                                            \
+        _thh =  &(head)->hh;                                                     \
+        while (_thh) {                                                           \
+           _count++;                                                             \
+           if (_prev !=(char*)(_thh->prev)) {                                    \
+              HASH_OOPS("invalid prev %p, actual %p\n",                          \
+                    _thh->prev, _prev );                                         \
+           }                                                                     \
+           _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh);                    \
+           _thh = ( _thh->next ?  (UT_hash_handle*)((char*)(_thh->next) +        \
+                                  (head)->hh.tbl->hho) : NULL );                 \
+        }                                                                        \
+        if (_count != (head)->hh.tbl->num_items) {                               \
+            HASH_OOPS("invalid app item count %d, actual %d\n",                  \
+                (head)->hh.tbl->num_items, _count );                             \
+        }                                                                        \
+    }                                                                            \
+} while (0)
+#else
+#define HASH_FSCK(hh,head) 
+#endif
+
+/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to 
+ * the descriptor to which this macro is defined for tuning the hash function.
+ * The app can #include <unistd.h> to get the prototype for write(2). */
+#ifdef HASH_EMIT_KEYS
+#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)                                   \
+do {                                                                             \
+    unsigned _klen = fieldlen;                                                   \
+    write(HASH_EMIT_KEYS, &_klen, sizeof(_klen));                                \
+    write(HASH_EMIT_KEYS, keyptr, fieldlen);                                     \
+} while (0)
+#else 
+#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)                    
+#endif
+
+/* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */
+#ifdef HASH_FUNCTION 
+#define HASH_FCN HASH_FUNCTION
+#else
+#define HASH_FCN HASH_JEN
+#endif
+
+/* The Bernstein hash function, used in Perl prior to v5.6 */
+#define HASH_BER(key,keylen,num_bkts,hashv,bkt)                                  \
+do {                                                                             \
+  unsigned _hb_keylen=keylen;                                                    \
+  char *_hb_key=(char*)(key);                                                    \
+  (hashv) = 0;                                                                   \
+  while (_hb_keylen--)  { (hashv) = ((hashv) * 33) + *_hb_key++; }               \
+  bkt = (hashv) & (num_bkts-1);                                                  \
+} while (0)
+
+
+/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at 
+ * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
+#define HASH_SAX(key,keylen,num_bkts,hashv,bkt)                                  \
+do {                                                                             \
+  unsigned _sx_i;                                                                \
+  char *_hs_key=(char*)(key);                                                    \
+  hashv = 0;                                                                     \
+  for(_sx_i=0; _sx_i < keylen; _sx_i++)                                          \
+      hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i];                     \
+  bkt = hashv & (num_bkts-1);                                                    \
+} while (0)
+
+#define HASH_FNV(key,keylen,num_bkts,hashv,bkt)                                  \
+do {                                                                             \
+  unsigned _fn_i;                                                                \
+  char *_hf_key=(char*)(key);                                                    \
+  hashv = 2166136261UL;                                                          \
+  for(_fn_i=0; _fn_i < keylen; _fn_i++)                                          \
+      hashv = (hashv * 16777619) ^ _hf_key[_fn_i];                               \
+  bkt = hashv & (num_bkts-1);                                                    \
+} while(0);
+ 
+#define HASH_OAT(key,keylen,num_bkts,hashv,bkt)                                  \
+do {                                                                             \
+  unsigned _ho_i;                                                                \
+  char *_ho_key=(char*)(key);                                                    \
+  hashv = 0;                                                                     \
+  for(_ho_i=0; _ho_i < keylen; _ho_i++) {                                        \
+      hashv += _ho_key[_ho_i];                                                   \
+      hashv += (hashv << 10);                                                    \
+      hashv ^= (hashv >> 6);                                                     \
+  }                                                                              \
+  hashv += (hashv << 3);                                                         \
+  hashv ^= (hashv >> 11);                                                        \
+  hashv += (hashv << 15);                                                        \
+  bkt = hashv & (num_bkts-1);                                                    \
+} while(0)
+
+#define HASH_JEN_MIX(a,b,c)                                                      \
+do {                                                                             \
+  a -= b; a -= c; a ^= ( c >> 13 );                                              \
+  b -= c; b -= a; b ^= ( a << 8 );                                               \
+  c -= a; c -= b; c ^= ( b >> 13 );                                              \
+  a -= b; a -= c; a ^= ( c >> 12 );                                              \
+  b -= c; b -= a; b ^= ( a << 16 );                                              \
+  c -= a; c -= b; c ^= ( b >> 5 );                                               \
+  a -= b; a -= c; a ^= ( c >> 3 );                                               \
+  b -= c; b -= a; b ^= ( a << 10 );                                              \
+  c -= a; c -= b; c ^= ( b >> 15 );                                              \
+} while (0)
+
+#define HASH_JEN(key,keylen,num_bkts,hashv,bkt)                                  \
+do {                                                                             \
+  unsigned _hj_i,_hj_j,_hj_k;                                                    \
+  char *_hj_key=(char*)(key);                                                    \
+  hashv = 0xfeedbeef;                                                            \
+  _hj_i = _hj_j = 0x9e3779b9;                                                    \
+  _hj_k = keylen;                                                                \
+  while (_hj_k >= 12) {                                                          \
+    _hj_i +=    (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 )                      \
+        + ( (unsigned)_hj_key[2] << 16 )                                         \
+        + ( (unsigned)_hj_key[3] << 24 ) );                                      \
+    _hj_j +=    (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 )                      \
+        + ( (unsigned)_hj_key[6] << 16 )                                         \
+        + ( (unsigned)_hj_key[7] << 24 ) );                                      \
+    hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 )                         \
+        + ( (unsigned)_hj_key[10] << 16 )                                        \
+        + ( (unsigned)_hj_key[11] << 24 ) );                                     \
+                                                                                 \
+     HASH_JEN_MIX(_hj_i, _hj_j, hashv);                                          \
+                                                                                 \
+     _hj_key += 12;                                                              \
+     _hj_k -= 12;                                                                \
+  }                                                                              \
+  hashv += keylen;                                                               \
+  switch ( _hj_k ) {                                                             \
+     case 11: hashv += ( (unsigned)_hj_key[10] << 24 );                          \
+     case 10: hashv += ( (unsigned)_hj_key[9] << 16 );                           \
+     case 9:  hashv += ( (unsigned)_hj_key[8] << 8 );                            \
+     case 8:  _hj_j += ( (unsigned)_hj_key[7] << 24 );                           \
+     case 7:  _hj_j += ( (unsigned)_hj_key[6] << 16 );                           \
+     case 6:  _hj_j += ( (unsigned)_hj_key[5] << 8 );                            \
+     case 5:  _hj_j += _hj_key[4];                                               \
+     case 4:  _hj_i += ( (unsigned)_hj_key[3] << 24 );                           \
+     case 3:  _hj_i += ( (unsigned)_hj_key[2] << 16 );                           \
+     case 2:  _hj_i += ( (unsigned)_hj_key[1] << 8 );                            \
+     case 1:  _hj_i += _hj_key[0];                                               \
+  }                                                                              \
+  HASH_JEN_MIX(_hj_i, _hj_j, hashv);                                             \
+  bkt = hashv & (num_bkts-1);                                                    \
+} while(0)
+
+/* The Paul Hsieh hash function */
+#undef get16bits
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__)             \
+  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
+#define get16bits(d) (*((const uint16_t *) (d)))
+#endif
+
+#if !defined (get16bits)
+#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)             \
+                       +(uint32_t)(((const uint8_t *)(d))[0]) )
+#endif
+#define HASH_SFH(key,keylen,num_bkts,hashv,bkt)                                  \
+do {                                                                             \
+  char *_sfh_key=(char*)(key);                                                   \
+  uint32_t _sfh_tmp, _sfh_len = keylen;                                          \
+                                                                                 \
+  int _sfh_rem = _sfh_len & 3;                                                   \
+  _sfh_len >>= 2;                                                                \
+  hashv = 0xcafebabe;                                                            \
+                                                                                 \
+  /* Main loop */                                                                \
+  for (;_sfh_len > 0; _sfh_len--) {                                              \
+    hashv    += get16bits (_sfh_key);                                            \
+    _sfh_tmp       = (get16bits (_sfh_key+2) << 11) ^ hashv;                     \
+    hashv     = (hashv << 16) ^ _sfh_tmp;                                        \
+    _sfh_key += 2*sizeof (uint16_t);                                             \
+    hashv    += hashv >> 11;                                                     \
+  }                                                                              \
+                                                                                 \
+  /* Handle end cases */                                                         \
+  switch (_sfh_rem) {                                                            \
+    case 3: hashv += get16bits (_sfh_key);                                       \
+            hashv ^= hashv << 16;                                                \
+            hashv ^= _sfh_key[sizeof (uint16_t)] << 18;                          \
+            hashv += hashv >> 11;                                                \
+            break;                                                               \
+    case 2: hashv += get16bits (_sfh_key);                                       \
+            hashv ^= hashv << 11;                                                \
+            hashv += hashv >> 17;                                                \
+            break;                                                               \
+    case 1: hashv += *_sfh_key;                                                  \
+            hashv ^= hashv << 10;                                                \
+            hashv += hashv >> 1;                                                 \
+  }                                                                              \
+                                                                                 \
+    /* Force "avalanching" of final 127 bits */                                  \
+    hashv ^= hashv << 3;                                                         \
+    hashv += hashv >> 5;                                                         \
+    hashv ^= hashv << 4;                                                         \
+    hashv += hashv >> 17;                                                        \
+    hashv ^= hashv << 25;                                                        \
+    hashv += hashv >> 6;                                                         \
+    bkt = hashv & (num_bkts-1);                                                  \
+} while(0);
+
+#ifdef HASH_USING_NO_STRICT_ALIASING
+/* The MurmurHash exploits some CPU's (e.g. x86) tolerance for unaligned reads.
+ * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
+ * So MurmurHash comes in two versions, the faster unaligned one and the slower
+ * aligned one. We only use the faster one on CPU's where we know it's safe. 
+ *
+ * Note the preprocessor built-in defines can be emitted using:
+ *
+ *   gcc -m64 -dM -E - < /dev/null                  (on gcc)
+ *   cc -## a.c (where a.c is a simple test file)   (Sun Studio)
+ */
+#if (defined(__i386__) || defined(__x86_64__)) 
+#define HASH_MUR HASH_MUR_UNALIGNED
+#else
+#define HASH_MUR HASH_MUR_ALIGNED
+#endif
+
+/* Appleby's MurmurHash fast version for unaligned-tolerant archs like i386 */
+#define HASH_MUR_UNALIGNED(key,keylen,num_bkts,hashv,bkt)                        \
+do {                                                                             \
+  const unsigned int _mur_m = 0x5bd1e995;                                        \
+  const int _mur_r = 24;                                                         \
+  hashv = 0xcafebabe ^ keylen;                                                   \
+  char *_mur_key = (char *)(key);                                                \
+  uint32_t _mur_tmp, _mur_len = keylen;                                          \
+                                                                                 \
+  for (;_mur_len >= 4; _mur_len-=4) {                                            \
+    _mur_tmp = *(uint32_t *)_mur_key;                                            \
+    _mur_tmp *= _mur_m;                                                          \
+    _mur_tmp ^= _mur_tmp >> _mur_r;                                              \
+    _mur_tmp *= _mur_m;                                                          \
+    hashv *= _mur_m;                                                             \
+    hashv ^= _mur_tmp;                                                           \
+    _mur_key += 4;                                                               \
+  }                                                                              \
+                                                                                 \
+  switch(_mur_len)                                                               \
+  {                                                                              \
+    case 3: hashv ^= _mur_key[2] << 16;                                          \
+    case 2: hashv ^= _mur_key[1] << 8;                                           \
+    case 1: hashv ^= _mur_key[0];                                                \
+            hashv *= _mur_m;                                                     \
+  };                                                                             \
+                                                                                 \
+  hashv ^= hashv >> 13;                                                          \
+  hashv *= _mur_m;                                                               \
+  hashv ^= hashv >> 15;                                                          \
+                                                                                 \
+  bkt = hashv & (num_bkts-1);                                                    \
+} while(0)
+
+/* Appleby's MurmurHash version for alignment-sensitive archs like Sparc */
+#define HASH_MUR_ALIGNED(key,keylen,num_bkts,hashv,bkt)                          \
+do {                                                                             \
+  const unsigned int _mur_m = 0x5bd1e995;                                        \
+  const int _mur_r = 24;                                                         \
+  hashv = 0xcafebabe ^ (keylen);                                                 \
+  char *_mur_key = (char *)(key);                                                \
+  uint32_t _mur_len = keylen;                                                    \
+  int _mur_align = (int)_mur_key & 3;                                            \
+                                                                                 \
+  if (_mur_align && (_mur_len >= 4)) {                                           \
+    unsigned _mur_t = 0, _mur_d = 0;                                             \
+    switch(_mur_align) {                                                         \
+      case 1: _mur_t |= _mur_key[2] << 16;                                       \
+      case 2: _mur_t |= _mur_key[1] << 8;                                        \
+      case 3: _mur_t |= _mur_key[0];                                             \
+    }                                                                            \
+    _mur_t <<= (8 * _mur_align);                                                 \
+    _mur_key += 4-_mur_align;                                                    \
+    _mur_len -= 4-_mur_align;                                                    \
+    int _mur_sl = 8 * (4-_mur_align);                                            \
+    int _mur_sr = 8 * _mur_align;                                                \
+                                                                                 \
+    for (;_mur_len >= 4; _mur_len-=4) {                                          \
+      _mur_d = *(unsigned *)_mur_key;                                            \
+      _mur_t = (_mur_t >> _mur_sr) | (_mur_d << _mur_sl);                        \
+      unsigned _mur_k = _mur_t;                                                  \
+      _mur_k *= _mur_m;                                                          \
+      _mur_k ^= _mur_k >> _mur_r;                                                \
+      _mur_k *= _mur_m;                                                          \
+      hashv *= _mur_m;                                                           \
+      hashv ^= _mur_k;                                                           \
+      _mur_t = _mur_d;                                                           \
+      _mur_key += 4;                                                             \
+    }                                                                            \
+    _mur_d = 0;                                                                  \
+    if(_mur_len >= _mur_align) {                                                 \
+      switch(_mur_align) {                                                       \
+        case 3: _mur_d |= _mur_key[2] << 16;                                     \
+        case 2: _mur_d |= _mur_key[1] << 8;                                      \
+        case 1: _mur_d |= _mur_key[0];                                           \
+      }                                                                          \
+      unsigned _mur_k = (_mur_t >> _mur_sr) | (_mur_d << _mur_sl);               \
+      _mur_k *= _mur_m;                                                          \
+      _mur_k ^= _mur_k >> _mur_r;                                                \
+      _mur_k *= _mur_m;                                                          \
+      hashv *= _mur_m;                                                           \
+      hashv ^= _mur_k;                                                           \
+      _mur_k += _mur_align;                                                      \
+      _mur_len -= _mur_align;                                                    \
+                                                                                 \
+      switch(_mur_len)                                                           \
+      {                                                                          \
+        case 3: hashv ^= _mur_key[2] << 16;                                      \
+        case 2: hashv ^= _mur_key[1] << 8;                                       \
+        case 1: hashv ^= _mur_key[0];                                            \
+                hashv *= _mur_m;                                                 \
+      }                                                                          \
+    } else {                                                                     \
+      switch(_mur_len)                                                           \
+      {                                                                          \
+        case 3: _mur_d ^= _mur_key[2] << 16;                                     \
+        case 2: _mur_d ^= _mur_key[1] << 8;                                      \
+        case 1: _mur_d ^= _mur_key[0];                                           \
+        case 0: hashv ^= (_mur_t >> _mur_sr) | (_mur_d << _mur_sl);              \
+        hashv *= _mur_m;                                                         \
+      }                                                                          \
+    }                                                                            \
+                                                                                 \
+    hashv ^= hashv >> 13;                                                        \
+    hashv *= _mur_m;                                                             \
+    hashv ^= hashv >> 15;                                                        \
+  } else {                                                                       \
+    for (;_mur_len >= 4; _mur_len-=4) {                                          \
+      unsigned _mur_k = *(unsigned*)_mur_key;                                    \
+      _mur_k *= _mur_m;                                                          \
+      _mur_k ^= _mur_k >> _mur_r;                                                \
+      _mur_k *= _mur_m;                                                          \
+      hashv *= _mur_m;                                                           \
+      hashv ^= _mur_k;                                                           \
+      _mur_key += 4;                                                             \
+    }                                                                            \
+    switch(_mur_len)                                                             \
+    {                                                                            \
+      case 3: hashv ^= _mur_key[2] << 16;                                        \
+      case 2: hashv ^= _mur_key[1] << 8;                                         \
+      case 1: hashv ^= _mur_key[0];                                              \
+      hashv *= _mur_m;                                                           \
+    }                                                                            \
+                                                                                 \
+    hashv ^= hashv >> 13;                                                        \
+    hashv *= _mur_m;                                                             \
+    hashv ^= hashv >> 15;                                                        \
+  }                                                                              \
+  bkt = hashv & (num_bkts-1);                                                    \
+} while(0)
+#endif  /* HASH_USING_NO_STRICT_ALIASING */
+
+/* key comparison function; return 0 if keys equal */
+#define HASH_KEYCMP(a,b,len) memcmp(a,b,len) 
+
+/* iterate over items in a known bucket to find desired item */
+#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out)                       \
+do {                                                                             \
+ if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head));          \
+ else out=NULL;                                                                  \
+ while (out) {                                                                   \
+    if (out->hh.keylen == keylen_in) {                                           \
+        if ((HASH_KEYCMP(out->hh.key,keyptr,keylen_in)) == 0) break;             \
+    }                                                                            \
+    if (out->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,out->hh.hh_next)); \
+    else out = NULL;                                                             \
+ }                                                                               \
+} while(0)
+
+/* add an item to a bucket  */
+#define HASH_ADD_TO_BKT(head,addhh)                                              \
+do {                                                                             \
+ head.count++;                                                                   \
+ (addhh)->hh_next = head.hh_head;                                                \
+ (addhh)->hh_prev = NULL;                                                        \
+ if (head.hh_head) { (head).hh_head->hh_prev = (addhh); }                        \
+ (head).hh_head=addhh;                                                           \
+ if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH)             \
+     && (addhh)->tbl->noexpand != 1) {                                           \
+       HASH_EXPAND_BUCKETS((addhh)->tbl);                                        \
+ }                                                                               \
+} while(0)
+
+/* remove an item from a given bucket */
+#define HASH_DEL_IN_BKT(hh,head,hh_del)                                          \
+    (head).count--;                                                              \
+    if ((head).hh_head == hh_del) {                                              \
+      (head).hh_head = hh_del->hh_next;                                          \
+    }                                                                            \
+    if (hh_del->hh_prev) {                                                       \
+        hh_del->hh_prev->hh_next = hh_del->hh_next;                              \
+    }                                                                            \
+    if (hh_del->hh_next) {                                                       \
+        hh_del->hh_next->hh_prev = hh_del->hh_prev;                              \
+    }                                                                
+
+/* Bucket expansion has the effect of doubling the number of buckets
+ * and redistributing the items into the new buckets. Ideally the
+ * items will distribute more or less evenly into the new buckets
+ * (the extent to which this is true is a measure of the quality of
+ * the hash function as it applies to the key domain). 
+ * 
+ * With the items distributed into more buckets, the chain length
+ * (item count) in each bucket is reduced. Thus by expanding buckets
+ * the hash keeps a bound on the chain length. This bounded chain 
+ * length is the essence of how a hash provides constant time lookup.
+ * 
+ * The calculation of tbl->ideal_chain_maxlen below deserves some
+ * explanation. First, keep in mind that we're calculating the ideal
+ * maximum chain length based on the *new* (doubled) bucket count.
+ * In fractions this is just n/b (n=number of items,b=new num buckets).
+ * Since the ideal chain length is an integer, we want to calculate 
+ * ceil(n/b). We don't depend on floating point arithmetic in this
+ * hash, so to calculate ceil(n/b) with integers we could write
+ * 
+ *      ceil(n/b) = (n/b) + ((n%b)?1:0)
+ * 
+ * and in fact a previous version of this hash did just that.
+ * But now we have improved things a bit by recognizing that b is
+ * always a power of two. We keep its base 2 log handy (call it lb),
+ * so now we can write this with a bit shift and logical AND:
+ * 
+ *      ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
+ * 
+ */
+#define HASH_EXPAND_BUCKETS(tbl)                                                 \
+do {                                                                             \
+    unsigned _he_bkt;                                                            \
+    unsigned _he_bkt_i;                                                          \
+    struct UT_hash_handle *_he_thh, *_he_hh_nxt;                                 \
+    UT_hash_bucket *_he_new_buckets, *_he_newbkt;                                \
+    _he_new_buckets = (UT_hash_bucket*)uthash_malloc(                            \
+             2 * tbl->num_buckets * sizeof(struct UT_hash_bucket));              \
+    if (!_he_new_buckets) { uthash_fatal( "out of memory"); }                    \
+    memset(_he_new_buckets, 0,                                                   \
+            2 * tbl->num_buckets * sizeof(struct UT_hash_bucket));               \
+    tbl->ideal_chain_maxlen =                                                    \
+       (tbl->num_items >> (tbl->log2_num_buckets+1)) +                           \
+       ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0);                    \
+    tbl->nonideal_items = 0;                                                     \
+    for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++)                \
+    {                                                                            \
+        _he_thh = tbl->buckets[ _he_bkt_i ].hh_head;                             \
+        while (_he_thh) {                                                        \
+           _he_hh_nxt = _he_thh->hh_next;                                        \
+           HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt);            \
+           _he_newbkt = &(_he_new_buckets[ _he_bkt ]);                           \
+           if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) {                \
+             tbl->nonideal_items++;                                              \
+             _he_newbkt->expand_mult = _he_newbkt->count /                       \
+                                        tbl->ideal_chain_maxlen;                 \
+           }                                                                     \
+           _he_thh->hh_prev = NULL;                                              \
+           _he_thh->hh_next = _he_newbkt->hh_head;                               \
+           if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev =               \
+                _he_thh;                                                         \
+           _he_newbkt->hh_head = _he_thh;                                        \
+           _he_thh = _he_hh_nxt;                                                 \
+        }                                                                        \
+    }                                                                            \
+    uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
+    tbl->num_buckets *= 2;                                                       \
+    tbl->log2_num_buckets++;                                                     \
+    tbl->buckets = _he_new_buckets;                                              \
+    tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ?         \
+        (tbl->ineff_expands+1) : 0;                                              \
+    if (tbl->ineff_expands > 1) {                                                \
+        tbl->noexpand=1;                                                         \
+        uthash_noexpand_fyi(tbl);                                                \
+    }                                                                            \
+    uthash_expand_fyi(tbl);                                                      \
+} while(0)
+
+
+/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
+/* Note that HASH_SORT assumes the hash handle name to be hh. 
+ * HASH_SRT was added to allow the hash handle name to be passed in. */
+#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
+#define HASH_SRT(hh,head,cmpfcn)                                                 \
+do {                                                                             \
+  unsigned _hs_i;                                                                \
+  unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize;               \
+  struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail;            \
+  if (head) {                                                                    \
+      _hs_insize = 1;                                                            \
+      _hs_looping = 1;                                                           \
+      _hs_list = &((head)->hh);                                                  \
+      while (_hs_looping) {                                                      \
+          _hs_p = _hs_list;                                                      \
+          _hs_list = NULL;                                                       \
+          _hs_tail = NULL;                                                       \
+          _hs_nmerges = 0;                                                       \
+          while (_hs_p) {                                                        \
+              _hs_nmerges++;                                                     \
+              _hs_q = _hs_p;                                                     \
+              _hs_psize = 0;                                                     \
+              for ( _hs_i = 0; _hs_i  < _hs_insize; _hs_i++ ) {                  \
+                  _hs_psize++;                                                   \
+                  _hs_q = (UT_hash_handle*)((_hs_q->next) ?                      \
+                          ((void*)((char*)(_hs_q->next) +                        \
+                          (head)->hh.tbl->hho)) : NULL);                         \
+                  if (! (_hs_q) ) break;                                         \
+              }                                                                  \
+              _hs_qsize = _hs_insize;                                            \
+              while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) {           \
+                  if (_hs_psize == 0) {                                          \
+                      _hs_e = _hs_q;                                             \
+                      _hs_q = (UT_hash_handle*)((_hs_q->next) ?                  \
+                              ((void*)((char*)(_hs_q->next) +                    \
+                              (head)->hh.tbl->hho)) : NULL);                     \
+                      _hs_qsize--;                                               \
+                  } else if ( (_hs_qsize == 0) || !(_hs_q) ) {                   \
+                      _hs_e = _hs_p;                                             \
+                      _hs_p = (UT_hash_handle*)((_hs_p->next) ?                  \
+                              ((void*)((char*)(_hs_p->next) +                    \
+                              (head)->hh.tbl->hho)) : NULL);                     \
+                      _hs_psize--;                                               \
+                  } else if ((                                                   \
+                      cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
+                             DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
+                             ) <= 0) {                                           \
+                      _hs_e = _hs_p;                                             \
+                      _hs_p = (UT_hash_handle*)((_hs_p->next) ?                  \
+                              ((void*)((char*)(_hs_p->next) +                    \
+                              (head)->hh.tbl->hho)) : NULL);                     \
+                      _hs_psize--;                                               \
+                  } else {                                                       \
+                      _hs_e = _hs_q;                                             \
+                      _hs_q = (UT_hash_handle*)((_hs_q->next) ?                  \
+                              ((void*)((char*)(_hs_q->next) +                    \
+                              (head)->hh.tbl->hho)) : NULL);                     \
+                      _hs_qsize--;                                               \
+                  }                                                              \
+                  if ( _hs_tail ) {                                              \
+                      _hs_tail->next = ((_hs_e) ?                                \
+                            ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL);          \
+                  } else {                                                       \
+                      _hs_list = _hs_e;                                          \
+                  }                                                              \
+                  _hs_e->prev = ((_hs_tail) ?                                    \
+                     ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL);              \
+                  _hs_tail = _hs_e;                                              \
+              }                                                                  \
+              _hs_p = _hs_q;                                                     \
+          }                                                                      \
+          _hs_tail->next = NULL;                                                 \
+          if ( _hs_nmerges <= 1 ) {                                              \
+              _hs_looping=0;                                                     \
+              (head)->hh.tbl->tail = _hs_tail;                                   \
+              DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list));      \
+          }                                                                      \
+          _hs_insize *= 2;                                                       \
+      }                                                                          \
+      HASH_FSCK(hh,head);                                                        \
+ }                                                                               \
+} while (0)
+
+/* This function selects items from one hash into another hash. 
+ * The end result is that the selected items have dual presence 
+ * in both hashes. There is no copy of the items made; rather 
+ * they are added into the new hash through a secondary hash 
+ * hash handle that must be present in the structure. */
+#define HASH_SELECT(hh_dst, dst, hh_src, src, cond)                              \
+do {                                                                             \
+  unsigned _src_bkt, _dst_bkt;                                                   \
+  void *_last_elt=NULL, *_elt;                                                   \
+  UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL;                         \
+  ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst));                 \
+  if (src) {                                                                     \
+    for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) {     \
+      for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head;                \
+          _src_hh;                                                               \
+          _src_hh = _src_hh->hh_next) {                                          \
+          _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh);                       \
+          if (cond(_elt)) {                                                      \
+            _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho);               \
+            _dst_hh->key = _src_hh->key;                                         \
+            _dst_hh->keylen = _src_hh->keylen;                                   \
+            _dst_hh->hashv = _src_hh->hashv;                                     \
+            _dst_hh->prev = _last_elt;                                           \
+            _dst_hh->next = NULL;                                                \
+            if (_last_elt_hh) { _last_elt_hh->next = _elt; }                     \
+            if (!dst) {                                                          \
+              DECLTYPE_ASSIGN(dst,_elt);                                         \
+              HASH_MAKE_TABLE(hh_dst,dst);                                       \
+            } else {                                                             \
+              _dst_hh->tbl = (dst)->hh_dst.tbl;                                  \
+            }                                                                    \
+            HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt);    \
+            HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh);            \
+            (dst)->hh_dst.tbl->num_items++;                                      \
+            _last_elt = _elt;                                                    \
+            _last_elt_hh = _dst_hh;                                              \
+          }                                                                      \
+      }                                                                          \
+    }                                                                            \
+  }                                                                              \
+  HASH_FSCK(hh_dst,dst);                                                         \
+} while (0)
+
+#define HASH_CLEAR(hh,head)                                                      \
+do {                                                                             \
+  if (head) {                                                                    \
+    uthash_free((head)->hh.tbl->buckets,                                         \
+                (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket));      \
+    uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                          \
+    (head)=NULL;                                                                 \
+  }                                                                              \
+} while(0)
+
+#ifdef NO_DECLTYPE
+#define HASH_ITER(hh,head,el,tmp)                                                \
+for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL);       \
+  el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL)) 
+#else
+#define HASH_ITER(hh,head,el,tmp)                                                \
+for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL);                 \
+  el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL))
+#endif
+
+/* obtain a count of items in the hash */
+#define HASH_COUNT(head) HASH_CNT(hh,head) 
+#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0)
+
+typedef struct UT_hash_bucket {
+   struct UT_hash_handle *hh_head;
+   unsigned count;
+
+   /* expand_mult is normally set to 0. In this situation, the max chain length
+    * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
+    * the bucket's chain exceeds this length, bucket expansion is triggered). 
+    * However, setting expand_mult to a non-zero value delays bucket expansion
+    * (that would be triggered by additions to this particular bucket)
+    * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
+    * (The multiplier is simply expand_mult+1). The whole idea of this
+    * multiplier is to reduce bucket expansions, since they are expensive, in
+    * situations where we know that a particular bucket tends to be overused.
+    * It is better to let its chain length grow to a longer yet-still-bounded
+    * value, than to do an O(n) bucket expansion too often. 
+    */
+   unsigned expand_mult;
+
+} UT_hash_bucket;
+
+/* random signature used only to find hash tables in external analysis */
+#define HASH_SIGNATURE 0xa0111fe1
+#define HASH_BLOOM_SIGNATURE 0xb12220f2
+
+typedef struct UT_hash_table {
+   UT_hash_bucket *buckets;
+   unsigned num_buckets, log2_num_buckets;
+   unsigned num_items;
+   struct UT_hash_handle *tail; /* tail hh in app order, for fast append    */
+   ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */
+
+   /* in an ideal situation (all buckets used equally), no bucket would have
+    * more than ceil(#items/#buckets) items. that's the ideal chain length. */
+   unsigned ideal_chain_maxlen;
+
+   /* nonideal_items is the number of items in the hash whose chain position
+    * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
+    * hash distribution; reaching them in a chain traversal takes >ideal steps */
+   unsigned nonideal_items;
+
+   /* ineffective expands occur when a bucket doubling was performed, but 
+    * afterward, more than half the items in the hash had nonideal chain
+    * positions. If this happens on two consecutive expansions we inhibit any
+    * further expansion, as it's not helping; this happens when the hash
+    * function isn't a good fit for the key domain. When expansion is inhibited
+    * the hash will still work, albeit no longer in constant time. */
+   unsigned ineff_expands, noexpand;
+
+   uint32_t signature; /* used only to find hash tables in external analysis */
+#ifdef HASH_BLOOM
+   uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
+   uint8_t *bloom_bv;
+   char bloom_nbits;
+#endif
+
+} UT_hash_table;
+
+typedef struct UT_hash_handle {
+   struct UT_hash_table *tbl;
+   void *prev;                       /* prev element in app order      */
+   void *next;                       /* next element in app order      */
+   struct UT_hash_handle *hh_prev;   /* previous hh in bucket order    */
+   struct UT_hash_handle *hh_next;   /* next hh in bucket order        */
+   void *key;                        /* ptr to enclosing struct's key  */
+   unsigned keylen;                  /* enclosing struct's key len     */
+   unsigned hashv;                   /* result of hash-fcn(key)        */
+} UT_hash_handle;
+
+#endif /* UTHASH_H */