[petsc-dev] Can someone explain what is going on here?

Jed Brown jedbrown at mcs.anl.gov
Tue May 21 08:13:53 CDT 2013


Matthew Knepley <knepley at gmail.com> writes:

> https://bitbucket.org/petsc/petsc/src/ff331c249a8bbf694711b310f25fec0e839b33db/src/ksp/ksp/interface/itfunc.c?at=master#cl-202
>
> Dave has a DMComposite which he splits using FS, but then he wants
> to use GMG on the velocity block. He manually sets the DM, but this
> wipes out the matrix which FS set there.

It's a crummy interface, but this is what is intended:

  KSPSetDMActive(ksp,PETSC_FALSE);

> Do we have a clear explanation of what setting a DM on the solver does?
>
>   Thanks,
>
>     Matt
>
> -- 
> What most experimenters take for granted before they begin their
> experiments is infinitely more interesting than any results to which their
> experiments lead.
> -- Norbert Wiener
> static char help[] = "Solves the incompressible, variable viscosity stokes equation in 2d using different stable element types.\n";
>
> /* Contributed by Dave May */
>
> #include <petscksp.h>
> #include <petscdmda.h>
> #include <petsc-private/dmdaimpl.h>
>
> /* A Maple-generated exact solution created by Mirko Velic (mirko.velic at sci.monash.edu.au) */
> #include "ex43-solcx.h"
>
> PetscErrorCode DMDABCApplyFreeSlip(DM,Mat,Vec);
> PetscErrorCode _Stokes2DAssembleOperatorsNNZ_Q1mP1(Mat Aup,Mat Apu,DM dav,DM dap);
> PetscErrorCode StokesDMDAView(DM da,Vec x);
> PetscErrorCode StokesCoeffDMDAView(DM da,Vec x);
>
>
> #define NSD            2 /* number of spatial dimensions */
> #define NODES_PER_EL   4 /* nodes per element */
> #define U_DOFS         2 /* degrees of freedom per velocity node */
> #define P_DOFS         1 /* degrees of freedom per pressure node */
> #define GAUSS_POINTS   4
>
> /* cell based evaluation: a single eta, fx and fy value per cell */
> typedef struct {
>   PetscScalar eta,fx,fy;
> } Coefficients;
>
> /* Gauss point based evaluation: 2*GAUSS_POINTS coordinates plus GAUSS_POINTS values each of eta, fx, fy (8+4+4+4 = 20 scalars when GAUSS_POINTS is 4) */
> typedef struct {
>   PetscScalar gp_coords[2*GAUSS_POINTS]; /* two entries per Gauss point */
>   PetscScalar eta[GAUSS_POINTS];         /* one value per Gauss point; AssembleA_Stokes passes this array to the element operators */
>   PetscScalar fx[GAUSS_POINTS];          /* one value per Gauss point; presumably the x forcing -- TODO confirm against the caller */
>   PetscScalar fy[GAUSS_POINTS];          /* one value per Gauss point; presumably the y forcing -- TODO confirm against the caller */
> } GaussPointCoefficients;
>
> typedef struct {
>   PetscScalar u_dof; /* presumably the first velocity component at a node -- TODO confirm */
>   PetscScalar v_dof; /* presumably the second velocity component at a node -- TODO confirm */
>   PetscScalar p_dof; /* presumably the pressure unknown at a node -- TODO confirm */
> } StokesDOF;
>
> typedef struct {
>   PetscScalar u_dof; /* presumably the first velocity component at a node -- TODO confirm */
>   PetscScalar v_dof; /* presumably the second velocity component at a node -- TODO confirm */
> } StokesUDOF;
>
> static PetscErrorCode _Stokes2DAssembleOperatorsNNZ_Q1P0(Mat Aup,Mat Apu,DM dav,DM dap);
>
>
> typedef enum { BCNoSlip=0, BCFreeSlip, BCFreeSurface } BCType; /* boundary-condition selector; values are 0, 1, 2 in declaration order */
>
> /*
>
> D = [ 2.eta    0      0  ]
>     [   0    2.eta    0  ]
>     [   0      0     eta ]
>
> B = [ d_dx    0   ]
>     [  0     d_dy ]
>     [ d_dy   d_dx ]
>
> */
>
> /* FEM routines */
> /*
> Element: Local basis function ordering
> 1-----2
> |     |
> |     |
> 0-----3
> */
> /*
>  Evaluate the four bilinear (Q1) shape functions at the reference point
>  _xi = (xi,eta). Ni[k] corresponds to local node k in the ordering
>  0:(-1,-1), 1:(-1,+1), 2:(+1,+1), 3:(+1,-1).
> */
> static void FEBasisQ1Evaluate_Ni(PetscScalar _xi[],PetscScalar Ni[])
> {
>   /* one-sided factors shared by the four corner functions */
>   const PetscScalar xi_m  = 1.0-_xi[0];
>   const PetscScalar xi_p  = 1.0+_xi[0];
>   const PetscScalar eta_m = 1.0-_xi[1];
>   const PetscScalar eta_p = 1.0+_xi[1];
>
>   Ni[0] = 0.25*xi_m*eta_m;
>   Ni[1] = 0.25*xi_m*eta_p;
>   Ni[2] = 0.25*xi_p*eta_p;
>   Ni[3] = 0.25*xi_p*eta_m;
> }
>
> /* P0 basis: a single constant mode. The evaluation point _xi is not read. */
> static void FEBasisQ1Evaluate_Mi_P0(PetscScalar _xi[],PetscScalar Mi[])
> {
>   (void)_xi;
>   *Mi = 1.0;
> }
>
> /* P1 basis {1, x0, x1} evaluated at the point _xi; writes three entries of Mi. */
> static void FEBasisQ1Evaluate_Mi_P1(PetscScalar _xi[],PetscScalar Mi[])
> {
>   PetscScalar *out = Mi;
>
>   *out++ = 1.0;     /* constant mode */
>   *out++ = _xi[0];  /* linear in the first coordinate */
>   *out   = _xi[1];  /* linear in the second coordinate */
> }
>
> /* expect elcoords to be vertices of macro element */
> /*
>  Evaluate the P1 basis {1,x,y} at the image of the reference point _xi,
>  expressed relative to the element: the Q1 map built from elcoords gives the
>  physical point, which is then shifted by the vertex average and divided by
>  the bounding-box extents (max-min of the four vertices) in each direction.
>
>  _xi      - reference coordinates (xi,eta)
>  elcoords - 4 vertices stored as x0,y0,x1,y1,...
>  Mi       - output, 3 entries
>
>  No guard is applied against a zero extent (Lx or Ly == 0).
> */
> static void __FEBasisQ1Evaluate_Mi_P1rel(PetscScalar _xi[],PetscScalar elcoords[],PetscScalar Mi[])
> {
>   PetscScalar Ni_geom[4];
>   PetscScalar _xg[] = {0.0,0.0};
>   PetscScalar xmin[2] = { 1.0e32, 1.0e32};
>   PetscScalar xmax[2] = {-1.0e32,-1.0e32};
>   PetscScalar avg_x,avg_y,Lx,Ly;
>   PetscInt    i;
>
>   FEBasisQ1Evaluate_Ni(_xi,Ni_geom);
>
>   avg_x = avg_y = 0.0;
>   for (i = 0; i < 4; i++) {
>     PetscScalar xn = elcoords[2*i  ];
>     PetscScalar yn = elcoords[2*i+1];
>
>     /* interpolated physical position of _xi */
>     _xg[0] = _xg[0] + Ni_geom[i] * xn;
>     _xg[1] = _xg[1] + Ni_geom[i] * yn;
>
>     /* running vertex sum, turned into an average below */
>     avg_x = avg_x + xn;
>     avg_y = avg_y + yn;
>
>     /* bounding box of the vertices */
>     if (xn<xmin[0]) { xmin[0] = xn; }
>     if (yn<xmin[1]) { xmin[1] = yn; }
>
>     if (xn>xmax[0]) { xmax[0] = xn; }
>     if (yn>xmax[1]) { xmax[1] = yn; }
>   }
>
>   avg_x = 0.25 * avg_x;
>   avg_y = 0.25 * avg_y;
>
>   Lx = xmax[0] - xmin[0];
>   Ly = xmax[1] - xmin[1];
>
>   _xg[0] = ( _xg[0] - avg_x ) / Lx ;
>   _xg[1] = ( _xg[1] - avg_y ) / Ly ;
>
>   FEBasisQ1Evaluate_Mi_P1(_xg,Mi);
> }
>
> /*
>  Evaluate the P1 basis {1,x,y} for the point xp, relative to the macro element
>  whose four vertices are given in elcoords (x0,y0,x1,y1,...):
>
>    1--x--2
>    x--x--x
>    0--x--3
>
>  xp is first converted to reference coordinates over the macro cell using the
>  extents Lx = x3-x0 and Ly = y1-y0, mapped back through the Q1 shape functions,
>  shifted by the vertex average and scaled by 2/Lx, 2/Ly.
>  Mi receives 3 entries. No guard is applied against Lx or Ly being zero.
> */
> static void FEBasisQ1Evaluate_Mi_P1rel(PetscScalar xp[],PetscScalar elcoords[],PetscScalar Mi[])
> {
>   PetscScalar shape[4];
>   PetscScalar ref[2];
>   PetscScalar rel[2];
>   PetscScalar cx,cy,Lx,Ly;
>   PetscInt    n;
>
>   /* macro-cell extents taken from vertices 0->3 (x) and 0->1 (y) */
>   Lx = elcoords[2*3  ] - elcoords[2*0  ];
>   Ly = elcoords[2*1+1] - elcoords[2*0+1];
>
>   /* (xi-(-1))/2 = (xp-x0)/Lx, and likewise in y */
>   ref[0] = -1.0 + 2.0*(xp[0] - elcoords[2*0  ])/Lx;
>   ref[1] = -1.0 + 2.0*(xp[1] - elcoords[2*0+1])/Ly;
>
>   FEBasisQ1Evaluate_Ni(ref,shape);
>
>   rel[0] = 0.0;
>   rel[1] = 0.0;
>   cx = cy = 0.0;
>   for (n = 0; n < 4; n++) {
>     rel[0] = rel[0] + shape[n] * elcoords[2*n  ];
>     rel[1] = rel[1] + shape[n] * elcoords[2*n+1];
>
>     cx = cx + elcoords[2*n  ];
>     cy = cy + elcoords[2*n+1];
>   }
>
>   /* vertex average */
>   cx = 0.25 * cx;
>   cy = 0.25 * cy;
>
>   rel[0] = 2.0 * ( rel[0] - cx ) / Lx ;
>   rel[1] = 2.0 * ( rel[1] - cy ) / Ly ;
>
>   FEBasisQ1Evaluate_Mi_P1(rel,Mi);
> }
>
> /*
>  Reference-space derivatives of the four Q1 shape functions at _xi = (xi,eta).
>  GNi[0][k] = dN_k/dxi, GNi[1][k] = dN_k/deta.
> */
> static void FEBasisQ1Evaluate_dNidxi(PetscScalar _xi[],PetscScalar GNi[][NODES_PER_EL])
> {
>   const PetscScalar xi_m  = 1.0-_xi[0];
>   const PetscScalar xi_p  = 1.0+_xi[0];
>   const PetscScalar eta_m = 1.0-_xi[1];
>   const PetscScalar eta_p = 1.0+_xi[1];
>   PetscScalar       *d_xi  = GNi[0];
>   PetscScalar       *d_eta = GNi[1];
>
>   /* d/dxi */
>   d_xi[0] = -0.25*eta_m;
>   d_xi[1] = -0.25*eta_p;
>   d_xi[2] =  0.25*eta_p;
>   d_xi[3] =  0.25*eta_m;
>
>   /* d/deta */
>   d_eta[0] = -0.25*xi_m;
>   d_eta[1] =  0.25*xi_m;
>   d_eta[2] =  0.25*xi_p;
>   d_eta[3] = -0.25*xi_p;
> }
>
> /*
>  Build the 2x2 Jacobian of the reference->physical map from the reference
>  derivatives GNi and the vertex coordinates (x0,y0,x1,y1,...), invert it, and
>  use the inverse to produce the physical-space derivatives GNx.
>  *det_J receives the Jacobian determinant. No check is made for det_J == 0.
> */
> static void FEBasisQ1EvaluateGeometry(PetscScalar GNi[][NODES_PER_EL],PetscScalar GNx[][NODES_PER_EL],PetscScalar coords[],PetscScalar *det_J)
> {
>   PetscScalar J00 = 0.0,J01 = 0.0,J10 = 0.0,J11 = 0.0;
>   PetscScalar detJ;
>   PetscScalar iJ00,iJ01,iJ10,iJ11;
>   PetscInt    n;
>
>   for (n = 0; n < NODES_PER_EL; n++) {
>     const PetscScalar xn = coords[2*n+0];
>     const PetscScalar yn = coords[2*n+1];
>
>     J00 = J00+GNi[0][n]*xn;      /* dx/dxi  */
>     J01 = J01+GNi[0][n]*yn;      /* dy/dxi  */
>     J10 = J10+GNi[1][n]*xn;      /* dx/deta */
>     J11 = J11+GNi[1][n]*yn;      /* dy/deta */
>   }
>   detJ = (J00*J11)-(J01*J10);
>
>   /* closed-form 2x2 inverse */
>   iJ00 =  J11/detJ;
>   iJ01 = -J01/detJ;
>   iJ10 = -J10/detJ;
>   iJ11 =  J00/detJ;
>
>   for (n = 0; n < NODES_PER_EL; n++) {
>     GNx[0][n] = GNi[0][n]*iJ00+GNi[1][n]*iJ01;
>     GNx[1][n] = GNi[0][n]*iJ10+GNi[1][n]*iJ11;
>   }
>
>   *det_J = detJ;
> }
>
> /*
>  2x2 Gauss-Legendre rule on the reference square [-1,1]^2.
>
>  ngp       - output, number of points (always 4)
>  gp_xi     - output, reference coordinates of each point, ordered
>              (-,-), (-,+), (+,+), (+,-)
>  gp_weight - output, weight of each point (all 1.0)
>
>  The abscissa is 1/sqrt(3), written to full double precision; the previous
>  literal 0.57735026919 was truncated to 11 significant digits.
> */
> static void ConstructGaussQuadrature(PetscInt *ngp,PetscScalar gp_xi[][2],PetscScalar gp_weight[])
> {
>   const PetscScalar g = 0.57735026918962576451; /* 1/sqrt(3) */
>   PetscInt          q;
>
>   *ngp = 4;
>   gp_xi[0][0] = -g;gp_xi[0][1] = -g;
>   gp_xi[1][0] = -g;gp_xi[1][1] =  g;
>   gp_xi[2][0] =  g;gp_xi[2][1] =  g;
>   gp_xi[3][0] =  g;gp_xi[3][1] = -g;
>   for (q = 0; q < 4; q++) gp_weight[q] = 1.0;
> }
>
>
> /* procs to the left claim the ghost node as their element */
> #undef __FUNCT__
> #define __FUNCT__ "DMDAGetLocalElementSize"
> /*
>  Number of elements in the local part of the DMDA, per direction.
>
>  For each non-NULL output the value is the local width returned by
>  DMDAGetCorners(), reduced by one when the local range reaches the global
>  extent returned by DMDAGetInfo() (the "last proc" in that direction).
>  Any of mxl, myl, mzl may be NULL.
>
>  Errors from DMDAGetInfo()/DMDAGetCorners() are now propagated to the caller
>  instead of being dropped.
> */
> static PetscErrorCode DMDAGetLocalElementSize(DM da,PetscInt *mxl,PetscInt *myl,PetscInt *mzl)
> {
>   PetscErrorCode ierr;
>   PetscInt       m,n,p,M,N,P;
>   PetscInt       sx,sy,sz;
>
>   PetscFunctionBeginUser;
>   ierr = DMDAGetInfo(da,0,&M,&N,&P,0,0,0,0,0,0,0,0,0);CHKERRQ(ierr);
>   ierr = DMDAGetCorners(da,&sx,&sy,&sz,&m,&n,&p);CHKERRQ(ierr);
>
>   if (mxl != NULL) {
>     *mxl = m;
>     if ((sx+m) == M) *mxl = m-1;  /* last proc */
>   }
>   if (myl != NULL) {
>     *myl = n;
>     if ((sy+n) == N) *myl = n-1;  /* last proc */
>   }
>   if (mzl != NULL) {
>     *mzl = p;
>     if ((sz+p) == P) *mzl = p-1;  /* last proc */
>   }
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "DMDAGetElementCorners"
> /*
>  First local element index and local element counts, per direction.
>
>  sx,sy,sz - output start indices: the ghost-corner start from
>             DMDAGetGhostCorners(), incremented by one when it is non-zero.
>             Each pointer may be NULL, in which case it is skipped.
>  mx,my,mz - output element counts, forwarded to DMDAGetLocalElementSize()
>             (each may be NULL).
>
>  Fix: the z start used to be written only when the ghost start sk was
>  non-zero, regardless of whether sz was a valid pointer. That dereferenced a
>  NULL sz for sk != 0 and left *sz unset for sk == 0. The guard now tests the
>  pointer. Return codes of the DMDA calls are also checked.
> */
> static PetscErrorCode DMDAGetElementCorners(DM da,PetscInt *sx,PetscInt *sy,PetscInt *sz,PetscInt *mx,PetscInt *my,PetscInt *mz)
> {
>   PetscErrorCode ierr;
>   PetscInt       si,sj,sk;
>
>   PetscFunctionBeginUser;
>   ierr = DMDAGetGhostCorners(da,&si,&sj,&sk,0,0,0);CHKERRQ(ierr);
>
>   if (sx) {
>     *sx = si;
>     if (si != 0) *sx = si+1;
>   }
>   if (sy) {
>     *sy = sj;
>     if (sj != 0) *sy = sj+1;
>   }
>   if (sz) {
>     *sz = sk;
>     if (sk != 0) *sz = sk+1;
>   }
>
>   ierr = DMDAGetLocalElementSize(da,mx,my,mz);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> /*
> i,j are the element indices
> The unknown is a vector quantity.
> The s[].c is used to indicate the degree of freedom.
> */
> #undef __FUNCT__
> #define __FUNCT__ "DMDAGetElementEqnums_up"
> /*
>  Fill the stencil entries for element (i,j).
>
>  s_u - 8 entries: for each local node 0..3 (ordering (i,j), (i,j+1),
>        (i+1,j+1), (i+1,j)) two entries with component c = 0 then c = 1.
>  s_p - 4 entries: one per local node, all with component c = 2.
>
>  Only the .i, .j and .c fields are written.
> */
> static PetscErrorCode DMDAGetElementEqnums_up(MatStencil s_u[],MatStencil s_p[],PetscInt i,PetscInt j)
> {
>   /* node offsets relative to (i,j), in local node order */
>   const PetscInt off_i[4] = {0,0,1,1};
>   const PetscInt off_j[4] = {0,1,1,0};
>   PetscInt       node,comp;
>
>   PetscFunctionBeginUser;
>   /* velocity: two components per node */
>   for (node = 0; node < 4; node++) {
>     for (comp = 0; comp < 2; comp++) {
>       MatStencil *s = &s_u[2*node+comp];
>
>       s->i = i+off_i[node];
>       s->j = j+off_j[node];
>       s->c = comp;
>     }
>   }
>
>   /* pressure: one entry per node, component index 2 */
>   for (node = 0; node < 4; node++) {
>     s_p[node].i = i+off_i[node];
>     s_p[node].j = j+off_j[node];
>     s_p[node].c = 2;
>   }
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "DMDAGetElementOwnershipRanges2d"
> /*
>  Gather, on every rank, the local element counts of each process column and
>  process row of a 2d DMDA.
>
>  _lx - output, newly PetscMalloc'ed array with one entry per process in x
>  _ly - output, newly PetscMalloc'ed array with one entry per process in y
>  Ownership of both arrays passes to the caller.
>
>  Each rank stores its own element counts into two parallel vectors at the
>  positions given by its (proc_I,proc_J) grid coordinates, which are derived
>  from the rank in PETSC_COMM_WORLD as rank = proc_J*cpu_x + proc_I. The
>  vectors are then scattered to all ranks and converted back to integers.
>  The 1.0e-9 offset keeps the integer cast from truncating a value that came
>  back slightly below the stored integer.
>
>  Fix: the return codes of MPI_Comm_rank(), DMDAGetInfo() and both
>  VecAssemblyEnd() calls were discarded; every call is now checked.
>
>  NOTE(review): on an error return the arrays and vectors allocated so far are
>  not released, as in the original.
> */
> static PetscErrorCode DMDAGetElementOwnershipRanges2d(DM da,PetscInt **_lx,PetscInt **_ly)
> {
>   PetscErrorCode ierr;
>   PetscMPIInt    rank;
>   PetscInt       proc_I,proc_J;
>   PetscInt       cpu_x,cpu_y;
>   PetscInt       local_mx,local_my;
>   Vec            vlx,vly;
>   PetscInt       *LX,*LY,i;
>   PetscScalar    *_a;
>   Vec            V_SEQ;
>   VecScatter     ctx;
>
>   PetscFunctionBeginUser;
>   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
>
>   ierr = DMDAGetInfo(da,0,0,0,0,&cpu_x,&cpu_y,0,0,0,0,0,0,0);CHKERRQ(ierr);
>
>   /* position of this rank in the process grid */
>   proc_J = rank/cpu_x;
>   proc_I = rank-cpu_x*proc_J;
>
>   ierr = PetscMalloc(sizeof(PetscInt)*cpu_x,&LX);CHKERRQ(ierr);
>   ierr = PetscMalloc(sizeof(PetscInt)*cpu_y,&LY);CHKERRQ(ierr);
>
>   ierr = DMDAGetLocalElementSize(da,&local_mx,&local_my,NULL);CHKERRQ(ierr);
>   ierr = VecCreate(PETSC_COMM_WORLD,&vlx);CHKERRQ(ierr);
>   ierr = VecSetSizes(vlx,PETSC_DECIDE,cpu_x);CHKERRQ(ierr);
>   ierr = VecSetFromOptions(vlx);CHKERRQ(ierr);
>
>   ierr = VecCreate(PETSC_COMM_WORLD,&vly);CHKERRQ(ierr);
>   ierr = VecSetSizes(vly,PETSC_DECIDE,cpu_y);CHKERRQ(ierr);
>   ierr = VecSetFromOptions(vly);CHKERRQ(ierr);
>
>   ierr = VecSetValue(vlx,proc_I,(PetscScalar)(local_mx+1.0e-9),INSERT_VALUES);CHKERRQ(ierr);
>   ierr = VecSetValue(vly,proc_J,(PetscScalar)(local_my+1.0e-9),INSERT_VALUES);CHKERRQ(ierr);
>   ierr = VecAssemblyBegin(vlx);CHKERRQ(ierr);
>   ierr = VecAssemblyEnd(vlx);CHKERRQ(ierr);
>   ierr = VecAssemblyBegin(vly);CHKERRQ(ierr);
>   ierr = VecAssemblyEnd(vly);CHKERRQ(ierr);
>
>   /* x direction: replicate on every rank and convert to integers */
>   ierr = VecScatterCreateToAll(vlx,&ctx,&V_SEQ);CHKERRQ(ierr);
>   ierr = VecScatterBegin(ctx,vlx,V_SEQ,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
>   ierr = VecScatterEnd(ctx,vlx,V_SEQ,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
>   ierr = VecGetArray(V_SEQ,&_a);CHKERRQ(ierr);
>   for (i = 0; i < cpu_x; i++) LX[i] = (PetscInt)PetscRealPart(_a[i]);
>   ierr = VecRestoreArray(V_SEQ,&_a);CHKERRQ(ierr);
>   ierr = VecScatterDestroy(&ctx);CHKERRQ(ierr);
>   ierr = VecDestroy(&V_SEQ);CHKERRQ(ierr);
>
>   /* y direction: same procedure */
>   ierr = VecScatterCreateToAll(vly,&ctx,&V_SEQ);CHKERRQ(ierr);
>   ierr = VecScatterBegin(ctx,vly,V_SEQ,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
>   ierr = VecScatterEnd(ctx,vly,V_SEQ,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
>   ierr = VecGetArray(V_SEQ,&_a);CHKERRQ(ierr);
>   for (i = 0; i < cpu_y; i++) LY[i] = (PetscInt)PetscRealPart(_a[i]);
>   ierr = VecRestoreArray(V_SEQ,&_a);CHKERRQ(ierr);
>   ierr = VecScatterDestroy(&ctx);CHKERRQ(ierr);
>   ierr = VecDestroy(&V_SEQ);CHKERRQ(ierr);
>
>   *_lx = LX;
>   *_ly = LY;
>
>   ierr = VecDestroy(&vlx);CHKERRQ(ierr);
>   ierr = VecDestroy(&vly);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> /*
>  Flat row-major index into an element matrix.
>  Row    = w_dof*wi + wd  (test node wi, test dof wd)
>  Column = u_dof*ui + ud  (trial node ui, trial dof ud)
>  The row stride is u_NPE*u_dof. w_NPE is accepted but not used.
> */
> static PetscInt ASS_MAP_wIwDI_uJuDJ(PetscInt wi,PetscInt wd,PetscInt w_NPE,PetscInt w_dof,PetscInt ui,PetscInt ud,PetscInt u_NPE,PetscInt u_dof)
> {
>   const PetscInt row_stride = u_NPE*u_dof;
>   const PetscInt row        = w_dof*wi+wd;
>   const PetscInt col        = u_dof*ui+ud;
>
>   return row*row_stride+col;
> }
>
> /*
>  Accumulate the element matrix Bt.D.B for Q1 velocities into Ke (8x8, entry
>  (i,j) stored at Ke[i+8*j]).
>
>  coords - element vertex coordinates (x0,y0,...)
>  eta    - one value per Gauss point
>
>  Ke is added to, not overwritten; the caller is expected to zero it first.
> */
> static void FormStressOperatorQ1(PetscScalar Ke[],PetscScalar coords[],PetscScalar eta[])
> {
>   PetscInt    ngp;
>   PetscScalar gp_xi[GAUSS_POINTS][2];
>   PetscScalar gp_weight[GAUSS_POINTS];
>   PetscInt    q,n,row,col,k;
>   PetscScalar GNi_p[NSD][NODES_PER_EL],GNx_p[NSD][NODES_PER_EL];
>   PetscScalar J_p,tildeD[3];
>   PetscScalar B[3][U_DOFS*NODES_PER_EL];
>
>   ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
>
>   for (q = 0; q < ngp; q++) {
>     FEBasisQ1Evaluate_dNidxi(gp_xi[q],GNi_p);
>     FEBasisQ1EvaluateGeometry(GNi_p,GNx_p,coords,&J_p);
>
>     /* strain-displacement rows: d/dx, d/dy, and the shear combination */
>     for (n = 0; n < NODES_PER_EL; n++) {
>       const PetscScalar ddx = GNx_p[0][n];
>       const PetscScalar ddy = GNx_p[1][n];
>
>       B[0][2*n]   = ddx;
>       B[0][2*n+1] = 0.0;
>       B[1][2*n]   = 0.0;
>       B[1][2*n+1] = ddy;
>       B[2][2*n]   = ddy;
>       B[2][2*n+1] = ddx;
>     }
>
>     /* diagonal of D, pre-multiplied by the quadrature weight and det(J) */
>     tildeD[0] = 2.0*gp_weight[q]*J_p*eta[q];
>     tildeD[1] = 2.0*gp_weight[q]*J_p*eta[q];
>     tildeD[2] =     gp_weight[q]*J_p*eta[q];
>
>     /* Ke_ij += B_ki . D_kk . B_kj  (D is diagonal for Stokes) */
>     for (col = 0; col < 8; col++) {
>       for (row = 0; row < 8; row++) {
>         for (k = 0; k < 3; k++) {
>           Ke[row+8*col] = Ke[row+8*col]+B[k][row]*tildeD[k]*B[k][col];
>         }
>       }
>     }
>   }
> }
>
> /*
>  Accumulate the Q1-velocity / Q1-pressure gradient block into Ke:
>    Ke[row,col] -= dN_i/dx_d * N_j * weight * det(J)
>  with row = 2*i+d and col = j, indexed through ASS_MAP_wIwDI_uJuDJ.
>  Ke is added to, not overwritten.
> */
> static void FormGradientOperatorQ1Q1(PetscScalar Ke[],PetscScalar coords[])
> {
>   PetscInt    ngp;
>   PetscScalar gp_xi[GAUSS_POINTS][2];
>   PetscScalar gp_weight[GAUSS_POINTS];
>   PetscInt    q,un,ud,pn;
>   PetscScalar Ni_p[NODES_PER_EL];
>   PetscScalar GNi_p[NSD][NODES_PER_EL],GNx_p[NSD][NODES_PER_EL];
>   PetscScalar J_p,fac;
>
>   ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
>
>   for (q = 0; q < ngp; q++) {
>     FEBasisQ1Evaluate_Ni(gp_xi[q],Ni_p);
>     FEBasisQ1Evaluate_dNidxi(gp_xi[q],GNi_p);
>     FEBasisQ1EvaluateGeometry(GNi_p,GNx_p,coords,&J_p);
>     fac = gp_weight[q]*J_p;
>
>     for (un = 0; un < NODES_PER_EL; un++) {      /* velocity nodes */
>       for (ud = 0; ud < NSD; ud++) {             /* velocity dofs */
>         for (pn = 0; pn < 4; pn++) {             /* pressure nodes, one dof each */
>           const PetscInt IJ = ASS_MAP_wIwDI_uJuDJ(un,ud,NODES_PER_EL,2,pn,0,NODES_PER_EL,1);
>
>           Ke[IJ] = Ke[IJ]-GNx_p[ud][un]*Ni_p[pn]*fac;
>         }
>       }
>     }
>   }
> }
>
> /*
>  Accumulate the Q1-velocity / P0-pressure gradient block into Ke (8 rows,
>  1 column):
>    Ke[row,0] -= dN_i/dx_d * M_0 * weight * det(J),  row = 2*i+d
>  Ke is added to, not overwritten.
> */
> static void FormGradientOperatorQ1P0(PetscScalar Ke[],PetscScalar coords[])
> {
>   const PetscInt np = 1; /* number of pressure modes */
>   PetscInt       ngp;
>   PetscScalar    gp_xi[GAUSS_POINTS][2];
>   PetscScalar    gp_weight[GAUSS_POINTS];
>   PetscInt       q,un,ud,pm;
>   PetscScalar    Ni_p[1];
>   PetscScalar    GNi_p[NSD][NODES_PER_EL],GNx_p[NSD][NODES_PER_EL];
>   PetscScalar    J_p,fac;
>
>   ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
>
>   for (q = 0; q < ngp; q++) {
>     FEBasisQ1Evaluate_Mi_P0(gp_xi[q],Ni_p);
>     FEBasisQ1Evaluate_dNidxi(gp_xi[q],GNi_p);
>     FEBasisQ1EvaluateGeometry(GNi_p,GNx_p,coords,&J_p);
>     fac = gp_weight[q]*J_p;
>
>     for (un = 0; un < NODES_PER_EL; un++) {      /* velocity nodes */
>       for (ud = 0; ud < NSD; ud++) {             /* velocity dofs */
>         for (pm = 0; pm < np; pm++) {            /* pressure modes */
>           const PetscInt IJ = ASS_MAP_wIwDI_uJuDJ(un,ud,NODES_PER_EL,2, pm,0,np,1);
>
>           Ke[IJ] = Ke[IJ]-GNx_p[ud][un]*Ni_p[pm]*fac;
>         }
>       }
>     }
>   }
> }
>
> /*
>  Accumulate the Q1-velocity / P1-pressure gradient block into Ke (8 rows,
>  3 columns), using pressure basis values supplied by the caller:
>    Ke[row,col] -= dN_i/dx_d * all_Mi_p[q][col] * weight * det(J)
>  all_Mi_p holds the 3 pressure basis values at each of the Gauss points.
>  Ke is added to, not overwritten.
> */
> static void FormGradientOperatorQ1mP1(PetscScalar Ke[],PetscScalar coords[],PetscScalar all_Mi_p[GAUSS_POINTS][3])
> {
>   const PetscInt np = 3; /* number of pressure modes */
>   PetscInt       ngp;
>   PetscScalar    gp_xi[GAUSS_POINTS][2];
>   PetscScalar    gp_weight[GAUSS_POINTS];
>   PetscInt       q,un,ud,pm;
>   PetscScalar    GNi_p[NSD][NODES_PER_EL],GNx_p[NSD][NODES_PER_EL];
>   PetscScalar    J_p,fac;
>
>   ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
>
>   for (q = 0; q < ngp; q++) {
>     const PetscScalar *Mi_q = all_Mi_p[q];
>
>     FEBasisQ1Evaluate_dNidxi(gp_xi[q],GNi_p);
>     FEBasisQ1EvaluateGeometry(GNi_p,GNx_p,coords,&J_p);
>     fac = gp_weight[q]*J_p;
>
>     for (un = 0; un < NODES_PER_EL; un++) {      /* velocity nodes */
>       for (ud = 0; ud < NSD; ud++) {             /* velocity dofs */
>         for (pm = 0; pm < np; pm++) {            /* pressure modes */
>           const PetscInt IJ = ASS_MAP_wIwDI_uJuDJ(un,ud,NODES_PER_EL,2, pm,0,np,1);
>
>           Ke[IJ] = Ke[IJ] - GNx_p[ud][un] * Mi_q[pm] * fac;
>         }
>       }
>     }
>   }
> }
>
> /*
>  Accumulate the Q1-velocity / P0-pressure gradient block into Ke (8 rows,
>  1 column), using the pressure basis value supplied by the caller:
>    Ke[row,0] -= dN_i/dx_d * all_Mi_p[q][0] * weight * det(J)
>  Ke is added to, not overwritten.
> */
> static void FormGradientOperatorQ1mP0(PetscScalar Ke[],PetscScalar coords[],PetscScalar all_Mi_p[GAUSS_POINTS][1])
> {
>   const PetscInt np = 1; /* number of pressure modes */
>   PetscInt       ngp;
>   PetscScalar    gp_xi[GAUSS_POINTS][2];
>   PetscScalar    gp_weight[GAUSS_POINTS];
>   PetscInt       q,un,ud,pm;
>   PetscScalar    GNi_p[NSD][NODES_PER_EL],GNx_p[NSD][NODES_PER_EL];
>   PetscScalar    J_p,fac;
>
>   ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
>
>   for (q = 0; q < ngp; q++) {
>     const PetscScalar *Mi_q = all_Mi_p[q];
>
>     FEBasisQ1Evaluate_dNidxi(gp_xi[q],GNi_p);
>     FEBasisQ1EvaluateGeometry(GNi_p,GNx_p,coords,&J_p);
>     fac = gp_weight[q]*J_p;
>
>     for (un = 0; un < NODES_PER_EL; un++) {      /* velocity nodes */
>       for (ud = 0; ud < NSD; ud++) {             /* velocity dofs */
>         for (pm = 0; pm < np; pm++) {            /* pressure modes */
>           const PetscInt IJ = ASS_MAP_wIwDI_uJuDJ(un,ud,NODES_PER_EL,2, pm,0,np,1);
>
>           Ke[IJ] = Ke[IJ] - GNx_p[ud][un] * Mi_q[pm] * fac;
>         }
>       }
>     }
>   }
> }
>
> /*
>  Write the transpose of the gradient block into De.
>  Ge is (U_DOFS*nv) x (P_DOFS*np), row-major; De is the
>  (P_DOFS*np) x (U_DOFS*nv) row-major transpose. De is overwritten.
> */
> static void FormDivergenceOperator(PetscScalar De[],PetscScalar Ge[],PetscInt nv,PetscInt np)
> {
>   const PetscInt g_rows = U_DOFS*nv;
>   const PetscInt g_cols = P_DOFS*np;
>   PetscInt       r,c;
>
>   for (c = 0; c < g_cols; c++) {
>     PetscScalar *de_row = De+g_rows*c;
>
>     for (r = 0; r < g_rows; r++) de_row[r] = Ge[g_cols*r+c];
>   }
> }
>
> /*
>  Accumulate the Q1 pressure stabilisation block into Ke (4x4, row-major):
>    Ke[i,j] -= weight * det(J) * (N_i*N_j - 1/16)
>  and afterwards multiply every entry of Ke by 1/eta_avg, where eta_avg is the
>  mean of eta over the Gauss points.
>
>  The final scaling applies to whatever Ke held on entry as well; callers are
>  expected to pass a zeroed Ke.
> */
> static void FormStabilisationOperatorQ1(PetscScalar Ke[],PetscScalar coords[],PetscScalar eta[])
> {
>   PetscInt    ngp;
>   PetscScalar gp_xi[GAUSS_POINTS][2];
>   PetscScalar gp_weight[GAUSS_POINTS];
>   PetscInt    q,a,b,e;
>   PetscScalar Ni_p[NODES_PER_EL];
>   PetscScalar GNi_p[NSD][NODES_PER_EL],GNx_p[NSD][NODES_PER_EL];
>   PetscScalar J_p,fac,eta_avg;
>
>   ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
>
>   for (q = 0; q < ngp; q++) {
>     FEBasisQ1Evaluate_Ni(gp_xi[q],Ni_p);
>     FEBasisQ1Evaluate_dNidxi(gp_xi[q],GNi_p);
>     FEBasisQ1EvaluateGeometry(GNi_p,GNx_p,coords,&J_p);
>     fac = gp_weight[q]*J_p;
>
>     for (a = 0; a < NODES_PER_EL; a++) {
>       PetscScalar *row = Ke+NODES_PER_EL*a;
>
>       for (b = 0; b < NODES_PER_EL; b++) {
>         row[b] = row[b]-fac*(Ni_p[a]*Ni_p[b]-0.0625);
>       }
>     }
>   }
>
>   /* scale by the inverse of the mean eta */
>   eta_avg = 0.0;
>   for (q = 0; q < ngp; q++) eta_avg += eta[q];
>   eta_avg = (1.0/((PetscScalar)ngp))*eta_avg;
>   fac     = 1.0/eta_avg;
>   for (e = 0; e < NODES_PER_EL*NODES_PER_EL; e++) Ke[e] = fac*Ke[e];
> }
>
> /*
>  Accumulate the negative Q1 mass matrix into Ke (4x4, row-major):
>    Ke[i,j] -= weight * det(J) * N_i * N_j
>  and afterwards multiply every entry of Ke by 1/eta_avg, where eta_avg is the
>  mean of eta over the Gauss points.
>
>  The final scaling applies to whatever Ke held on entry as well; callers are
>  expected to pass a zeroed Ke.
> */
> static void FormScaledMassMatrixOperatorQ1Q1(PetscScalar Ke[],PetscScalar coords[],PetscScalar eta[])
> {
>   PetscInt    ngp;
>   PetscScalar gp_xi[GAUSS_POINTS][2];
>   PetscScalar gp_weight[GAUSS_POINTS];
>   PetscInt    q,a,b,e;
>   PetscScalar Ni_p[NODES_PER_EL];
>   PetscScalar GNi_p[NSD][NODES_PER_EL],GNx_p[NSD][NODES_PER_EL];
>   PetscScalar J_p,fac,eta_avg;
>
>   ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
>
>   for (q = 0; q < ngp; q++) {
>     FEBasisQ1Evaluate_Ni(gp_xi[q],Ni_p);
>     FEBasisQ1Evaluate_dNidxi(gp_xi[q],GNi_p);
>     FEBasisQ1EvaluateGeometry(GNi_p,GNx_p,coords,&J_p);
>     fac = gp_weight[q]*J_p;
>
>     for (a = 0; a < NODES_PER_EL; a++) {
>       PetscScalar *row = Ke+NODES_PER_EL*a;
>
>       for (b = 0; b < NODES_PER_EL; b++) {
>         row[b] = row[b]-fac*Ni_p[a]*Ni_p[b];
>       }
>     }
>   }
>
>   /* scale by the inverse of the mean eta */
>   eta_avg = 0.0;
>   for (q = 0; q < ngp; q++) eta_avg += eta[q];
>   eta_avg = (1.0/((PetscScalar)ngp))*eta_avg;
>   fac     = 1.0/eta_avg;
>   for (e = 0; e < NODES_PER_EL*NODES_PER_EL; e++) Ke[e] = fac*Ke[e];
> }
>
> /*
>  Accumulate the negative P0 mass matrix (a single entry) into Ke:
>    Ke[0] -= weight * det(J) * M_0 * M_0
>  and afterwards multiply Ke by 1/eta_avg, where eta_avg is the mean of eta
>  over the Gauss points.
> */
> static void FormScaledMassMatrixOperatorP0P0(PetscScalar Ke[],PetscScalar coords[],PetscScalar eta[])
> {
>   const PetscInt np = 1; /* number of pressure modes */
>   PetscInt       ngp;
>   PetscScalar    gp_xi[GAUSS_POINTS][2];
>   PetscScalar    gp_weight[GAUSS_POINTS];
>   PetscInt       q,a,b,e;
>   PetscScalar    Ni_p[NODES_PER_EL];
>   PetscScalar    GNi_p[NSD][NODES_PER_EL],GNx_p[NSD][NODES_PER_EL];
>   PetscScalar    J_p,fac,eta_avg;
>
>   ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
>
>   for (q = 0; q < ngp; q++) {
>     FEBasisQ1Evaluate_Mi_P0(gp_xi[q],Ni_p);
>
>     FEBasisQ1Evaluate_dNidxi(gp_xi[q],GNi_p);
>     FEBasisQ1EvaluateGeometry(GNi_p,GNx_p,coords,&J_p);
>     fac = gp_weight[q]*J_p;
>
>     for (a = 0; a < np; a++) {
>       for (b = 0; b < np; b++) {
>         Ke[np*a+b] = Ke[np*a+b]-fac*Ni_p[a]*Ni_p[b];
>       }
>     }
>   }
>
>   /* scale by the inverse of the mean eta */
>   eta_avg = 0.0;
>   for (q = 0; q < ngp; q++) eta_avg += eta[q];
>   eta_avg = (1.0/((PetscScalar)ngp))*eta_avg;
>   fac     = 1.0/eta_avg;
>
>   for (e = 0; e < np*np; e++) Ke[e] = fac*Ke[e];
> }
>
> /*
>  Accumulate the negative P1 mass matrix into Ke (3x3, row-major), using the
>  pressure basis values supplied by the caller:
>    Ke[i,j] -= weight * det(J) * all_Mi_p[q][i] * all_Mi_p[q][j]
>  and afterwards multiply every entry of Ke by 1/eta_avg, where eta_avg is the
>  mean of eta over the Gauss points.
>
>  The unused local Ni_p has been removed; results are unchanged.
> */
> static void FormScaledMassMatrixOperatorP1P1(PetscScalar Ke[],PetscScalar coords[],PetscScalar eta[],
>                                              PetscScalar all_Mi_p[GAUSS_POINTS][3])
> {
>   PetscInt    ngp;
>   PetscScalar gp_xi[GAUSS_POINTS][2];
>   PetscScalar gp_weight[GAUSS_POINTS];
>   PetscInt    p,i,j,np;
>   PetscScalar GNi_p[NSD][NODES_PER_EL],GNx_p[NSD][NODES_PER_EL];
>   PetscScalar J_p,fac,eta_avg;
>
>   /* define quadrature rule */
>   ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
>
>   /* evaluate integral */
>   np = 3;
>   for (p = 0; p < ngp; p++) {
>     PetscScalar *Mi_p = all_Mi_p[p];
>
>     /* the geometry is only needed for det(J) */
>     FEBasisQ1Evaluate_dNidxi(gp_xi[p],GNi_p);
>     FEBasisQ1EvaluateGeometry(GNi_p,GNx_p,coords,&J_p);
>     fac = gp_weight[p]*J_p;
>
>     for (i = 0; i < np; i++) {
>       for (j = 0; j < np; j++) {
>         Ke[np*i+j] = Ke[np*i+j] - fac*Mi_p[i]*Mi_p[j];
>       }
>     }
>   }
>
>   /* scale by the inverse of the mean eta */
>   eta_avg = 0.0;
>   for (p = 0; p < ngp; p++) eta_avg += eta[p];
>   eta_avg = (1.0/((PetscScalar)ngp))*eta_avg;
>   fac     = 1.0/eta_avg;
>
>   for (i = 0; i < np*np; i++) {
>     Ke[i] = fac*Ke[i];
>   }
> }
>
> /*
>  Accumulate the negative P1 mass matrix into Ke (3x3, row-major), using the
>  pressure basis values supplied by the caller:
>    Ke[i,j] -= weight * det(J) * all_Mi_p[q][i] * all_Mi_p[q][j]
>  and afterwards multiply every entry of Ke by -1/kappa. For a zeroed Ke the
>  two sign flips leave the mass matrix divided by kappa.
>
>  No guard is applied against kappa == 0.
>  The unused locals Ni_p and eta_avg have been removed; results are unchanged.
> */
> static void FormScaledMassMatrixOperatorP1P1_const(PetscScalar Ke[],PetscScalar coords[],PetscScalar kappa,
>                                                    PetscScalar all_Mi_p[GAUSS_POINTS][3])
> {
>   PetscInt    ngp;
>   PetscScalar gp_xi[GAUSS_POINTS][2];
>   PetscScalar gp_weight[GAUSS_POINTS];
>   PetscInt    p,i,j,np;
>   PetscScalar GNi_p[NSD][NODES_PER_EL],GNx_p[NSD][NODES_PER_EL];
>   PetscScalar J_p,fac;
>
>   /* define quadrature rule */
>   ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
>
>   /* evaluate integral */
>   np = 3;
>   for (p = 0; p < ngp; p++) {
>     PetscScalar *Mi_p = all_Mi_p[p];
>
>     /* the geometry is only needed for det(J) */
>     FEBasisQ1Evaluate_dNidxi(gp_xi[p],GNi_p);
>     FEBasisQ1EvaluateGeometry(GNi_p,GNx_p,coords,&J_p);
>     fac = gp_weight[p]*J_p;
>
>     for (i = 0; i < np; i++) {
>       for (j = 0; j < np; j++) {
>         Ke[np*i+j] = Ke[np*i+j] - fac*Mi_p[i]*Mi_p[j];
>       }
>     }
>   }
>
>   /* scale */
>   fac = -1.0/kappa;
>   for (i = 0; i < np*np; i++) {
>     Ke[i] = fac*Ke[i];
>   }
> }
>
> /*
>  Accumulate the negative P1 mass matrix into Ke (3x3, row-major), evaluating
>  the pressure basis internally with FEBasisQ1Evaluate_Mi_P1rel:
>    Ke[i,j] -= weight * det(J) * M_i * M_j
>  and afterwards multiply every entry of Ke by 1/eta_avg, where eta_avg is the
>  mean of eta over the Gauss points.
>
>  NOTE(review): the Gauss point's reference coordinates are passed as the
>  first argument of FEBasisQ1Evaluate_Mi_P1rel, whose parameter is named xp and
>  is converted from physical to reference coordinates inside -- confirm this
>  is intended.
> */
> static void _FormScaledMassMatrixOperatorP1P1(PetscScalar Ke[],PetscScalar coords[],PetscScalar eta[])
> {
>   const PetscInt np = 3; /* number of pressure modes */
>   PetscInt       ngp;
>   PetscScalar    gp_xi[GAUSS_POINTS][2];
>   PetscScalar    gp_weight[GAUSS_POINTS];
>   PetscInt       q,a,b,e;
>   PetscScalar    Mi_q[3];
>   PetscScalar    GNi_p[NSD][NODES_PER_EL],GNx_p[NSD][NODES_PER_EL];
>   PetscScalar    J_p,fac,eta_avg;
>
>   ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
>
>   for (q = 0; q < ngp; q++) {
>     FEBasisQ1Evaluate_Mi_P1rel(gp_xi[q],coords,Mi_q);
>
>     FEBasisQ1Evaluate_dNidxi(gp_xi[q],GNi_p);
>     FEBasisQ1EvaluateGeometry(GNi_p,GNx_p,coords,&J_p);
>     fac = gp_weight[q]*J_p;
>
>     for (a = 0; a < np; a++) {
>       PetscScalar *row = Ke+np*a;
>
>       for (b = 0; b < np; b++) {
>         row[b] = row[b]-fac*Mi_q[a]*Mi_q[b];
>       }
>     }
>   }
>
>   /* scale by the inverse of the mean eta */
>   eta_avg = 0.0;
>   for (q = 0; q < ngp; q++) eta_avg += eta[q];
>   eta_avg = (1.0/((PetscScalar)ngp))*eta_avg;
>   fac     = 1.0/eta_avg;
>
>   for (e = 0; e < np*np; e++) Ke[e] = fac*Ke[e];
> }
>
> /*
>  Edge stabilisation block for P0 pressure on a 2x2 patch of cells.
>
>  Ke (4x4, row-major) is OVERWRITTEN with -0.25 * vol * Ce, where vol is the
>  quadrature sum of weight*det(J) over the element given by coords and Ce is
>  the fixed pattern
>      [  2 -1  0 -1 ]
>      [ -1  2 -1  0 ]
>      [  0 -1  2 -1 ]
>      [ -1  0 -1  2 ]
>
>  eta       - one value per Gauss point, only read when eta_scale is true
>  eta_scale - when true, Ke is additionally multiplied by 1/eta_avg, with
>              eta_avg the mean of eta over the Gauss points
>
>  Fixes relative to the previous version:
>   - the eta_scale branch summed eta but then multiplied by a stale `fac`
>     (the last Gauss point's weight*det(J)) and touched only Ke[0]; it now
>     scales all 16 entries by 1/eta_avg, matching the other eta-scaled
>     operators in this file.
>     NOTE(review): 1/eta_avg is inferred from those sibling routines --
>     confirm it is the intended scaling.
>   - the per-Gauss-point accumulation into Ke was dead code, because Ke is
>     unconditionally overwritten afterwards; it and the unused basis
>     evaluation were removed.
> */
> static void FormEdgeStabP0(PetscScalar Ke[],PetscScalar coords[],PetscScalar eta[],PetscBool eta_scale)
> {
>   PetscInt    ngp;
>   PetscScalar gp_xi[GAUSS_POINTS][2];
>   PetscScalar gp_weight[GAUSS_POINTS];
>   PetscInt    p,i;
>   PetscScalar GNi_p[NSD][NODES_PER_EL],GNx_p[NSD][NODES_PER_EL];
>   PetscScalar J_p,fac,eta_avg;
>   PetscScalar Ce[16],vol;
>
>   /* define quadrature rule */
>   ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
>
>   Ce[0]  =  2.0; Ce[1]  = -1.0; Ce[2]  =  0.0; Ce[3]  = -1.0;
>   Ce[4]  = -1.0; Ce[5]  =  2.0; Ce[6]  = -1.0; Ce[7]  =  0.0;
>   Ce[8]  =  0.0; Ce[9]  = -1.0; Ce[10] =  2.0; Ce[11] = -1.0;
>   Ce[12] = -1.0; Ce[13] =  0.0; Ce[14] = -1.0; Ce[15] =  2.0;
>
>   /* element measure by quadrature */
>   vol = 0.0;
>   for (p = 0; p < ngp; p++) {
>     FEBasisQ1Evaluate_dNidxi(gp_xi[p],GNi_p);
>     FEBasisQ1EvaluateGeometry(GNi_p,GNx_p,coords,&J_p);
>     fac = gp_weight[p]*J_p;
>     vol = vol + fac;
>   }
>
>   for (i = 0; i < 16; i++) {
>     Ke[i] = -0.25 * vol * Ce[i];
>   }
>
>   /* optional scaling by the inverse of the mean eta */
>   if (eta_scale) {
>     eta_avg = 0.0;
>     for (p = 0; p < ngp; p++) {
>       eta_avg += eta[p];
>     }
>     eta_avg = (1.0/((PetscScalar)ngp))*eta_avg;
>     fac     = 1.0/eta_avg;
>
>     for (i = 0; i < 16; i++) {
>       Ke[i] = fac*Ke[i];
>     }
>   }
> }
>
> /*
>  Null-space stabilisation block for P0 pressure on a 2x2 patch of cells.
>
>  Ke (4x4, row-major) is OVERWRITTEN with -0.25 * vol * Ce, where vol is the
>  quadrature sum of weight*det(J) over the element given by coords and Ce is
>  the alternating-sign pattern Ce[i][j] = (-1)^(i+j).
>
>  eta is accepted for interface symmetry with the other operators but is not
>  read.
>
>  Unused locals (j, np, Ni_p, eta_avg) and the basis evaluation whose result
>  was never read have been removed; results are unchanged.
> */
> static void FormNullspaceStabP0(PetscScalar Ke[],PetscScalar coords[],PetscScalar eta[])
> {
>   PetscInt    ngp;
>   PetscScalar gp_xi[GAUSS_POINTS][2];
>   PetscScalar gp_weight[GAUSS_POINTS];
>   PetscInt    p,i;
>   PetscScalar GNi_p[NSD][NODES_PER_EL],GNx_p[NSD][NODES_PER_EL];
>   PetscScalar J_p,fac;
>   PetscScalar Ce[16],vol;
>
>   (void)eta;
>
>   /* define quadrature rule */
>   ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
>
>   Ce[0]  =  1.0; Ce[1]  = -1.0; Ce[2]  =  1.0; Ce[3]  = -1.0;
>   Ce[4]  = -1.0; Ce[5]  =  1.0; Ce[6]  = -1.0; Ce[7]  =  1.0;
>   Ce[8]  =  1.0; Ce[9]  = -1.0; Ce[10] =  1.0; Ce[11] = -1.0;
>   Ce[12] = -1.0; Ce[13] =  1.0; Ce[14] = -1.0; Ce[15] =  1.0;
>
>   /* element measure by quadrature */
>   vol = 0.0;
>   for (p = 0; p < ngp; p++) {
>     FEBasisQ1Evaluate_dNidxi(gp_xi[p],GNi_p);
>     FEBasisQ1EvaluateGeometry(GNi_p,GNx_p,coords,&J_p);
>     fac = gp_weight[p]*J_p;
>
>     vol = vol + fac;
>   }
>
>   for (i = 0; i < 16; i++) {
>     Ke[i] = -0.25 * vol * Ce[i];
>   }
> }
>
> /*
>  Accumulate the Q1 momentum right-hand side into Fe (8 entries, two per node):
>    Fe[2*i]   += weight * det(J) * N_i * fx[q]
>    Fe[2*i+1] += weight * det(J) * N_i * fy[q]
>  fx and fy hold one value per Gauss point. Fe is added to, not overwritten.
> */
> static void FormMomentumRhsQ1(PetscScalar Fe[],PetscScalar coords[],PetscScalar fx[],PetscScalar fy[])
> {
>   PetscInt    ngp;
>   PetscScalar gp_xi[GAUSS_POINTS][2];
>   PetscScalar gp_weight[GAUSS_POINTS];
>   PetscInt    q,n;
>   PetscScalar Ni_p[NODES_PER_EL];
>   PetscScalar GNi_p[NSD][NODES_PER_EL],GNx_p[NSD][NODES_PER_EL];
>   PetscScalar J_p,fac;
>
>   ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
>
>   for (q = 0; q < ngp; q++) {
>     FEBasisQ1Evaluate_Ni(gp_xi[q],Ni_p);
>     FEBasisQ1Evaluate_dNidxi(gp_xi[q],GNi_p);
>     FEBasisQ1EvaluateGeometry(GNi_p,GNx_p,coords,&J_p);
>     fac = gp_weight[q]*J_p;
>
>     for (n = 0; n < NODES_PER_EL; n++) {
>       PetscScalar *f_node = Fe+NSD*n;
>
>       f_node[0] += fac*Ni_p[n]*fx[q];
>       f_node[1] += fac*Ni_p[n]*fy[q];
>     }
>   }
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "GetElementCoords"
> /*
>  Copy the four vertex coordinates of element (ei,ej) into el_coords as
>  x0,y0,x1,y1,x2,y2,x3,y3, in local node order
>  (ei,ej), (ei,ej+1), (ei+1,ej+1), (ei+1,ej). _coords is indexed [j][i].
> */
> static PetscErrorCode GetElementCoords(DMDACoor2d **_coords,PetscInt ei,PetscInt ej,PetscScalar el_coords[])
> {
>   /* node offsets relative to (ei,ej), in local node order */
>   const PetscInt off_i[4] = {0,0,1,1};
>   const PetscInt off_j[4] = {0,1,1,0};
>   PetscInt       n;
>
>   PetscFunctionBeginUser;
>   for (n = 0; n < 4; n++) {
>     el_coords[NSD*n+0] = _coords[ej+off_j[n]][ei+off_i[n]].x;
>     el_coords[NSD*n+1] = _coords[ej+off_j[n]][ei+off_i[n]].y;
>   }
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "GetElementCoordsMacro"
> /*
>  Copy the four corner coordinates of the 2x2-cell macro element anchored at
>  (ei,ej) into el_coords as x0,y0,x1,y1,x2,y2,x3,y3, in local node order
>  (ei,ej), (ei,ej+2), (ei+2,ej+2), (ei+2,ej). _coords is indexed [j][i].
> */
> static PetscErrorCode GetElementCoordsMacro(DMDACoor2d **_coords,PetscInt ei,PetscInt ej,PetscScalar el_coords[])
> {
>   /* corner offsets relative to (ei,ej), in local node order */
>   const PetscInt off_i[4] = {0,0,2,2};
>   const PetscInt off_j[4] = {0,2,2,0};
>   PetscInt       n;
>
>   PetscFunctionBeginUser;
>   for (n = 0; n < 4; n++) {
>     el_coords[NSD*n+0] = _coords[ej+off_j[n]][ei+off_i[n]].x;
>     el_coords[NSD*n+1] = _coords[ej+off_j[n]][ei+off_i[n]].y;
>   }
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "AssembleA_Stokes"
> /*
>   AssembleA_Stokes - Assemble the coupled velocity/pressure operator into A,
>   one element at a time.
>
>   Input:
>     stokes_da     - DMDA supplying the element layout and nodal coordinates
>     properties_da - DMDA indexed by element, one GaussPointCoefficients per entry
>     properties    - global vector on properties_da (only the eta values are read)
>
>   Output:
>     A - receives the velocity-velocity, velocity-pressure, pressure-velocity and
>         pressure-pressure element blocks with ADD_VALUES, then is final-assembled
>
>   Returns 0 on success, otherwise the first PETSc error code encountered.
> */
> static PetscErrorCode AssembleA_Stokes(Mat A,DM stokes_da,DM properties_da,Vec properties)
> {
>   DM                     cda;
>   Vec                    coords;
>   DMDACoor2d             **_coords;
>   MatStencil             u_eqn[NODES_PER_EL*U_DOFS]; /* 2 degrees of freedom */
>   MatStencil             p_eqn[NODES_PER_EL*P_DOFS]; /* 1 degrees of freedom */
>   PetscInt               sex,sey,mx,my;
>   PetscInt               ei,ej;
>   PetscScalar            Ae[NODES_PER_EL*U_DOFS*NODES_PER_EL*U_DOFS];
>   PetscScalar            Ge[NODES_PER_EL*U_DOFS*NODES_PER_EL*P_DOFS];
>   PetscScalar            De[NODES_PER_EL*P_DOFS*NODES_PER_EL*U_DOFS];
>   PetscScalar            Ce[NODES_PER_EL*P_DOFS*NODES_PER_EL*P_DOFS];
>   PetscScalar            el_coords[NODES_PER_EL*NSD];
>   Vec                    local_properties;
>   GaussPointCoefficients **props;
>   PetscScalar            *prop_eta;
>   PetscErrorCode         ierr;
>
>   PetscFunctionBeginUser;
>   /* setup for coords */
>   ierr = DMGetCoordinateDM(stokes_da,&cda);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(stokes_da,&coords);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(cda,coords,&_coords);CHKERRQ(ierr);
>
>   /* setup for coefficients: scatter into a local vector so props[ej][ei] can be indexed */
>   ierr = DMCreateLocalVector(properties_da,&local_properties);CHKERRQ(ierr);
>   ierr = DMGlobalToLocalBegin(properties_da,properties,INSERT_VALUES,local_properties);CHKERRQ(ierr);
>   ierr = DMGlobalToLocalEnd(properties_da,properties,INSERT_VALUES,local_properties);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(properties_da,local_properties,&props);CHKERRQ(ierr);
>
>   ierr = DMDAGetElementCorners(stokes_da,&sex,&sey,0,&mx,&my,0);CHKERRQ(ierr);
>   for (ej = sey; ej < sey+my; ej++) {
>     for (ei = sex; ei < sex+mx; ei++) {
>       /* get coords for the element (return code is checked, as in the other assembly routines) */
>       ierr = GetElementCoords(_coords,ei,ej,el_coords);CHKERRQ(ierr);
>
>       /* get coefficients for the element */
>       prop_eta = props[ej][ei].eta;
>
>       /* zero the element blocks before they are formed */
>       ierr = PetscMemzero(Ae,sizeof(Ae));CHKERRQ(ierr);
>       ierr = PetscMemzero(Ge,sizeof(Ge));CHKERRQ(ierr);
>       ierr = PetscMemzero(De,sizeof(De));CHKERRQ(ierr);
>       ierr = PetscMemzero(Ce,sizeof(Ce));CHKERRQ(ierr);
>
>       /* form element blocks; De is derived from Ge by FormDivergenceOperator() */
>       FormStressOperatorQ1(Ae,el_coords,prop_eta);
>       FormGradientOperatorQ1Q1(Ge,el_coords);
>       FormDivergenceOperator(De,Ge,NODES_PER_EL,NODES_PER_EL);
>       FormStabilisationOperatorQ1(Ce,el_coords,prop_eta);
>
>       /* insert element blocks into the global matrix */
>       ierr = DMDAGetElementEqnums_up(u_eqn,p_eqn,ei,ej);CHKERRQ(ierr);
>       ierr = MatSetValuesStencil(A,NODES_PER_EL*U_DOFS,u_eqn,NODES_PER_EL*U_DOFS,u_eqn,Ae,ADD_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValuesStencil(A,NODES_PER_EL*U_DOFS,u_eqn,NODES_PER_EL*P_DOFS,p_eqn,Ge,ADD_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValuesStencil(A,NODES_PER_EL*P_DOFS,p_eqn,NODES_PER_EL*U_DOFS,u_eqn,De,ADD_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValuesStencil(A,NODES_PER_EL*P_DOFS,p_eqn,NODES_PER_EL*P_DOFS,p_eqn,Ce,ADD_VALUES);CHKERRQ(ierr);
>     }
>   }
>   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = DMDAVecRestoreArray(cda,coords,&_coords);CHKERRQ(ierr);
>
>   ierr = DMDAVecRestoreArray(properties_da,local_properties,&props);CHKERRQ(ierr);
>   ierr = VecDestroy(&local_properties);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "AssembleA_PCStokes"
> /*
>   AssembleA_PCStokes - Assemble the matrix used to build the preconditioner.
>
>   Same element loop as AssembleA_Stokes(), with two differences visible here:
>   the pressure-pressure block is the output of FormScaledMassMatrixOperatorQ1Q1(),
>   and no pressure-velocity block is inserted (De is zeroed but never added to A).
>
>   Input:
>     stokes_da     - DMDA supplying the element layout and nodal coordinates
>     properties_da - DMDA indexed by element, one GaussPointCoefficients per entry
>     properties    - global vector on properties_da (only the eta values are read)
>
>   Output:
>     A - element blocks are added with ADD_VALUES, then A is final-assembled
> */
> static PetscErrorCode AssembleA_PCStokes(Mat A,DM stokes_da,DM properties_da,Vec properties)
> {
>   PetscErrorCode         ierr;
>   DM                     coord_dm;
>   Vec                    coord_vec,props_local;
>   DMDACoor2d             **xy;
>   GaussPointCoefficients **cell_props;
>   PetscScalar            *eta_gp;
>   PetscInt               el_x0,el_y0,nel_x,nel_y;
>   PetscInt               ei,ej;
>   MatStencil             u_eqn[NODES_PER_EL*U_DOFS]; /* velocity rows/cols of the element */
>   MatStencil             p_eqn[NODES_PER_EL*P_DOFS]; /* pressure rows/cols of the element */
>   PetscScalar            el_coords[NODES_PER_EL*NSD];
>   PetscScalar            Ae[NODES_PER_EL*U_DOFS*NODES_PER_EL*U_DOFS];
>   PetscScalar            Ge[NODES_PER_EL*U_DOFS*NODES_PER_EL*P_DOFS];
>   PetscScalar            De[NODES_PER_EL*P_DOFS*NODES_PER_EL*U_DOFS];
>   PetscScalar            Ce[NODES_PER_EL*P_DOFS*NODES_PER_EL*P_DOFS];
>
>   PetscFunctionBeginUser;
>   /* nodal coordinates */
>   ierr = DMGetCoordinateDM(stokes_da,&coord_dm);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(stokes_da,&coord_vec);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(coord_dm,coord_vec,&xy);CHKERRQ(ierr);
>
>   /* element coefficients, scattered into a local vector for array access */
>   ierr = DMCreateLocalVector(properties_da,&props_local);CHKERRQ(ierr);
>   ierr = DMGlobalToLocalBegin(properties_da,properties,INSERT_VALUES,props_local);CHKERRQ(ierr);
>   ierr = DMGlobalToLocalEnd(properties_da,properties,INSERT_VALUES,props_local);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(properties_da,props_local,&cell_props);CHKERRQ(ierr);
>
>   ierr = DMDAGetElementCorners(stokes_da,&el_x0,&el_y0,0,&nel_x,&nel_y,0);CHKERRQ(ierr);
>   for (ej = el_y0; ej < el_y0+nel_y; ej++) {
>     for (ei = el_x0; ei < el_x0+nel_x; ei++) {
>       ierr = GetElementCoords(xy,ei,ej,el_coords);CHKERRQ(ierr);
>
>       eta_gp = cell_props[ej][ei].eta;
>
>       /* clear the element blocks */
>       ierr = PetscMemzero(Ae,sizeof(Ae));CHKERRQ(ierr);
>       ierr = PetscMemzero(Ge,sizeof(Ge));CHKERRQ(ierr);
>       ierr = PetscMemzero(De,sizeof(De));CHKERRQ(ierr);
>       ierr = PetscMemzero(Ce,sizeof(Ce));CHKERRQ(ierr);
>
>       /* form the element blocks (no divergence block is formed here) */
>       FormStressOperatorQ1(Ae,el_coords,eta_gp);
>       FormGradientOperatorQ1Q1(Ge,el_coords);
>       FormScaledMassMatrixOperatorQ1Q1(Ce,el_coords,eta_gp);
>
>       /* add the uu, up and pp blocks to the global matrix; the pu block is left out */
>       ierr = DMDAGetElementEqnums_up(u_eqn,p_eqn,ei,ej);CHKERRQ(ierr);
>       ierr = MatSetValuesStencil(A,NODES_PER_EL*U_DOFS,u_eqn,NODES_PER_EL*U_DOFS,u_eqn,Ae,ADD_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValuesStencil(A,NODES_PER_EL*U_DOFS,u_eqn,NODES_PER_EL*P_DOFS,p_eqn,Ge,ADD_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValuesStencil(A,NODES_PER_EL*P_DOFS,p_eqn,NODES_PER_EL*P_DOFS,p_eqn,Ce,ADD_VALUES);CHKERRQ(ierr);
>     }
>   }
>   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   /* release array views and the temporary local vector */
>   ierr = DMDAVecRestoreArray(coord_dm,coord_vec,&xy);CHKERRQ(ierr);
>   ierr = DMDAVecRestoreArray(properties_da,props_local,&cell_props);CHKERRQ(ierr);
>   ierr = VecDestroy(&props_local);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "DMDASetValuesLocalStencil_ADD_VALUES"
> /*
>   DMDASetValuesLocalStencil_ADD_VALUES - Add the element vectors Fe_u (two
>   entries per node: u then v) and Fe_p (one entry per node) into the array
>   fields_F at the (j,i) grid locations named by the stencils u_eqn[] and p_eqn[].
>
>   Four nodes are processed. Always returns 0.
> */
> static PetscErrorCode DMDASetValuesLocalStencil_ADD_VALUES(StokesDOF **fields_F,MatStencil u_eqn[],MatStencil p_eqn[],PetscScalar Fe_u[],PetscScalar Fe_p[])
> {
>   PetscInt node;
>
>   PetscFunctionBeginUser;
>   for (node = 0; node < 4; node++) {
>     const MatStencil *su = &u_eqn[2*node];   /* location of the u component */
>     const MatStencil *sv = &u_eqn[2*node+1]; /* location of the v component */
>     const MatStencil *sp = &p_eqn[node];     /* location of the pressure    */
>
>     fields_F[su->j][su->i].u_dof += Fe_u[2*node];
>     fields_F[sv->j][sv->i].v_dof += Fe_u[2*node+1];
>     fields_F[sp->j][sp->i].p_dof += Fe_p[node];
>   }
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "AssembleF_Stokes"
> /*
>   AssembleF_Stokes - Assemble the right-hand side vector F element by element.
>
>   Input:
>     stokes_da     - DMDA supplying the element layout and nodal coordinates
>     properties_da - DMDA indexed by element, one GaussPointCoefficients per entry
>     properties    - global vector on properties_da (the fx and fy values are read)
>
>   Output:
>     F - element contributions are summed into a zeroed local work vector and then
>         added to F with ADD_VALUES; F itself is not zeroed here
>
>   The pressure load He is zeroed for every element and never filled, so only
>   zeros are added to the pressure entries.
> */
> static PetscErrorCode AssembleF_Stokes(Vec F,DM stokes_da,DM properties_da,Vec properties)
> {
>   PetscErrorCode         ierr;
>   DM                     coord_dm;
>   Vec                    coord_vec,props_local,F_local;
>   DMDACoor2d             **xy;
>   GaussPointCoefficients **cell_props;
>   StokesDOF              **F_arr;
>   PetscScalar            *fx_gp,*fy_gp;
>   PetscInt               el_x0,el_y0,nel_x,nel_y;
>   PetscInt               ei,ej;
>   MatStencil             u_eqn[NODES_PER_EL*U_DOFS]; /* velocity locations of the element */
>   MatStencil             p_eqn[NODES_PER_EL*P_DOFS]; /* pressure locations of the element */
>   PetscScalar            el_coords[NODES_PER_EL*NSD];
>   PetscScalar            Fe[NODES_PER_EL*U_DOFS];
>   PetscScalar            He[NODES_PER_EL*P_DOFS];
>
>   PetscFunctionBeginUser;
>   /* nodal coordinates */
>   ierr = DMGetCoordinateDM(stokes_da,&coord_dm);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(stokes_da,&coord_vec);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(coord_dm,coord_vec,&xy);CHKERRQ(ierr);
>
>   /* element coefficients, scattered into a borrowed local work vector */
>   ierr = DMGetLocalVector(properties_da,&props_local);CHKERRQ(ierr);
>   ierr = DMGlobalToLocalBegin(properties_da,properties,INSERT_VALUES,props_local);CHKERRQ(ierr);
>   ierr = DMGlobalToLocalEnd(properties_da,properties,INSERT_VALUES,props_local);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(properties_da,props_local,&cell_props);CHKERRQ(ierr);
>
>   /* zeroed local work vector that accumulates the element loads */
>   ierr = DMGetLocalVector(stokes_da,&F_local);CHKERRQ(ierr);
>   ierr = VecZeroEntries(F_local);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(stokes_da,F_local,&F_arr);CHKERRQ(ierr);
>
>   ierr = DMDAGetElementCorners(stokes_da,&el_x0,&el_y0,0,&nel_x,&nel_y,0);CHKERRQ(ierr);
>   for (ej = el_y0; ej < el_y0+nel_y; ej++) {
>     for (ei = el_x0; ei < el_x0+nel_x; ei++) {
>       ierr = GetElementCoords(xy,ei,ej,el_coords);CHKERRQ(ierr);
>
>       /* body-force samples for this element */
>       fx_gp = cell_props[ej][ei].fx;
>       fy_gp = cell_props[ej][ei].fy;
>
>       /* clear the element load vectors */
>       ierr = PetscMemzero(Fe,sizeof(Fe));CHKERRQ(ierr);
>       ierr = PetscMemzero(He,sizeof(He));CHKERRQ(ierr);
>
>       /* form the momentum load */
>       FormMomentumRhsQ1(Fe,el_coords,fx_gp,fy_gp);
>
>       /* add the element loads into the local array */
>       ierr = DMDAGetElementEqnums_up(u_eqn,p_eqn,ei,ej);CHKERRQ(ierr);
>       ierr = DMDASetValuesLocalStencil_ADD_VALUES(F_arr,u_eqn,p_eqn,Fe,He);CHKERRQ(ierr);
>     }
>   }
>
>   /* sum the local contributions into the global vector */
>   ierr = DMDAVecRestoreArray(stokes_da,F_local,&F_arr);CHKERRQ(ierr);
>   ierr = DMLocalToGlobalBegin(stokes_da,F_local,ADD_VALUES,F);CHKERRQ(ierr);
>   ierr = DMLocalToGlobalEnd(stokes_da,F_local,ADD_VALUES,F);CHKERRQ(ierr);
>   ierr = DMRestoreLocalVector(stokes_da,&F_local);CHKERRQ(ierr);
>
>   /* release the remaining array views and the borrowed work vector */
>   ierr = DMDAVecRestoreArray(coord_dm,coord_vec,&xy);CHKERRQ(ierr);
>   ierr = DMDAVecRestoreArray(properties_da,props_local,&cell_props);CHKERRQ(ierr);
>   ierr = DMRestoreLocalVector(properties_da,&props_local);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "DMDACreateSolCx"
> /*
>   DMDACreateSolCx - Create a 3-dof DMDA on the unit square with (mx+1) x (my+1)
>   nodes and a global vector holding the values returned by evaluate_solCx() at
>   every node.
>
>   Input:
>     eta0,eta1,xc,nz - forwarded unchanged to evaluate_solCx()
>     mx,my           - the DMDA is created with mx+1 by my+1 grid points
>
>   Output:
>     _da - the newly created DMDA
>     _X  - new global vector on _da filled with velocity (dofs 0,1) and pressure (dof 2)
>
>   Neither output is destroyed here.
> */
> static PetscErrorCode DMDACreateSolCx(PetscReal eta0,PetscReal eta1,PetscReal xc,PetscInt nz,PetscInt mx,PetscInt my,DM *_da,Vec *_X)
> {
>   PetscErrorCode ierr;
>   DM             da,coord_dm;
>   Vec            X,X_local,coord_vec;
>   StokesDOF      **sol;
>   DMDACoor2d     **xy;
>   PetscInt       gxs,gys,gxm,gym; /* ghosted start indices and widths */
>   PetscInt       i,j;
>
>   PetscFunctionBeginUser;
>   ierr = DMDACreate2d(PETSC_COMM_WORLD, DMDA_BOUNDARY_NONE, DMDA_BOUNDARY_NONE,DMDA_STENCIL_BOX,
>                       mx+1,my+1,PETSC_DECIDE,PETSC_DECIDE,3,1,NULL,NULL,&da);CHKERRQ(ierr);
>   ierr = DMDASetFieldName(da,0,"anlytic_Vx");CHKERRQ(ierr);
>   ierr = DMDASetFieldName(da,1,"anlytic_Vy");CHKERRQ(ierr);
>   ierr = DMDASetFieldName(da,2,"analytic_P");CHKERRQ(ierr);
>
>   /* unit square */
>   ierr = DMDASetUniformCoordinates(da,0.0,1.0,0.0,1.0,0.,0.);CHKERRQ(ierr);
>
>   ierr = DMGetCoordinatesLocal(da,&coord_vec);CHKERRQ(ierr);
>   ierr = DMGetCoordinateDM(da,&coord_dm);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(coord_dm,coord_vec,&xy);CHKERRQ(ierr);
>
>   ierr = DMCreateGlobalVector(da,&X);CHKERRQ(ierr);
>   ierr = DMCreateLocalVector(da,&X_local);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(da,X_local,&sol);CHKERRQ(ierr);
>
>   /* evaluate at every locally visible node, ghost nodes included */
>   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gxm,&gym,0);CHKERRQ(ierr);
>   for (j = gys; j < gys+gym; j++) {
>     for (i = gxs; i < gxs+gxm; i++) {
>       double pos[2],pressure,vel[2],total_stress[3],strain_rate[3];
>
>       pos[0] = PetscRealPart(xy[j][i].x);
>       pos[1] = PetscRealPart(xy[j][i].y);
>
>       /* only the velocity and pressure outputs are stored */
>       evaluate_solCx(pos,eta0,eta1,xc,nz,vel,&pressure,total_stress,strain_rate);
>
>       sol[j][i].u_dof = vel[0];
>       sol[j][i].v_dof = vel[1];
>       sol[j][i].p_dof = pressure;
>     }
>   }
>   ierr = DMDAVecRestoreArray(da,X_local,&sol);CHKERRQ(ierr);
>   ierr = DMDAVecRestoreArray(coord_dm,coord_vec,&xy);CHKERRQ(ierr);
>
>   /* copy the local values into the global vector, then drop the local one */
>   ierr = DMLocalToGlobalBegin(da,X_local,INSERT_VALUES,X);CHKERRQ(ierr);
>   ierr = DMLocalToGlobalEnd(da,X_local,INSERT_VALUES,X);CHKERRQ(ierr);
>   ierr = VecDestroy(&X_local);CHKERRQ(ierr);
>
>   *_da = da;
>   *_X  = X;
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "StokesDAGetNodalFields"
> /*
>   StokesDAGetNodalFields - Copy the u, v and p values at the four corner nodes
>   of element (ei,ej) from fields[][] into nodal_fields[0..3].
>
>   Local node order: (ei,ej), (ei,ej+1), (ei+1,ej+1), (ei+1,ej).
>   Always returns 0.
> */
> static PetscErrorCode StokesDAGetNodalFields(StokesDOF **fields,PetscInt ei,PetscInt ej,StokesDOF nodal_fields[])
> {
>   /* index offsets of each local node relative to (ei,ej) */
>   const PetscInt di[4] = {0,0,1,1};
>   const PetscInt dj[4] = {0,1,1,0};
>   PetscInt       n;
>
>   PetscFunctionBeginUser;
>   for (n = 0; n < 4; n++) {
>     const StokesDOF *src = &fields[ej+dj[n]][ei+di[n]];
>
>     nodal_fields[n].u_dof = src->u_dof;
>     nodal_fields[n].v_dof = src->v_dof;
>     nodal_fields[n].p_dof = src->p_dof;
>   }
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "DMDAIntegrateErrors"
> /*
>   DMDAIntegrateErrors - Accumulate three error measures between X and X_analytic
>   over the elements of stokes_da by Gauss quadrature, sum them over
>   PETSC_COMM_WORLD, take square roots, and print one line:
>
>       h   p_L2   u_L2   u_H1
>
>   where h = (xmax - xmin)/M, with M the first size reported by DMDAGetInfo() and
>   xmin/xmax taken from DMDAGetBoundingBox().
>
>   Input:
>     stokes_da  - DMDA that both vectors live on
>     X          - computed solution (global vector)
>     X_analytic - reference solution (global vector)
>
>   Nothing is returned to the caller besides the error code; the results are
>   only printed.
> */
> static PetscErrorCode DMDAIntegrateErrors(DM stokes_da,Vec X,Vec X_analytic)
> {
>   PetscErrorCode ierr;
>   DM             coord_dm;
>   Vec            coord_vec,exact_local,approx_local;
>   DMDACoor2d     **xy;
>   StokesDOF      **exact,**approx;
>   StokesDOF      exact_e[4],approx_e[4];
>   PetscInt       el_x0,el_y0,nel_x,nel_y;
>   PetscInt       ei,ej,q,a;
>   PetscInt       nqp,M;
>   PetscScalar    el_coords[NODES_PER_EL*NSD];
>   PetscScalar    dN_dxi[NSD][NODES_PER_EL],dN_dx[NSD][NODES_PER_EL];
>   PetscScalar    N_q[NODES_PER_EL];
>   PetscScalar    qp_xi[GAUSS_POINTS][2];
>   PetscScalar    qp_w[GAUSS_POINTS];
>   PetscScalar    detJ,w;
>   PetscScalar    h;
>   PetscScalar    el_p_L2,el_u_L2,el_u_H1;    /* per-element sums          */
>   PetscScalar    loc_p_L2,loc_u_L2,loc_u_H1; /* sums over local elements  */
>   PetscScalar    p_L2,u_L2,u_H1;             /* sums over all processes   */
>   PetscReal      xymin[2],xymax[2];
>
>   PetscFunctionBeginUser;
>   /* quadrature rule */
>   ConstructGaussQuadrature(&nqp,qp_xi,qp_w);
>
>   /* nodal coordinates */
>   ierr = DMGetCoordinateDM(stokes_da,&coord_dm);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(stokes_da,&coord_vec);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(coord_dm,coord_vec,&xy);CHKERRQ(ierr);
>
>   /* local (ghosted) copy of the reference solution */
>   ierr = DMCreateLocalVector(stokes_da,&exact_local);CHKERRQ(ierr);
>   ierr = DMGlobalToLocalBegin(stokes_da,X_analytic,INSERT_VALUES,exact_local);CHKERRQ(ierr);
>   ierr = DMGlobalToLocalEnd(stokes_da,X_analytic,INSERT_VALUES,exact_local);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(stokes_da,exact_local,&exact);CHKERRQ(ierr);
>
>   /* local (ghosted) copy of the computed solution */
>   ierr = DMCreateLocalVector(stokes_da,&approx_local);CHKERRQ(ierr);
>   ierr = DMGlobalToLocalBegin(stokes_da,X,INSERT_VALUES,approx_local);CHKERRQ(ierr);
>   ierr = DMGlobalToLocalEnd(stokes_da,X,INSERT_VALUES,approx_local);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(stokes_da,approx_local,&approx);CHKERRQ(ierr);
>
>   /* length scale reported alongside the errors */
>   ierr = DMDAGetInfo(stokes_da,0,&M,0,0,0,0,0,0,0,0,0,0,0);CHKERRQ(ierr);
>   ierr = DMDAGetBoundingBox(stokes_da,xymin,xymax);CHKERRQ(ierr);
>   h    = (xymax[0]-xymin[0])/((double)M);
>
>   loc_p_L2 = 0.0;
>   loc_u_L2 = 0.0;
>   loc_u_H1 = 0.0;
>
>   ierr = DMDAGetElementCorners(stokes_da,&el_x0,&el_y0,0,&nel_x,&nel_y,0);CHKERRQ(ierr);
>   for (ej = el_y0; ej < el_y0+nel_y; ej++) {
>     for (ei = el_x0; ei < el_x0+nel_x; ei++) {
>       /* gather coordinates and both sets of nodal values for this element */
>       ierr = GetElementCoords(xy,ei,ej,el_coords);CHKERRQ(ierr);
>       ierr = StokesDAGetNodalFields(approx,ei,ej,approx_e);CHKERRQ(ierr);
>       ierr = StokesDAGetNodalFields(exact,ei,ej,exact_e);CHKERRQ(ierr);
>
>       el_p_L2 = 0.0;
>       el_u_L2 = 0.0;
>       el_u_H1 = 0.0;
>       for (q = 0; q < nqp; q++) {
>         FEBasisQ1Evaluate_Ni(qp_xi[q],N_q);
>         FEBasisQ1Evaluate_dNidxi(qp_xi[q],dN_dxi);
>         FEBasisQ1EvaluateGeometry(dN_dxi,dN_dx,el_coords,&detJ);
>         w = qp_w[q]*detJ;
>
>         for (a = 0; a < NODES_PER_EL; a++) {
>           PetscScalar p_error,u_error,v_error;
>
>           p_error = approx_e[a].p_dof-exact_e[a].p_dof;
>           u_error = approx_e[a].u_dof-exact_e[a].u_dof;
>           v_error = approx_e[a].v_dof-exact_e[a].v_dof;
>
>           el_p_L2 += w*N_q[a]*p_error*p_error;
>           el_u_L2 += w*N_q[a]*(u_error*u_error+v_error*v_error);
>           el_u_H1 += w*(dN_dx[0][a]*u_error*dN_dx[0][a]*u_error    /* du/dx */
>                         +dN_dx[1][a]*u_error*dN_dx[1][a]*u_error   /* du/dy */
>                         +dN_dx[0][a]*v_error*dN_dx[0][a]*v_error   /* dv/dx */
>                         +dN_dx[1][a]*v_error*dN_dx[1][a]*v_error); /* dv/dy */
>         }
>       }
>
>       loc_p_L2 += el_p_L2;
>       loc_u_L2 += el_u_L2;
>       loc_u_H1 += el_u_H1;
>     }
>   }
>
>   /* sum over all processes, then take square roots */
>   ierr = MPI_Allreduce(&loc_p_L2,&p_L2,1,MPIU_SCALAR,MPIU_SUM,PETSC_COMM_WORLD);CHKERRQ(ierr);
>   ierr = MPI_Allreduce(&loc_u_L2,&u_L2,1,MPIU_SCALAR,MPIU_SUM,PETSC_COMM_WORLD);CHKERRQ(ierr);
>   ierr = MPI_Allreduce(&loc_u_H1,&u_H1,1,MPIU_SCALAR,MPIU_SUM,PETSC_COMM_WORLD);CHKERRQ(ierr);
>   p_L2 = PetscSqrtScalar(p_L2);
>   u_L2 = PetscSqrtScalar(u_L2);
>   u_H1 = PetscSqrtScalar(u_H1);
>
>   ierr = PetscPrintf(PETSC_COMM_WORLD,"%1.4e   %1.4e   %1.4e   %1.4e \n",PetscRealPart(h),PetscRealPart(p_L2),PetscRealPart(u_L2),PetscRealPart(u_H1));CHKERRQ(ierr);
>
>   /* release array views and the temporary local vectors */
>   ierr = DMDAVecRestoreArray(coord_dm,coord_vec,&xy);CHKERRQ(ierr);
>
>   ierr = DMDAVecRestoreArray(stokes_da,exact_local,&exact);CHKERRQ(ierr);
>   ierr = VecDestroy(&exact_local);CHKERRQ(ierr);
>   ierr = DMDAVecRestoreArray(stokes_da,approx_local,&approx);CHKERRQ(ierr);
>   ierr = VecDestroy(&approx_local);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "solve_stokes_2d_coupled"
> static PetscErrorCode solve_stokes_2d_coupled(PetscInt mx,PetscInt my)
> {
>   DM                     da_Stokes,da_prop;
>   PetscInt               u_dof,p_dof,dof,stencil_width;
>   Mat                    A,B;
>   PetscInt               mxl,myl;
>   DM                     prop_cda,vel_cda;
>   Vec                    prop_coords,vel_coords;
>   PetscInt               si,sj,nx,ny,i,j,p;
>   Vec                    f,X;
>   PetscInt               prop_dof,prop_stencil_width;
>   Vec                    properties,l_properties;
>   PetscReal              dx,dy;
>   PetscInt               M,N;
>   DMDACoor2d             **_prop_coords,**_vel_coords;
>   GaussPointCoefficients **element_props;
>   PetscInt               its;
>   KSP                    ksp_S;
>   PetscInt               coefficient_structure = 0;
>   PetscInt               cpu_x,cpu_y,*lx = NULL,*ly = NULL;
>   PetscBool              use_gp_coords = PETSC_FALSE,set;
>   char                   filename[PETSC_MAX_PATH_LEN];
>   PetscErrorCode         ierr;
>
>   PetscFunctionBeginUser;
>   /* Generate the da for velocity and pressure */
>   /*
>   We use Q1 elements for the temperature.
>   FEM has a 9-point stencil (BOX) or connectivity pattern
>   Num nodes in each direction is mx+1, my+1
>   */
>   u_dof         = U_DOFS; /* Vx, Vy - velocities */
>   p_dof         = P_DOFS; /* p - pressure */
>   dof           = u_dof+p_dof;
>   stencil_width = 1;
>   ierr          = DMDACreate2d(PETSC_COMM_WORLD, DMDA_BOUNDARY_NONE, DMDA_BOUNDARY_NONE,DMDA_STENCIL_BOX,
>                                mx+1,my+1,PETSC_DECIDE,PETSC_DECIDE,dof,stencil_width,NULL,NULL,&da_Stokes);CHKERRQ(ierr);
>   ierr = DMDASetFieldName(da_Stokes,0,"Vx");CHKERRQ(ierr);
>   ierr = DMDASetFieldName(da_Stokes,1,"Vy");CHKERRQ(ierr);
>   ierr = DMDASetFieldName(da_Stokes,2,"P");CHKERRQ(ierr);
>
>   /* unit box [0,1] x [0,1] */
>   ierr = DMDASetUniformCoordinates(da_Stokes,0.0,1.0,0.0,1.0,0.,0.);CHKERRQ(ierr);
>
>
>   /* Generate element properties, we will assume all material properties are constant over the element */
>   /* local number of elements */
>   ierr = DMDAGetLocalElementSize(da_Stokes,&mxl,&myl,NULL);CHKERRQ(ierr);
>
>   /* !!! IN PARALLEL WE MUST MAKE SURE THE TWO DMDA's ALIGN !!!  */
>   ierr = DMDAGetInfo(da_Stokes,0,0,0,0,&cpu_x,&cpu_y,0,0,0,0,0,0,0);CHKERRQ(ierr);
>   ierr = DMDAGetElementOwnershipRanges2d(da_Stokes,&lx,&ly);CHKERRQ(ierr);
>
>   prop_dof           = (int)(sizeof(GaussPointCoefficients)/sizeof(PetscScalar)); /* gauss point setup */
>   prop_stencil_width = 0;
>   ierr               = DMDACreate2d(PETSC_COMM_WORLD, DMDA_BOUNDARY_NONE, DMDA_BOUNDARY_NONE,DMDA_STENCIL_BOX,
>                                     mx,my,cpu_x,cpu_y,prop_dof,prop_stencil_width,lx,ly,&da_prop);CHKERRQ(ierr);
>   ierr = PetscFree(lx);CHKERRQ(ierr);
>   ierr = PetscFree(ly);CHKERRQ(ierr);
>
>   /* define centroid positions */
>   ierr = DMDAGetInfo(da_prop,0,&M,&N,0,0,0,0,0,0,0,0,0,0);CHKERRQ(ierr);
>   dx   = 1.0/((PetscReal)(M));
>   dy   = 1.0/((PetscReal)(N));
>
>   ierr = DMDASetUniformCoordinates(da_prop,0.0+0.5*dx,1.0-0.5*dx,0.0+0.5*dy,1.0-0.5*dy,0.,0);CHKERRQ(ierr);
>
>   /* define coefficients */
>   ierr = PetscOptionsGetInt(NULL,"-c_str",&coefficient_structure,NULL);CHKERRQ(ierr);
>   /*     PetscPrintf(PETSC_COMM_WORLD, "Using coeficient structure %D \n", coefficient_structure); */
>
>   ierr = DMCreateGlobalVector(da_prop,&properties);CHKERRQ(ierr);
>   ierr = DMCreateLocalVector(da_prop,&l_properties);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(da_prop,l_properties,&element_props);CHKERRQ(ierr);
>
>   ierr = DMGetCoordinateDM(da_prop,&prop_cda);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(da_prop,&prop_coords);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(prop_cda,prop_coords,&_prop_coords);CHKERRQ(ierr);
>
>   ierr = DMDAGetGhostCorners(prop_cda,&si,&sj,0,&nx,&ny,0);CHKERRQ(ierr);
>
>   ierr = DMGetCoordinateDM(da_Stokes,&vel_cda);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(da_Stokes,&vel_coords);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(vel_cda,vel_coords,&_vel_coords);CHKERRQ(ierr);
>
>
>   /* interpolate the coordinates */
>   for (j = sj; j < sj+ny; j++) {
>     for (i = si; i < si+nx; i++) {
>       PetscInt    ngp;
>       PetscScalar gp_xi[GAUSS_POINTS][2],gp_weight[GAUSS_POINTS];
>       PetscScalar el_coords[8];
>
>       ierr = GetElementCoords(_vel_coords,i,j,el_coords);CHKERRQ(ierr);
>       ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
>
>       for (p = 0; p < GAUSS_POINTS; p++) {
>         PetscScalar gp_x,gp_y;
>         PetscInt    n;
>         PetscScalar xi_p[2],Ni_p[4];
>
>         xi_p[0] = gp_xi[p][0];
>         xi_p[1] = gp_xi[p][1];
>         FEBasisQ1Evaluate_Ni(xi_p,Ni_p);
>
>         gp_x = 0.0;
>         gp_y = 0.0;
>         for (n = 0; n < NODES_PER_EL; n++) {
>           gp_x = gp_x+Ni_p[n]*el_coords[2*n];
>           gp_y = gp_y+Ni_p[n]*el_coords[2*n+1];
>         }
>         element_props[j][i].gp_coords[2*p]   = gp_x;
>         element_props[j][i].gp_coords[2*p+1] = gp_y;
>       }
>     }
>   }
>
>   /* define the coefficients */
>   ierr = PetscOptionsGetBool(NULL,"-use_gp_coords",&use_gp_coords,0);CHKERRQ(ierr);
>
>   for (j = sj; j < sj+ny; j++) {
>     for (i = si; i < si+nx; i++) {
>       PetscReal centroid_x = PetscRealPart(_prop_coords[j][i].x); /* centroids of cell */
>       PetscReal centroid_y = PetscRealPart(_prop_coords[j][i].y);
>       PetscReal coord_x,coord_y;
>
>       if (coefficient_structure == 0) {
>         PetscReal opts_eta0,opts_eta1,opts_xc;
>         PetscInt  opts_nz;
>
>         opts_eta0 = 1.0;
>         opts_eta1 = 1.0;
>         opts_xc   = 0.5;
>         opts_nz   = 1;
>
>         ierr = PetscOptionsGetReal(NULL,"-solcx_eta0",&opts_eta0,0);CHKERRQ(ierr);
>         ierr = PetscOptionsGetReal(NULL,"-solcx_eta1",&opts_eta1,0);CHKERRQ(ierr);
>         ierr = PetscOptionsGetReal(NULL,"-solcx_xc",&opts_xc,0);CHKERRQ(ierr);
>         ierr = PetscOptionsGetInt(NULL,"-solcx_nz",&opts_nz,0);CHKERRQ(ierr);
>
>         for (p = 0; p < GAUSS_POINTS; p++) {
>           coord_x = centroid_x;
>           coord_y = centroid_y;
>           if (use_gp_coords) {
>             coord_x = PetscRealPart(element_props[j][i].gp_coords[2*p]);
>             coord_y = PetscRealPart(element_props[j][i].gp_coords[2*p+1]);
>           }
>
>
>           element_props[j][i].eta[p] = opts_eta0;
>           if (coord_x > opts_xc) element_props[j][i].eta[p] = opts_eta1;
>
>           element_props[j][i].fx[p] = 0.0;
>           element_props[j][i].fy[p] = sin((double)opts_nz*PETSC_PI*coord_y)*cos(1.0*PETSC_PI*coord_x);
>         }
>       } else if (coefficient_structure == 1) { /* square sinker */
>         PetscReal opts_eta0,opts_eta1,opts_dx,opts_dy;
>
>         opts_eta0 = 1.0;
>         opts_eta1 = 1.0;
>         opts_dx   = 0.50;
>         opts_dy   = 0.50;
>
>         ierr = PetscOptionsGetReal(NULL,"-sinker_eta0",&opts_eta0,0);CHKERRQ(ierr);
>         ierr = PetscOptionsGetReal(NULL,"-sinker_eta1",&opts_eta1,0);CHKERRQ(ierr);
>         ierr = PetscOptionsGetReal(NULL,"-sinker_dx",&opts_dx,0);CHKERRQ(ierr);
>         ierr = PetscOptionsGetReal(NULL,"-sinker_dy",&opts_dy,0);CHKERRQ(ierr);
>
>
>         for (p = 0; p < GAUSS_POINTS; p++) {
>           coord_x = centroid_x;
>           coord_y = centroid_y;
>           if (use_gp_coords) {
>             coord_x = PetscRealPart(element_props[j][i].gp_coords[2*p]);
>             coord_y = PetscRealPart(element_props[j][i].gp_coords[2*p+1]);
>           }
>
>           element_props[j][i].eta[p] = opts_eta0;
>           element_props[j][i].fx[p]  = 0.0;
>           element_props[j][i].fy[p]  = 0.0;
>
>           if ((coord_x > -0.5*opts_dx+0.5) && (coord_x < 0.5*opts_dx+0.5)) {
>             if ((coord_y > -0.5*opts_dy+0.5) && (coord_y < 0.5*opts_dy+0.5)) {
>               element_props[j][i].eta[p] =  opts_eta1;
>               element_props[j][i].fx[p]  =  0.0;
>               element_props[j][i].fy[p]  = -1.0;
>             }
>           }
>         }
>       } else if (coefficient_structure == 2) { /* circular sinker */
>         PetscReal opts_eta0,opts_eta1,opts_r,radius2;
>
>         opts_eta0 = 1.0;
>         opts_eta1 = 1.0;
>         opts_r    = 0.25;
>
>         ierr = PetscOptionsGetReal(NULL,"-sinker_eta0",&opts_eta0,0);CHKERRQ(ierr);
>         ierr = PetscOptionsGetReal(NULL,"-sinker_eta1",&opts_eta1,0);CHKERRQ(ierr);
>         ierr = PetscOptionsGetReal(NULL,"-sinker_r",&opts_r,0);CHKERRQ(ierr);
>
>         for (p = 0; p < GAUSS_POINTS; p++) {
>           coord_x = centroid_x;
>           coord_y = centroid_y;
>           if (use_gp_coords) {
>             coord_x = PetscRealPart(element_props[j][i].gp_coords[2*p]);
>             coord_y = PetscRealPart(element_props[j][i].gp_coords[2*p+1]);
>           }
>
>           element_props[j][i].eta[p] = opts_eta0;
>           element_props[j][i].fx[p]  = 0.0;
>           element_props[j][i].fy[p]  = 0.0;
>
>           radius2 = (coord_x-0.5)*(coord_x-0.5)+(coord_y-0.5)*(coord_y-0.5);
>           if (radius2 < opts_r*opts_r) {
>             element_props[j][i].eta[p] =  opts_eta1;
>             element_props[j][i].fx[p]  =  0.0;
>             element_props[j][i].fy[p]  = -1.0;
>           }
>         }
>       } else if (coefficient_structure == 3) { /* circular and rectangular inclusion */
>         PetscReal opts_eta0,opts_eta1,opts_r,opts_dx,opts_dy,opts_c0x,opts_c0y,opts_s0x,opts_s0y,opts_phi,radius2;
>
>         opts_eta0 = 1.0;
>         opts_eta1 = 1.0;
>         opts_r    = 0.25;
>         opts_c0x  = 0.35;       /* circle center */
>         opts_c0y  = 0.35;
>         opts_s0x  = 0.7;       /* square center */
>         opts_s0y  = 0.7;
>         opts_dx   = 0.25;
>         opts_dy   = 0.25;
>         opts_phi  = 25;
>
>         ierr      = PetscOptionsGetReal(NULL,"-sinker_eta0",&opts_eta0,0);CHKERRQ(ierr);
>         ierr      = PetscOptionsGetReal(NULL,"-sinker_eta1",&opts_eta1,0);CHKERRQ(ierr);
>         ierr      = PetscOptionsGetReal(NULL,"-sinker_r",&opts_r,0);CHKERRQ(ierr);
>         ierr      = PetscOptionsGetReal(NULL,"-sinker_c0x",&opts_c0x,0);CHKERRQ(ierr);
>         ierr      = PetscOptionsGetReal(NULL,"-sinker_c0y",&opts_c0y,0);CHKERRQ(ierr);
>         ierr      = PetscOptionsGetReal(NULL,"-sinker_s0x",&opts_s0x,0);CHKERRQ(ierr);
>         ierr      = PetscOptionsGetReal(NULL,"-sinker_s0y",&opts_s0y,0);CHKERRQ(ierr);
>         ierr      = PetscOptionsGetReal(NULL,"-sinker_dx",&opts_dx,0);CHKERRQ(ierr);
>         ierr      = PetscOptionsGetReal(NULL,"-sinker_dy",&opts_dy,0);CHKERRQ(ierr);
>         ierr      = PetscOptionsGetReal(NULL,"-sinker_phi",&opts_phi,0);CHKERRQ(ierr);
>         opts_phi *= PETSC_PI / 180;
>
>         for (p = 0; p < GAUSS_POINTS; p++) {
>           coord_x = centroid_x;
>           coord_y = centroid_y;
>           if (use_gp_coords) {
>             coord_x = PetscRealPart(element_props[j][i].gp_coords[2*p]);
>             coord_y = PetscRealPart(element_props[j][i].gp_coords[2*p+1]);
>           }
>
>           element_props[j][i].eta[p] = opts_eta0;
>           element_props[j][i].fx[p]  = 0.0;
>           element_props[j][i].fy[p]  = -0.2;
>
>           radius2 = PetscSqr(coord_x - opts_c0x) + PetscSqr(coord_y - opts_c0y);
>           if (radius2 < opts_r*opts_r
>               || (PetscAbs(+(coord_x - opts_s0x)*cos(opts_phi) + (coord_y - opts_s0y)*sin(opts_phi)) < opts_dx/2
>                   && PetscAbs(-(coord_x - opts_s0x)*sin(opts_phi) + (coord_y - opts_s0y)*cos(opts_phi)) < opts_dy/2)) {
>             element_props[j][i].eta[p] =  opts_eta1;
>             element_props[j][i].fx[p]  =  0.0;
>             element_props[j][i].fy[p]  = -1.0;
>           }
>         }
>       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Unknown coefficient_structure");
>     }
>   }
>   ierr = DMDAVecRestoreArray(prop_cda,prop_coords,&_prop_coords);CHKERRQ(ierr);
>
>   ierr = DMDAVecRestoreArray(vel_cda,vel_coords,&_vel_coords);CHKERRQ(ierr);
>
>   ierr = DMDAVecRestoreArray(da_prop,l_properties,&element_props);CHKERRQ(ierr);
>   ierr = DMLocalToGlobalBegin(da_prop,l_properties,ADD_VALUES,properties);CHKERRQ(ierr);
>   ierr = DMLocalToGlobalEnd(da_prop,l_properties,ADD_VALUES,properties);CHKERRQ(ierr);
>
>
>   /* Generate a matrix with the correct non-zero pattern of type AIJ. This will work in parallel and serial */
>   ierr = DMCreateMatrix(da_Stokes,MATAIJ,&A);CHKERRQ(ierr);
>   ierr = DMCreateMatrix(da_Stokes,MATAIJ,&B);CHKERRQ(ierr);
>   ierr = DMCreateGlobalVector(da_Stokes,&f);CHKERRQ(ierr);
>   ierr = DMCreateGlobalVector(da_Stokes,&X);CHKERRQ(ierr);
>
>   /* assemble A11 */
>   ierr = MatZeroEntries(A);CHKERRQ(ierr);
>   ierr = MatZeroEntries(B);CHKERRQ(ierr);
>   ierr = VecZeroEntries(f);CHKERRQ(ierr);
>
>   ierr = AssembleA_Stokes(A,da_Stokes,da_prop,properties);CHKERRQ(ierr);
>   ierr = AssembleA_PCStokes(B,da_Stokes,da_prop,properties);CHKERRQ(ierr);
>   /* build force vector */
>   ierr = AssembleF_Stokes(f,da_Stokes,da_prop,properties);CHKERRQ(ierr);
>
>   ierr = DMDABCApplyFreeSlip(da_Stokes,A,f);CHKERRQ(ierr);
>   ierr = DMDABCApplyFreeSlip(da_Stokes,B,NULL);CHKERRQ(ierr);
>
>   /* SOLVE */
>   ierr = KSPCreate(PETSC_COMM_WORLD,&ksp_S);CHKERRQ(ierr);
>   ierr = KSPSetOptionsPrefix(ksp_S,"stokes_");CHKERRQ(ierr);
>   ierr = KSPSetOperators(ksp_S,A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
>   ierr = KSPSetDM(ksp_S,da_Stokes);CHKERRQ(ierr);
>   ierr = KSPSetDMActive(ksp_S,PETSC_FALSE);CHKERRQ(ierr);
>   ierr = KSPSetFromOptions(ksp_S);CHKERRQ(ierr);
>   {
>     PC             pc;
>     const PetscInt ufields[] = {0,1},pfields[1] = {2};
>     ierr = KSPGetPC(ksp_S,&pc);CHKERRQ(ierr);
>     ierr = PCFieldSplitSetBlockSize(pc,3);CHKERRQ(ierr);
>     ierr = PCFieldSplitSetFields(pc,"u",2,ufields,ufields);CHKERRQ(ierr);
>     ierr = PCFieldSplitSetFields(pc,"p",1,pfields,pfields);CHKERRQ(ierr);
>   }
>
>   ierr = KSPSolve(ksp_S,f,X);CHKERRQ(ierr);
>
> 	ierr = StokesDMDAView(da_Stokes,X);CHKERRQ(ierr);
> 	ierr = StokesCoeffDMDAView(da_prop,properties);CHKERRQ(ierr);
> 	
>   ierr = PetscOptionsGetString(NULL,"-o",filename,sizeof(filename),&set);CHKERRQ(ierr);
>   if (set) {
>     char        *ext;
>     PetscViewer viewer;
>     ierr = PetscViewerCreate(PETSC_COMM_WORLD,&viewer);CHKERRQ(ierr);
>     ierr = PetscStrrchr(filename,'.',&ext);CHKERRQ(ierr);
>     if (!strcmp("vts",ext)) {
>       ierr = PetscViewerSetType(viewer,PETSCVIEWERVTK);CHKERRQ(ierr);
>     } else {
>       ierr = PetscViewerSetType(viewer,PETSCVIEWERBINARY);CHKERRQ(ierr);
>     }
>     ierr = PetscViewerFileSetMode(viewer,FILE_MODE_WRITE);CHKERRQ(ierr);
>     ierr = PetscViewerFileSetName(viewer,filename);CHKERRQ(ierr);
>     ierr = VecView(X,viewer);CHKERRQ(ierr);
>     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
>   }
>
>   ierr = KSPGetIterationNumber(ksp_S,&its);CHKERRQ(ierr);
>
>   if (coefficient_structure == 0) {
>     PetscReal opts_eta0,opts_eta1,opts_xc;
>     PetscInt  opts_nz,N;
>     DM        da_Stokes_analytic;
>     Vec       X_analytic;
>     PetscReal nrm1[3],nrm2[3],nrmI[3];
>
>     opts_eta0 = 1.0;
>     opts_eta1 = 1.0;
>     opts_xc   = 0.5;
>     opts_nz   = 1;
>
>     ierr = PetscOptionsGetReal(NULL,"-solcx_eta0",&opts_eta0,0);CHKERRQ(ierr);
>     ierr = PetscOptionsGetReal(NULL,"-solcx_eta1",&opts_eta1,0);CHKERRQ(ierr);
>     ierr = PetscOptionsGetReal(NULL,"-solcx_xc",&opts_xc,0);CHKERRQ(ierr);
>     ierr = PetscOptionsGetInt(NULL,"-solcx_nz",&opts_nz,0);CHKERRQ(ierr);
>
>
>     ierr = DMDACreateSolCx(opts_eta0,opts_eta1,opts_xc,opts_nz,mx,my,&da_Stokes_analytic,&X_analytic);CHKERRQ(ierr);
>
>     ierr = DMDAIntegrateErrors(da_Stokes_analytic,X,X_analytic);CHKERRQ(ierr);
>
>
>     ierr = VecAXPY(X_analytic,-1.0,X);CHKERRQ(ierr);
>     ierr = VecGetSize(X_analytic,&N);CHKERRQ(ierr);
>     N    = N/3;
>
>     ierr = VecStrideNorm(X_analytic,0,NORM_1,&nrm1[0]);CHKERRQ(ierr);
>     ierr = VecStrideNorm(X_analytic,0,NORM_2,&nrm2[0]);CHKERRQ(ierr);
>     ierr = VecStrideNorm(X_analytic,0,NORM_INFINITY,&nrmI[0]);CHKERRQ(ierr);
>
>     ierr = VecStrideNorm(X_analytic,1,NORM_1,&nrm1[1]);CHKERRQ(ierr);
>     ierr = VecStrideNorm(X_analytic,1,NORM_2,&nrm2[1]);CHKERRQ(ierr);
>     ierr = VecStrideNorm(X_analytic,1,NORM_INFINITY,&nrmI[1]);CHKERRQ(ierr);
>
>     ierr = VecStrideNorm(X_analytic,2,NORM_1,&nrm1[2]);CHKERRQ(ierr);
>     ierr = VecStrideNorm(X_analytic,2,NORM_2,&nrm2[2]);CHKERRQ(ierr);
>     ierr = VecStrideNorm(X_analytic,2,NORM_INFINITY,&nrmI[2]);CHKERRQ(ierr);
>
>     ierr = DMDestroy(&da_Stokes_analytic);CHKERRQ(ierr);
>     ierr = VecDestroy(&X_analytic);CHKERRQ(ierr);
>   }
>
>
>   ierr = KSPDestroy(&ksp_S);CHKERRQ(ierr);
>   ierr = VecDestroy(&X);CHKERRQ(ierr);
>   ierr = VecDestroy(&f);CHKERRQ(ierr);
>   ierr = MatDestroy(&A);CHKERRQ(ierr);
>   ierr = MatDestroy(&B);CHKERRQ(ierr);
>
>   ierr = DMDestroy(&da_Stokes);CHKERRQ(ierr);
>   ierr = DMDestroy(&da_prop);CHKERRQ(ierr);
>
>   ierr = VecDestroy(&properties);CHKERRQ(ierr);
>   ierr = VecDestroy(&l_properties);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> /* -------------------------- helpers for boundary conditions -------------------------------- */
>
> #undef __FUNCT__
> #define __FUNCT__ "BCApply_EAST"
> /*
>    BCApply_EAST - Sets degree of freedom d_idx to bc_val along the last column
>    (i = nx-1) of the local ghosted patch of da.
>
>    One global row index per grid row is gathered from the DMDA global index
>    list. The rows are only applied when the ghosted patch reaches the global
>    east edge (si+nx == M); otherwise the Vec/Mat calls below are still made,
>    but with a row count of zero.
>
>    Input Parameters:
> +  da     - the DMDA
> .  d_idx  - dof within each node that is constrained
> .  bc_val - value inserted into b for every constrained row
> .  A      - matrix passed to MatZeroRows() with diagonal value 1.0 (skipped if NULL)
> -  b      - vector receiving bc_val in the constrained rows (skipped if NULL)
> */
> static PetscErrorCode BCApply_EAST(DM da,PetscInt d_idx,PetscScalar bc_val,Mat A,Vec b)
> {
>   PetscErrorCode ierr;
>   DM             coord_dm;
>   Vec            coord_vec;
>   DMDACoor2d     **coord_arr;
>   PetscInt       xs,ys,xm,ym,Mx,My,dof;
>   PetscInt       *gidx,*rows,nrows,k;
>   PetscScalar    *vals;
>
>   PetscFunctionBeginUser;
>   ierr = DMDAGetGlobalIndices(da,NULL,&gidx);CHKERRQ(ierr);
>
>   /* the coordinate array is acquired and released, but never read */
>   ierr = DMGetCoordinateDM(da,&coord_dm);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(da,&coord_vec);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(coord_dm,coord_vec,&coord_arr);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(coord_dm,&xs,&ys,0,&xm,&ym,0);CHKERRQ(ierr);
>   ierr = DMDAGetInfo(da,0,&Mx,&My,0,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr);
>
>   ierr = PetscMalloc(sizeof(PetscInt)*ym*dof,&rows);CHKERRQ(ierr);
>   ierr = PetscMalloc(sizeof(PetscScalar)*ym*dof,&vals);CHKERRQ(ierr);
>
>   /* start from -1 everywhere so that VecSetValues ignores unused slots */
>   for (k = 0; k < ym*dof; k++) rows[k] = -1;
>
>   /* one constrained row per grid row, taken from the last ghosted column */
>   for (k = 0; k < ym; k++) {
>     const PetscInt node = (xm-1) + k*xm;
>
>     rows[k] = gidx[dof*node + d_idx];
>     vals[k] = bc_val;
>   }
>
>   /* only the patches touching the east edge contribute rows */
>   nrows = ((xs+xm) == Mx) ? ym : 0;
>
>   if (b) {
>     ierr = VecSetValues(b,nrows,rows,vals,INSERT_VALUES);CHKERRQ(ierr);
>     ierr = VecAssemblyBegin(b);CHKERRQ(ierr);
>     ierr = VecAssemblyEnd(b);CHKERRQ(ierr);
>   }
>   if (A) {
>     ierr = MatZeroRows(A,nrows,rows,1.0,0,0);CHKERRQ(ierr);
>   }
>
>   ierr = PetscFree(vals);CHKERRQ(ierr);
>   ierr = PetscFree(rows);CHKERRQ(ierr);
>
>   ierr = DMDAVecRestoreArray(coord_dm,coord_vec,&coord_arr);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "BCApply_WEST"
> /*
>    BCApply_WEST - Sets degree of freedom d_idx to bc_val along the first column
>    (i = 0) of the local ghosted patch of da.
>
>    One global row index per grid row is gathered from the DMDA global index
>    list. The rows are only applied when the ghosted patch starts at the global
>    west edge (si == 0); otherwise the Vec/Mat calls below are still made, but
>    with a row count of zero.
>
>    Input Parameters:
> +  da     - the DMDA
> .  d_idx  - dof within each node that is constrained
> .  bc_val - value inserted into b for every constrained row
> .  A      - matrix passed to MatZeroRows() with diagonal value 1.0 (skipped if NULL)
> -  b      - vector receiving bc_val in the constrained rows (skipped if NULL)
> */
> static PetscErrorCode BCApply_WEST(DM da,PetscInt d_idx,PetscScalar bc_val,Mat A,Vec b)
> {
>   PetscErrorCode ierr;
>   DM             coord_dm;
>   Vec            coord_vec;
>   DMDACoor2d     **coord_arr;
>   PetscInt       xs,ys,xm,ym,Mx,My,dof;
>   PetscInt       *gidx,*rows,nrows,k;
>   PetscScalar    *vals;
>
>   PetscFunctionBeginUser;
>   ierr = DMDAGetGlobalIndices(da,NULL,&gidx);CHKERRQ(ierr);
>
>   /* the coordinate array is acquired and released, but never read */
>   ierr = DMGetCoordinateDM(da,&coord_dm);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(da,&coord_vec);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(coord_dm,coord_vec,&coord_arr);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(coord_dm,&xs,&ys,0,&xm,&ym,0);CHKERRQ(ierr);
>   ierr = DMDAGetInfo(da,0,&Mx,&My,0,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr);
>
>   ierr = PetscMalloc(sizeof(PetscInt)*ym*dof,&rows);CHKERRQ(ierr);
>   ierr = PetscMalloc(sizeof(PetscScalar)*ym*dof,&vals);CHKERRQ(ierr);
>
>   /* start from -1 everywhere so that VecSetValues ignores unused slots */
>   for (k = 0; k < ym*dof; k++) rows[k] = -1;
>
>   /* one constrained row per grid row, taken from the first ghosted column */
>   for (k = 0; k < ym; k++) {
>     const PetscInt node = 0 + k*xm;
>
>     rows[k] = gidx[dof*node + d_idx];
>     vals[k] = bc_val;
>   }
>
>   /* only the patches touching the west edge contribute rows */
>   nrows = (xs == 0) ? ym : 0;
>
>   if (b) {
>     ierr = VecSetValues(b,nrows,rows,vals,INSERT_VALUES);CHKERRQ(ierr);
>     ierr = VecAssemblyBegin(b);CHKERRQ(ierr);
>     ierr = VecAssemblyEnd(b);CHKERRQ(ierr);
>   }
>   if (A) {
>     ierr = MatZeroRows(A,nrows,rows,1.0,0,0);CHKERRQ(ierr);
>   }
>
>   ierr = PetscFree(vals);CHKERRQ(ierr);
>   ierr = PetscFree(rows);CHKERRQ(ierr);
>
>   ierr = DMDAVecRestoreArray(coord_dm,coord_vec,&coord_arr);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "BCApply_NORTH"
> /*
>    BCApply_NORTH - Sets degree of freedom d_idx to bc_val along the last row
>    (j = ny-1) of the local ghosted patch of da.
>
>    One global row index per grid column is gathered from the DMDA global index
>    list. The rows are only applied when the ghosted patch reaches the global
>    north edge (sj+ny == N); otherwise the Vec/Mat calls below are still made,
>    but with a row count of zero.
>
>    Input Parameters:
> +  da     - the DMDA
> .  d_idx  - dof within each node that is constrained
> .  bc_val - value inserted into b for every constrained row
> .  A      - matrix passed to MatZeroRows() with diagonal value 1.0 (skipped if NULL)
> -  b      - vector receiving bc_val in the constrained rows (skipped if NULL)
> */
> static PetscErrorCode BCApply_NORTH(DM da,PetscInt d_idx,PetscScalar bc_val,Mat A,Vec b)
> {
>   PetscErrorCode ierr;
>   DM             coord_dm;
>   Vec            coord_vec;
>   DMDACoor2d     **coord_arr;
>   PetscInt       xs,ys,xm,ym,Mx,My,dof;
>   PetscInt       *gidx,*rows,nrows,k;
>   PetscScalar    *vals;
>
>   PetscFunctionBeginUser;
>   ierr = DMDAGetGlobalIndices(da,NULL,&gidx);CHKERRQ(ierr);
>
>   /* the coordinate array is acquired and released, but never read */
>   ierr = DMGetCoordinateDM(da,&coord_dm);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(da,&coord_vec);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(coord_dm,coord_vec,&coord_arr);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(coord_dm,&xs,&ys,0,&xm,&ym,0);CHKERRQ(ierr);
>   ierr = DMDAGetInfo(da,0,&Mx,&My,0,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr);
>
>   ierr = PetscMalloc(sizeof(PetscInt)*xm,&rows);CHKERRQ(ierr);
>   ierr = PetscMalloc(sizeof(PetscScalar)*xm,&vals);CHKERRQ(ierr);
>
>   /* start from -1 everywhere so that VecSetValues ignores unused slots */
>   for (k = 0; k < xm; k++) rows[k] = -1;
>
>   /* one constrained row per grid column, taken from the last ghosted row */
>   for (k = 0; k < xm; k++) {
>     const PetscInt node = k + (ym-1)*xm;
>
>     rows[k] = gidx[dof*node + d_idx];
>     vals[k] = bc_val;
>   }
>
>   /* only the patches touching the north edge contribute rows */
>   nrows = ((ys+ym) == My) ? xm : 0;
>
>   if (b) {
>     ierr = VecSetValues(b,nrows,rows,vals,INSERT_VALUES);CHKERRQ(ierr);
>     ierr = VecAssemblyBegin(b);CHKERRQ(ierr);
>     ierr = VecAssemblyEnd(b);CHKERRQ(ierr);
>   }
>   if (A) {
>     ierr = MatZeroRows(A,nrows,rows,1.0,0,0);CHKERRQ(ierr);
>   }
>
>   ierr = PetscFree(vals);CHKERRQ(ierr);
>   ierr = PetscFree(rows);CHKERRQ(ierr);
>
>   ierr = DMDAVecRestoreArray(coord_dm,coord_vec,&coord_arr);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "BCApply_SOUTH"
> /*
>    BCApply_SOUTH - Sets degree of freedom d_idx to bc_val along the first row
>    (j = 0) of the local ghosted patch of da.
>
>    One global row index per grid column is gathered from the DMDA global index
>    list. The rows are only applied when the ghosted patch starts at the global
>    south edge (sj == 0); otherwise the Vec/Mat calls below are still made, but
>    with a row count of zero.
>
>    Input Parameters:
> +  da     - the DMDA
> .  d_idx  - dof within each node that is constrained
> .  bc_val - value inserted into b for every constrained row
> .  A      - matrix passed to MatZeroRows() with diagonal value 1.0 (skipped if NULL)
> -  b      - vector receiving bc_val in the constrained rows (skipped if NULL)
> */
> static PetscErrorCode BCApply_SOUTH(DM da,PetscInt d_idx,PetscScalar bc_val,Mat A,Vec b)
> {
>   PetscErrorCode ierr;
>   DM             coord_dm;
>   Vec            coord_vec;
>   DMDACoor2d     **coord_arr;
>   PetscInt       xs,ys,xm,ym,Mx,My,dof;
>   PetscInt       *gidx,*rows,nrows,k;
>   PetscScalar    *vals;
>
>   PetscFunctionBeginUser;
>   ierr = DMDAGetGlobalIndices(da,NULL,&gidx);CHKERRQ(ierr);
>
>   /* the coordinate array is acquired and released, but never read */
>   ierr = DMGetCoordinateDM(da,&coord_dm);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(da,&coord_vec);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(coord_dm,coord_vec,&coord_arr);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(coord_dm,&xs,&ys,0,&xm,&ym,0);CHKERRQ(ierr);
>   ierr = DMDAGetInfo(da,0,&Mx,&My,0,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr);
>
>   ierr = PetscMalloc(sizeof(PetscInt)*xm,&rows);CHKERRQ(ierr);
>   ierr = PetscMalloc(sizeof(PetscScalar)*xm,&vals);CHKERRQ(ierr);
>
>   /* start from -1 everywhere so that VecSetValues ignores unused slots */
>   for (k = 0; k < xm; k++) rows[k] = -1;
>
>   /* one constrained row per grid column, taken from the first ghosted row */
>   for (k = 0; k < xm; k++) {
>     const PetscInt node = k + 0*xm;
>
>     rows[k] = gidx[dof*node + d_idx];
>     vals[k] = bc_val;
>   }
>
>   /* only the patches touching the south edge contribute rows */
>   nrows = (ys == 0) ? xm : 0;
>
>   if (b) {
>     ierr = VecSetValues(b,nrows,rows,vals,INSERT_VALUES);CHKERRQ(ierr);
>     ierr = VecAssemblyBegin(b);CHKERRQ(ierr);
>     ierr = VecAssemblyEnd(b);CHKERRQ(ierr);
>   }
>   if (A) {
>     ierr = MatZeroRows(A,nrows,rows,1.0,0,0);CHKERRQ(ierr);
>   }
>
>   ierr = PetscFree(vals);CHKERRQ(ierr);
>   ierr = PetscFree(rows);CHKERRQ(ierr);
>
>   ierr = DMDAVecRestoreArray(coord_dm,coord_vec,&coord_arr);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> /*
> Free slip sides.
> */
> #undef __FUNCT__
> #define __FUNCT__ "DMDABCApplyFreeSlip"
> /*
>    DMDABCApplyFreeSlip - Applies a zero value to one dof on each of the four
>    sides: dof 1 on the north and south sides, dof 0 on the east and west sides.
>
>    The sides are processed in the order north, east, south, west. A and f are
>    forwarded unchanged to the per-side routines, which skip whichever is NULL.
> */
> PetscErrorCode DMDABCApplyFreeSlip(DM da_Stokes,Mat A,Vec f)
> {
>   typedef PetscErrorCode (*SideBC)(DM,PetscInt,PetscScalar,Mat,Vec);
>   const SideBC   side_bc[4]  = {BCApply_NORTH,BCApply_EAST,BCApply_SOUTH,BCApply_WEST};
>   const PetscInt side_dof[4] = {1,0,1,0};
>   PetscInt       s;
>   PetscErrorCode ierr;
>
>   PetscFunctionBeginUser;
>   for (s = 0; s < 4; s++) {
>     ierr = (*side_bc[s])(da_Stokes,side_dof[s],0.0,A,f);CHKERRQ(ierr);
>   }
>   PetscFunctionReturn(0);
> }
>
> /* viewers */
> #undef __FUNCT__
> #define __FUNCT__ "StokesDMDAView"
> /*
>    StokesDMDAView - Writes the vector x, living on da, to the VTK file
>    "stokes_u.vts" as a point field.
>
>    An extra reference is taken on both da and x before they are registered
>    with the viewer; the file is produced when the viewer is destroyed.
> */
> PetscErrorCode StokesDMDAView(DM da,Vec x)
> {
>   PetscObject    dm_obj  = (PetscObject)da;
>   PetscObject    vec_obj = (PetscObject)x;
>   PetscViewer    vtk;
>   PetscErrorCode ierr;
>
>   PetscFunctionBeginUser;
>   ierr = PetscViewerVTKOpen(dm_obj->comm,"stokes_u.vts",FILE_MODE_WRITE,&vtk);CHKERRQ(ierr);
>
>   ierr = PetscObjectReference(dm_obj);CHKERRQ(ierr);
>   ierr = PetscObjectReference(vec_obj);CHKERRQ(ierr);
>   ierr = PetscViewerVTKAddField(vtk,dm_obj,DMDAVTKWriteAll,PETSC_VTK_POINT_FIELD,vec_obj);CHKERRQ(ierr);
>
>   ierr = PetscViewerDestroy(&vtk);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "StokesUPDMDAView"
> /*
>    StokesUPDMDAView - Writes the two sub-vectors of a composite Stokes vector
>    to separate VTK files.
>
>    Input Parameters:
> +  da     - DMComposite whose two entries are read as (velocity DM, pressure DM)
> .  x      - global vector on da
> -  suffix - optional string placed in front of the file names; may be NULL
>
>    Output files: "stokes_u.vts" (point field) and "stokes_p.vts" (cell field),
>    or "<suffix>-stokes_u.vts" / "<suffix>-stokes_p.vts" when suffix is given.
>
>    The file names are built with PetscSNPrintf() so that a long suffix cannot
>    overrun the local buffer.
> */
> PetscErrorCode StokesUPDMDAView(DM da,Vec x,const char suffix[])
> {
>   PetscErrorCode ierr;
>   Vec            velocity,pressure;
>   DM             dav,dap;
>   PetscViewer    viewer;
>   char           fname[1024];
>
>   PetscFunctionBeginUser;
>   ierr = DMCompositeGetEntries(da,&dav,&dap);CHKERRQ(ierr);
>   ierr = DMCompositeGetAccess(da,x,&velocity,&pressure);CHKERRQ(ierr);
>
>   /* each viewer gets its own reference to the DM and Vec registered with it */
>   ierr = PetscObjectReference((PetscObject)dav);CHKERRQ(ierr);
>   ierr = PetscObjectReference((PetscObject)dap);CHKERRQ(ierr);
>   ierr = PetscObjectReference((PetscObject)velocity);CHKERRQ(ierr);
>   ierr = PetscObjectReference((PetscObject)pressure);CHKERRQ(ierr);
>
>   /* velocity */
>   if (!suffix) {
>     ierr = PetscSNPrintf(fname,sizeof(fname),"stokes_u.vts");CHKERRQ(ierr);
>   } else {
>     ierr = PetscSNPrintf(fname,sizeof(fname),"%s-stokes_u.vts",suffix);CHKERRQ(ierr);
>   }
>   ierr = PetscViewerVTKOpen(((PetscObject)da)->comm,fname,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
>   ierr = PetscViewerVTKAddField(viewer,(PetscObject)dav,DMDAVTKWriteAll,PETSC_VTK_POINT_FIELD,(PetscObject)velocity);CHKERRQ(ierr);
>   ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
>
>   /* pressure */
>   if (!suffix) {
>     ierr = PetscSNPrintf(fname,sizeof(fname),"stokes_p.vts");CHKERRQ(ierr);
>   } else {
>     ierr = PetscSNPrintf(fname,sizeof(fname),"%s-stokes_p.vts",suffix);CHKERRQ(ierr);
>   }
>   ierr = PetscViewerVTKOpen(((PetscObject)da)->comm,fname,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
>   ierr = PetscViewerVTKAddField(viewer,(PetscObject)dap,DMDAVTKWriteAll,PETSC_VTK_CELL_FIELD,(PetscObject)pressure);CHKERRQ(ierr);
>   ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
>
>   ierr = DMCompositeRestoreAccess(da,x,&velocity,&pressure);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "StokesCoeffDMDAView"
> /*
>    StokesCoeffDMDAView - Writes the vector x, living on da, to the VTK file
>    "stokes_coeff.vts" as a point field.
>
>    An extra reference is taken on both da and x before they are registered
>    with the viewer; the file is produced when the viewer is destroyed.
> */
> PetscErrorCode StokesCoeffDMDAView(DM da,Vec x)
> {
>   PetscObject    dm_obj  = (PetscObject)da;
>   PetscObject    vec_obj = (PetscObject)x;
>   PetscViewer    vtk;
>   PetscErrorCode ierr;
>
>   PetscFunctionBeginUser;
>   ierr = PetscViewerVTKOpen(dm_obj->comm,"stokes_coeff.vts",FILE_MODE_WRITE,&vtk);CHKERRQ(ierr);
>
>   ierr = PetscObjectReference(dm_obj);CHKERRQ(ierr);
>   ierr = PetscObjectReference(vec_obj);CHKERRQ(ierr);
>   ierr = PetscViewerVTKAddField(vtk,dm_obj,DMDAVTKWriteAll,PETSC_VTK_POINT_FIELD,vec_obj);CHKERRQ(ierr);
>
>   ierr = PetscViewerDestroy(&vtk);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
>
> #undef __FUNCT__
> #define __FUNCT__ "m_view"
> /*
>    m_view - Writes the matrix A to the binary file called name.
>
>    The viewer is opened on PETSC_COMM_WORLD in write mode and destroyed before
>    returning. Failures of any PETSc call are propagated to the caller instead
>    of being silently dropped.
> */
> PetscErrorCode m_view(Mat A,const char name[])
> {
>   PetscViewer    viewer;
>   PetscErrorCode ierr;
>
>   PetscFunctionBeginUser;
>   ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,name,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
>   ierr = MatView(A,viewer);CHKERRQ(ierr);
>   ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DMatViewMatlab"
> /*
>    Stokes2DMatViewMatlab - Extracts the velocity/pressure blocks of the Stokes
>    operators and writes them to binary files via m_view().
>
>    Input Parameters:
> +  dm_stokes - DMComposite providing the global index sets (is[0], is[1]) of its two entries
> .  A         - operator; its (0,0), (0,1) and (1,0) blocks are written
> .  B         - second operator; its (1,1) block is written as Spp
> -  suffix    - string placed in front of each file name
>
>    Output files: "<suffix>-Stokes2DAuu.pmat", "<suffix>-Stokes2DAup.pmat",
>    "<suffix>-Stokes2DApu.pmat" and "<suffix>-Stokes2DSpp.pmat".
>
>    Notes:
>    App, the (1,1) block of A, is extracted with (is[1],is[1]) and destroyed,
>    but it is not written to disk.
>    File names are built with PetscSNPrintf() so that a long suffix cannot
>    overrun the local buffer, and all clean-up calls are error checked.
> */
> PetscErrorCode Stokes2DMatViewMatlab(DM dm_stokes,Mat A,Mat B,const char suffix[])
> {
>   IS             *is;
>   char           filename[1024];
>   Mat            Auu,Aup,Apu,App,Spp;
>   PetscErrorCode ierr;
>
>   PetscFunctionBeginUser;
>   ierr = DMCompositeGetGlobalISs(dm_stokes,&is);CHKERRQ(ierr);
>
>   ierr = MatGetSubMatrix(A,is[0],is[0],MAT_INITIAL_MATRIX,&Auu);CHKERRQ(ierr);
>   ierr = MatGetSubMatrix(A,is[0],is[1],MAT_INITIAL_MATRIX,&Aup);CHKERRQ(ierr);
>   ierr = MatGetSubMatrix(A,is[1],is[0],MAT_INITIAL_MATRIX,&Apu);CHKERRQ(ierr);
>   /* pressure-pressure block: rows AND columns come from is[1] */
>   ierr = MatGetSubMatrix(A,is[1],is[1],MAT_INITIAL_MATRIX,&App);CHKERRQ(ierr);
>
>   ierr = MatGetSubMatrix(B,is[1],is[1],MAT_INITIAL_MATRIX,&Spp);CHKERRQ(ierr);
>
>   ierr = PetscSNPrintf(filename,sizeof(filename),"%s-Stokes2DAuu.pmat",suffix);CHKERRQ(ierr);
>   ierr = m_view(Auu,filename);CHKERRQ(ierr);
>   ierr = PetscSNPrintf(filename,sizeof(filename),"%s-Stokes2DAup.pmat",suffix);CHKERRQ(ierr);
>   ierr = m_view(Aup,filename);CHKERRQ(ierr);
>   ierr = PetscSNPrintf(filename,sizeof(filename),"%s-Stokes2DApu.pmat",suffix);CHKERRQ(ierr);
>   ierr = m_view(Apu,filename);CHKERRQ(ierr);
>   ierr = PetscSNPrintf(filename,sizeof(filename),"%s-Stokes2DSpp.pmat",suffix);CHKERRQ(ierr);
>   ierr = m_view(Spp,filename);CHKERRQ(ierr);
>
>   ierr = MatDestroy(&Auu);CHKERRQ(ierr);
>   ierr = MatDestroy(&Aup);CHKERRQ(ierr);
>   ierr = MatDestroy(&Apu);CHKERRQ(ierr);
>   ierr = MatDestroy(&App);CHKERRQ(ierr);
>   ierr = MatDestroy(&Spp);CHKERRQ(ierr);
>   ierr = ISDestroy(&is[0]);CHKERRQ(ierr);
>   ierr = ISDestroy(&is[1]);CHKERRQ(ierr);
>   ierr = PetscFree(is);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "DMDAViewGnuplot2d"
> /*
>    DMDAViewGnuplot2d - Dumps the ghosted local part of a 2d DMDA vector to a
>    per-rank text file, one node per line.
>
>    Input Parameters:
> +  da      - the DMDA
> .  fields  - global vector on da
> .  comment - text written into the first header line
> -  prefix  - leading part of the file name
>
>    Each rank writes "<prefix>-pNNNN.dat", where NNNN is its rank in
>    PETSC_COMM_WORLD. After two header lines starting with "###", every line
>    holds "x y" followed by the value of each dof at that node (real parts).
>
>    The return code of MPI_Comm_rank() is checked like every other call.
> */
> PetscErrorCode DMDAViewGnuplot2d(DM da,Vec fields,const char comment[],const char prefix[])
> {
>   DM             cda;
>   Vec            coords,local_fields;
>   DMDACoor2d     **_coords;
>   FILE           *fp;
>   char           fname[PETSC_MAX_PATH_LEN];
>   PetscMPIInt    rank;
>   PetscInt       si,sj,nx,ny,i,j;
>   PetscInt       n_dofs,d;
>   PetscScalar    *_fields;
>   PetscErrorCode ierr;
>
>   PetscFunctionBeginUser;
>   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
>   ierr = PetscSNPrintf(fname,sizeof(fname),"%s-p%1.4d.dat",prefix,rank);CHKERRQ(ierr);
>   ierr = PetscFOpen(PETSC_COMM_SELF,fname,"w",&fp);CHKERRQ(ierr);
>   if (!fp) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Cannot open file");
>
>   /* header: comment line, then the column names */
>   ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"### %s (processor %1.4d) ### \n",comment,rank);CHKERRQ(ierr);
>   ierr = DMDAGetInfo(da,0,0,0,0,0,0,0,&n_dofs,0,0,0,0,0);CHKERRQ(ierr);
>   ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"### x y ");CHKERRQ(ierr);
>   for (d = 0; d < n_dofs; d++) {
>     const char *field_name;
>     ierr = DMDAGetFieldName(da,d,&field_name);CHKERRQ(ierr);
>     ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"%s ",field_name);CHKERRQ(ierr);
>   }
>   ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"###\n");CHKERRQ(ierr);
>
>   /* ghosted coordinates and ghosted field values share the same index range */
>   ierr = DMGetCoordinateDM(da,&cda);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(da,&coords);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(cda,coords,&_coords);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(cda,&si,&sj,0,&nx,&ny,0);CHKERRQ(ierr);
>
>   ierr = DMCreateLocalVector(da,&local_fields);CHKERRQ(ierr);
>   ierr = DMGlobalToLocalBegin(da,fields,INSERT_VALUES,local_fields);CHKERRQ(ierr);
>   ierr = DMGlobalToLocalEnd(da,fields,INSERT_VALUES,local_fields);CHKERRQ(ierr);
>   ierr = VecGetArray(local_fields,&_fields);CHKERRQ(ierr);
>
>   for (j = sj; j < sj+ny; j++) {
>     for (i = si; i < si+nx; i++) {
>       PetscScalar coord_x,coord_y;
>       PetscScalar field_d;
>
>       coord_x = _coords[j][i].x;
>       coord_y = _coords[j][i].y;
>
>       ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"%1.6e %1.6e ",PetscRealPart(coord_x),PetscRealPart(coord_y));CHKERRQ(ierr);
>       for (d = 0; d < n_dofs; d++) {
>         /* the raw local array is indexed relative to the ghost corner (si,sj) */
>         field_d = _fields[n_dofs*((i-si)+(j-sj)*(nx))+d];
>         ierr    = PetscFPrintf(PETSC_COMM_SELF,fp,"%1.6e ",PetscRealPart(field_d));CHKERRQ(ierr);
>       }
>       ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"\n");CHKERRQ(ierr);
>     }
>   }
>   ierr = VecRestoreArray(local_fields,&_fields);CHKERRQ(ierr);
>   ierr = VecDestroy(&local_fields);CHKERRQ(ierr);
>
>   ierr = DMDAVecRestoreArray(cda,coords,&_coords);CHKERRQ(ierr);
>
>   ierr = PetscFClose(PETSC_COMM_SELF,fp);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DCreateDMDA_Q1P0"
> /*
>    Stokes2DCreateDMDA_Q1P0 - Builds the DMs for the Q1-P0 discretisation on the
>    unit square.
>
>    Input Parameters:
> +  mx - number of elements in x (must be even)
> -  my - number of elements in y (must be even)
>
>    Output Parameters:
> +  dms - DMComposite holding (velocity DMDA, pressure DMDA), options prefix "stk_"
> -  dmc - coefficient DMDA, mx x my, one GaussPointCoefficients worth of scalars per cell
>
>    The velocity DMDA is (mx+1) x (my+1) with 2 dofs and stencil width 1; the
>    pressure DMDA is mx x my with 1 dof and stencil width 1. The local handles
>    to the velocity and pressure DMDAs are destroyed before returning.
> */
> PetscErrorCode Stokes2DCreateDMDA_Q1P0(const PetscInt mx,const PetscInt my,DM *dms,DM *dmc)
> {
>   PetscErrorCode ierr;
>   DM             da_coeff,da_vel,da_pres,pack;
>   IS             *fields;
>   PetscInt       ndof_coeff,ndof_vel,ndof_pres,width;
>
>   PetscFunctionBeginUser;
>   if (mx%2 != 0) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_USER,"-mx must be divisble by 2 for macro fe meshes");
>   if (my%2 != 0) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_USER,"-my must be divisble by 2 for macro fe meshes");
>
>   /* coefficient grid: one block of gauss point data per cell, no ghosting */
>   ndof_coeff = (int)(sizeof(GaussPointCoefficients)/sizeof(PetscScalar));
>   width      = 0;
>   ierr = DMDACreate2d(PETSC_COMM_WORLD,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_STENCIL_BOX,mx,my,PETSC_DECIDE,PETSC_DECIDE,ndof_coeff,width,0,0,&da_coeff);CHKERRQ(ierr);
>   ierr = DMDASetUniformCoordinates(da_coeff,0.0,1.0, 0.0,1.0, 0.0,0.0);CHKERRQ(ierr);
>
>   /* pressure grid; width 1 so it can be re-used for Q1-P0 stabilisation */
>   ndof_pres = 1;
>   width     = 1;
>   ierr = DMDACreate2d(PETSC_COMM_WORLD,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_STENCIL_BOX,mx,my,PETSC_DECIDE,PETSC_DECIDE,ndof_pres,width,0,0,&da_pres);CHKERRQ(ierr);
>   ierr = DMDASetUniformCoordinates(da_pres,0.0,1.0, 0.0,1.0, 0.0,0.0);CHKERRQ(ierr);
>
>   /* velocity grid */
>   ndof_vel = 2;
>   width    = 1;
>   ierr = DMDACreate2d(PETSC_COMM_WORLD,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_STENCIL_BOX,mx+1,my+1,PETSC_DECIDE,PETSC_DECIDE,ndof_vel,width,0,0,&da_vel);CHKERRQ(ierr);
>   ierr = DMDASetUniformCoordinates(da_vel,0.0,1.0, 0.0,1.0, 0.0,0.0);CHKERRQ(ierr);
>
>   /* pack velocity first, pressure second */
>   ierr = DMCompositeCreate(PETSC_COMM_WORLD,&pack);CHKERRQ(ierr);
>   ierr = DMCompositeAddDM(pack,da_vel);CHKERRQ(ierr);
>   ierr = DMCompositeAddDM(pack,da_pres);CHKERRQ(ierr);
>
>   /* the global index sets are requested and released straight away */
>   ierr = DMCompositeGetGlobalISs(pack,&fields);CHKERRQ(ierr);
>   ierr = ISDestroy(&fields[0]);CHKERRQ(ierr);
>   ierr = ISDestroy(&fields[1]);CHKERRQ(ierr);
>   ierr = PetscFree(fields);CHKERRQ(ierr);
>
>   ierr = DMDASetFieldName(da_pres,0,"p");CHKERRQ(ierr);
>   ierr = DMDASetFieldName(da_vel,0,"vx");CHKERRQ(ierr);
>   ierr = DMDASetFieldName(da_vel,1,"vy");CHKERRQ(ierr);
>   ierr = PetscObjectSetOptionsPrefix((PetscObject)da_pres,"p_");CHKERRQ(ierr);
>   ierr = PetscObjectSetOptionsPrefix((PetscObject)da_vel,"u_");CHKERRQ(ierr);
>   ierr = PetscObjectSetOptionsPrefix((PetscObject)pack,"stk_");CHKERRQ(ierr);
>
>   /* drop the local handles to the two sub-DMs */
>   ierr = DMDestroy(&da_vel);CHKERRQ(ierr);
>   ierr = DMDestroy(&da_pres);CHKERRQ(ierr);
>
>   *dmc = da_coeff;
>   *dms = pack;
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DCreateDMDA_Q1mP1"
> /*
>    Stokes2DCreateDMDA_Q1mP1 - Builds the DMs for the Q1macro-P1 discretisation
>    on the unit square.
>
>    Input Parameters:
> +  mx - number of elements in x (must be even)
> -  my - number of elements in y (must be even)
>
>    Output Parameters:
> +  dms - DMComposite holding (velocity DMDA, pressure DMDA), options prefix "stk_"
> -  dmc - coefficient DMDA, mx x my, one GaussPointCoefficients worth of scalars per cell
>
>    The velocity DMDA is (mx+1) x (my+1) with 2 dofs and stencil width 1; the
>    pressure DMDA is (mx/2) x (my/2) with 3 dofs and stencil width 0. The local
>    handles to the velocity and pressure DMDAs are destroyed before returning.
> */
> PetscErrorCode Stokes2DCreateDMDA_Q1mP1(const PetscInt mx,const PetscInt my,DM *dms,DM *dmc)
> {
>   PetscErrorCode ierr;
>   DM             da_coeff,da_vel,da_pres,pack;
>   IS             *fields;
>   PetscInt       ndof_coeff,ndof_vel,ndof_pres,width;
>
>   PetscFunctionBeginUser;
>   if (mx%2 != 0) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_USER,"-mx must be divisble by 2 for macro fe meshes");
>   if (my%2 != 0) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_USER,"-my must be divisble by 2 for macro fe meshes");
>
>   /* coefficient grid: one block of gauss point data per cell, no ghosting */
>   ndof_coeff = (int)(sizeof(GaussPointCoefficients)/sizeof(PetscScalar));
>   width      = 0;
>   ierr = DMDACreate2d(PETSC_COMM_WORLD,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_STENCIL_BOX,mx,my,PETSC_DECIDE,PETSC_DECIDE,ndof_coeff,width,0,0,&da_coeff);CHKERRQ(ierr);
>   ierr = DMDASetUniformCoordinates(da_coeff,0.0,1.0, 0.0,1.0, 0.0,0.0);CHKERRQ(ierr);
>
>   /* pressure grid: one cell per 2x2 macro element, three dofs each */
>   ndof_pres = 3;
>   width     = 0;
>   ierr = DMDACreate2d(PETSC_COMM_WORLD,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_STENCIL_BOX,mx/2,my/2,PETSC_DECIDE,PETSC_DECIDE,ndof_pres,width,0,0,&da_pres);CHKERRQ(ierr);
>   ierr = DMDASetUniformCoordinates(da_pres,0.0,1.0, 0.0,1.0, 0.0,0.0);CHKERRQ(ierr);
>
>   /* velocity grid */
>   ndof_vel = 2;
>   width    = 1;
>   ierr = DMDACreate2d(PETSC_COMM_WORLD,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_STENCIL_BOX,mx+1,my+1,PETSC_DECIDE,PETSC_DECIDE,ndof_vel,width,0,0,&da_vel);CHKERRQ(ierr);
>   ierr = DMDASetUniformCoordinates(da_vel,0.0,1.0, 0.0,1.0, 0.0,0.0);CHKERRQ(ierr);
>
>   /* pack velocity first, pressure second */
>   ierr = DMCompositeCreate(PETSC_COMM_WORLD,&pack);CHKERRQ(ierr);
>   ierr = DMCompositeAddDM(pack,da_vel);CHKERRQ(ierr);
>   ierr = DMCompositeAddDM(pack,da_pres);CHKERRQ(ierr);
>
>   /* the global index sets are requested and released straight away */
>   ierr = DMCompositeGetGlobalISs(pack,&fields);CHKERRQ(ierr);
>   ierr = ISDestroy(&fields[0]);CHKERRQ(ierr);
>   ierr = ISDestroy(&fields[1]);CHKERRQ(ierr);
>   ierr = PetscFree(fields);CHKERRQ(ierr);
>
>   ierr = DMDASetFieldName(da_pres,0,"p");CHKERRQ(ierr);
>   ierr = DMDASetFieldName(da_vel,0,"vx");CHKERRQ(ierr);
>   ierr = DMDASetFieldName(da_vel,1,"vy");CHKERRQ(ierr);
>   ierr = PetscObjectSetOptionsPrefix((PetscObject)da_pres,"p_");CHKERRQ(ierr);
>   ierr = PetscObjectSetOptionsPrefix((PetscObject)da_vel,"u_");CHKERRQ(ierr);
>   ierr = PetscObjectSetOptionsPrefix((PetscObject)pack,"stk_");CHKERRQ(ierr);
>
>   /* drop the local handles to the two sub-DMs */
>   ierr = DMDestroy(&da_vel);CHKERRQ(ierr);
>   ierr = DMDestroy(&da_pres);CHKERRQ(ierr);
>
>   *dmc = da_coeff;
>   *dms = pack;
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DCreateDMDA_Q1mP0"
> /*
>    Stokes2DCreateDMDA_Q1mP0 - Builds the DMs for the Q1macro-P0 discretisation
>    on the unit square.
>
>    Input Parameters:
> +  mx - number of elements in x (must be even)
> -  my - number of elements in y (must be even)
>
>    Output Parameters:
> +  dms - DMComposite holding (velocity DMDA, pressure DMDA), options prefix "stk_"
> -  dmc - coefficient DMDA, mx x my, one GaussPointCoefficients worth of scalars per cell
>
>    The velocity DMDA is (mx+1) x (my+1) with 2 dofs and stencil width 1; the
>    pressure DMDA is (mx/2) x (my/2) with 1 dof and stencil width 0. The local
>    handles to the velocity and pressure DMDAs are destroyed before returning.
> */
> PetscErrorCode Stokes2DCreateDMDA_Q1mP0(const PetscInt mx,const PetscInt my,DM *dms,DM *dmc)
> {
>   PetscErrorCode ierr;
>   DM             da_coeff,da_vel,da_pres,pack;
>   IS             *fields;
>   PetscInt       ndof_coeff,ndof_vel,ndof_pres,width;
>
>   PetscFunctionBeginUser;
>   if (mx%2 != 0) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_USER,"-mx must be divisble by 2 for macro fe meshes");
>   if (my%2 != 0) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_USER,"-my must be divisble by 2 for macro fe meshes");
>
>   /* coefficient grid: one block of gauss point data per cell, no ghosting */
>   ndof_coeff = (int)(sizeof(GaussPointCoefficients)/sizeof(PetscScalar));
>   width      = 0;
>   ierr = DMDACreate2d(PETSC_COMM_WORLD,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_STENCIL_BOX,mx,my,PETSC_DECIDE,PETSC_DECIDE,ndof_coeff,width,0,0,&da_coeff);CHKERRQ(ierr);
>   ierr = DMDASetUniformCoordinates(da_coeff,0.0,1.0, 0.0,1.0, 0.0,0.0);CHKERRQ(ierr);
>
>   /* pressure grid: one cell per 2x2 macro element, a single dof each */
>   ndof_pres = 1;
>   width     = 0;
>   ierr = DMDACreate2d(PETSC_COMM_WORLD,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_STENCIL_BOX,mx/2,my/2,PETSC_DECIDE,PETSC_DECIDE,ndof_pres,width,0,0,&da_pres);CHKERRQ(ierr);
>   ierr = DMDASetUniformCoordinates(da_pres,0.0,1.0, 0.0,1.0, 0.0,0.0);CHKERRQ(ierr);
>
>   /* velocity grid */
>   ndof_vel = 2;
>   width    = 1;
>   ierr = DMDACreate2d(PETSC_COMM_WORLD,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_STENCIL_BOX,mx+1,my+1,PETSC_DECIDE,PETSC_DECIDE,ndof_vel,width,0,0,&da_vel);CHKERRQ(ierr);
>   ierr = DMDASetUniformCoordinates(da_vel,0.0,1.0, 0.0,1.0, 0.0,0.0);CHKERRQ(ierr);
>
>   /* pack velocity first, pressure second */
>   ierr = DMCompositeCreate(PETSC_COMM_WORLD,&pack);CHKERRQ(ierr);
>   ierr = DMCompositeAddDM(pack,da_vel);CHKERRQ(ierr);
>   ierr = DMCompositeAddDM(pack,da_pres);CHKERRQ(ierr);
>
>   /* the global index sets are requested and released straight away */
>   ierr = DMCompositeGetGlobalISs(pack,&fields);CHKERRQ(ierr);
>   ierr = ISDestroy(&fields[0]);CHKERRQ(ierr);
>   ierr = ISDestroy(&fields[1]);CHKERRQ(ierr);
>   ierr = PetscFree(fields);CHKERRQ(ierr);
>
>   ierr = DMDASetFieldName(da_pres,0,"p");CHKERRQ(ierr);
>   ierr = DMDASetFieldName(da_vel,0,"vx");CHKERRQ(ierr);
>   ierr = DMDASetFieldName(da_vel,1,"vy");CHKERRQ(ierr);
>   ierr = PetscObjectSetOptionsPrefix((PetscObject)da_pres,"p_");CHKERRQ(ierr);
>   ierr = PetscObjectSetOptionsPrefix((PetscObject)da_vel,"u_");CHKERRQ(ierr);
>   ierr = PetscObjectSetOptionsPrefix((PetscObject)pack,"stk_");CHKERRQ(ierr);
>
>   /* drop the local handles to the two sub-DMs */
>   ierr = DMDestroy(&da_vel);CHKERRQ(ierr);
>   ierr = DMDestroy(&da_pres);CHKERRQ(ierr);
>
>   *dmc = da_coeff;
>   *dms = pack;
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "_Stokes2DAssembleMatNest"
> /*
>    _Stokes2DAssembleMatNest - Wraps four blocks into a 2x2 MatNest laid out by
>    the global index sets of pack.
>
>    Input Parameters:
> +  name - options prefix given to the nest matrix
> .  pack - DMComposite supplying the two row/column index sets
> -  A11,A12,A21,A22 - the blocks, row by row; a NULL block is left untouched
>
>    Output Parameter:
> .  A - the assembled nest matrix
>
>    Note: MatDestroy() is called once on every non-NULL block after the nest
>    has been assembled, so the caller gives up one reference per block per call.
> */
> PetscErrorCode _Stokes2DAssembleMatNest(const char name[],DM pack,Mat A11,Mat A12,Mat A21,Mat A22,Mat *A)
> {
>   PetscErrorCode ierr;
>   IS             *fields;
>   Mat            blocks[4];
>   PetscInt       k;
>
>   PetscFunctionBeginUser;
>   /* row-major 2x2 layout */
>   blocks[0] = A11;
>   blocks[1] = A12;
>   blocks[2] = A21;
>   blocks[3] = A22;
>
>   ierr = DMCompositeGetGlobalISs(pack,&fields);CHKERRQ(ierr);
>   ierr = MatCreateNest(((PetscObject)pack)->comm,2,fields,2,fields,blocks,A);CHKERRQ(ierr);
>   ierr = MatSetOptionsPrefix(*A,name);CHKERRQ(ierr);
>   ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   /* release this routine's hold on each block that was supplied */
>   for (k = 0; k < 4; k++) {
>     if (!blocks[k]) continue;
>     ierr = MatDestroy(&blocks[k]);CHKERRQ(ierr);
>   }
>
>   ierr = ISDestroy(&fields[0]);CHKERRQ(ierr);
>   ierr = ISDestroy(&fields[1]);CHKERRQ(ierr);
>   ierr = PetscFree(fields);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DCreateOperators_Q1P0"
> /*
>    Stokes2DCreateOperators_Q1P0 - Creates the two 2x2 nest operators for the
>    Q1-P0 discretisation.
>
>    Input Parameter:
> .  da - DMComposite whose entries are read as (velocity DM, pressure DM)
>
>    Output Parameters:
> +  _A - nest [Auu Aup; Apu App], options prefix "A"
> -  _B - nest [Auu Aup; Apu Spp], options prefix "B"
>
>    Auu, App and Spp come from DMCreateMatrix(); the off-diagonal blocks Aup
>    and Apu are created by hand, sized from the sub-vectors of a temporary
>    global vector, given the same type as Auu, and preallocated with 4 and 8
>    entries per row respectively before their pattern is filled in by
>    _Stokes2DAssembleOperatorsNNZ_Q1P0().
>
>    Note on reference counts: _Stokes2DAssembleMatNest() calls MatDestroy()
>    on every block it is handed. Auu, Aup and Apu are handed over twice (once
>    per nest), so one reference is added back to each of them at the end.
> */
> PetscErrorCode Stokes2DCreateOperators_Q1P0(DM da,Mat *_A,Mat *_B)
> {
>   PetscErrorCode ierr;
>   DM             da_vel,da_pres;
>   MPI_Comm       comm;
>   Mat            Auu,Aup,Apu,Spp,App;
>   PetscInt       nu_local,nu_global,np_local,np_global;
>   MatType        diag_type;
>   Vec            work,work_u,work_p;
>
>   PetscFunctionBeginUser;
>   ierr = DMCompositeGetEntries(da,&da_vel,&da_pres);CHKERRQ(ierr);
>   comm = ((PetscObject)da)->comm;
>
>   /* create the five blocks and name them */
>   ierr = DMCreateMatrix(da_vel,MATAIJ,&Auu);CHKERRQ(ierr);
>   ierr = MatSetOptionsPrefix(Auu,"Auu");CHKERRQ(ierr);
>   ierr = MatCreate(comm,&Aup);CHKERRQ(ierr);
>   ierr = MatSetOptionsPrefix(Aup,"Aup");CHKERRQ(ierr);
>   ierr = MatCreate(comm,&Apu);CHKERRQ(ierr);
>   ierr = MatSetOptionsPrefix(Apu,"Apu");CHKERRQ(ierr);
>   ierr = DMCreateMatrix(da_pres,MATAIJ,&Spp);CHKERRQ(ierr);
>   ierr = MatSetOptionsPrefix(Spp,"Spp");CHKERRQ(ierr);
>   ierr = DMCreateMatrix(da_pres,MATAIJ,&App);CHKERRQ(ierr);
>   ierr = MatSetOptionsPrefix(App,"App");CHKERRQ(ierr);
>
>   /* local/global sizes of the velocity and pressure parts */
>   ierr = DMCreateGlobalVector(da,&work);CHKERRQ(ierr);
>   ierr = DMCompositeGetAccess(da,work,&work_u,&work_p);CHKERRQ(ierr);
>   ierr = VecGetSize(work_u,&nu_global);CHKERRQ(ierr);
>   ierr = VecGetLocalSize(work_u,&nu_local);CHKERRQ(ierr);
>   ierr = VecGetSize(work_p,&np_global);CHKERRQ(ierr);
>   ierr = VecGetLocalSize(work_p,&np_local);CHKERRQ(ierr);
>   ierr = DMCompositeRestoreAccess(da,work,&work_u,&work_p);CHKERRQ(ierr);
>   ierr = VecDestroy(&work);CHKERRQ(ierr);
>
>   /* off-diagonal blocks: sizes, block size, type, options */
>   ierr = MatSetSizes(Aup,nu_local,np_local,nu_global,np_global);CHKERRQ(ierr);
>   ierr = MatSetSizes(Apu,np_local,nu_local,np_global,nu_global);CHKERRQ(ierr);
>
>   ierr = MatSetBlockSize(Aup,1);CHKERRQ(ierr);
>   ierr = MatSetBlockSize(Apu,1);CHKERRQ(ierr);
>
>   ierr = MatGetType(Auu,&diag_type);CHKERRQ(ierr);
>   ierr = MatSetType(Aup,diag_type);CHKERRQ(ierr);
>   ierr = MatSetType(Apu,diag_type);CHKERRQ(ierr);
>
>   ierr = MatSetFromOptions(Aup);CHKERRQ(ierr);
>   ierr = MatSetFromOptions(Apu);CHKERRQ(ierr);
>
>   /* preallocation of the off-diagonal blocks (sequential AIJ call only) */
>   ierr = MatSeqAIJSetPreallocation(Aup,4,PETSC_NULL);CHKERRQ(ierr);
>   ierr = MatSeqAIJSetPreallocation(Apu,8,PETSC_NULL);CHKERRQ(ierr);
>
>   /* MAT_NEW_NONZERO_ALLOCATION_ERR and MatSetUp() are deliberately not used on Aup/Apu here */
>   ierr = MatSetOption(Auu,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
>   ierr = MatSetOption(Aup,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
>   ierr = MatSetOption(Apu,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
>
>   ierr = _Stokes2DAssembleOperatorsNNZ_Q1P0(Aup,Apu,da_vel,da_pres);CHKERRQ(ierr);
>
>   /* A carries App in its (1,1) slot, B carries Spp */
>   ierr = _Stokes2DAssembleMatNest("A",da,Auu,Aup,Apu,App,_A);CHKERRQ(ierr);
>   ierr = _Stokes2DAssembleMatNest("B",da,Auu,Aup,Apu,Spp,_B);CHKERRQ(ierr);
>
>   /* shared blocks were released twice above; restore one reference each */
>   ierr = PetscObjectReference((PetscObject)Auu);CHKERRQ(ierr);
>   ierr = PetscObjectReference((PetscObject)Aup);CHKERRQ(ierr);
>   ierr = PetscObjectReference((PetscObject)Apu);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> /*
>  Stokes2DCreateOperators_Q1mP1 - creates the blocks of the Stokes system and
>  passes them to _Stokes2DAssembleMatNest() twice: once together with App
>  (result returned through _A) and once together with Spp (result returned
>  through _B).
>
>  Auu, App and Spp are obtained from DMCreateMatrix() on the velocity/pressure
>  DMs.  Aup and Apu are created by hand: their sizes are read from the
>  sub-vectors of a temporary composite global vector and their non-zero
>  pattern is inserted by _Stokes2DAssembleOperatorsNNZ_Q1mP1().
>  */
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DCreateOperators_Q1mP1"
> PetscErrorCode Stokes2DCreateOperators_Q1mP1(DM da,Mat *_A,Mat *_B)
> {
>   PetscErrorCode ierr;
>   MPI_Comm       comm;
>   DM             dav,dap;
>   Mat            Auu,Aup,Apu,App,Spp;
>   MatType        diag_type;
>   Vec            X,u,p;
>   PetscInt       nloc_u,nglob_u,nloc_p,nglob_p;
>
>   PetscFunctionBeginUser;
>
>   ierr = DMCompositeGetEntries(da,&dav,&dap);CHKERRQ(ierr);
>   comm = ((PetscObject)da)->comm;
>
>   /* create the blocks; only the diagonal ones come from a DM */
>   ierr = DMCreateMatrix(dav,MATAIJ,&Auu);CHKERRQ(ierr);
>   ierr = MatSetOptionsPrefix(Auu,"Auu");CHKERRQ(ierr);
>   ierr = MatCreate(comm,&Aup);CHKERRQ(ierr);
>   ierr = MatSetOptionsPrefix(Aup,"Aup");CHKERRQ(ierr);
>   ierr = MatCreate(comm,&Apu);CHKERRQ(ierr);
>   ierr = MatSetOptionsPrefix(Apu,"Apu");CHKERRQ(ierr);
>   ierr = DMCreateMatrix(dap,MATAIJ,&Spp);CHKERRQ(ierr);
>   ierr = MatSetOptionsPrefix(Spp,"Spp");CHKERRQ(ierr);
>
>   /* local/global sizes of the velocity and pressure parts of a composite vector */
>   ierr = DMCreateGlobalVector(da,&X);CHKERRQ(ierr);
>   ierr = DMCompositeGetAccess(da,X,&u,&p);CHKERRQ(ierr);
>   ierr = VecGetSize(u,&nglob_u);CHKERRQ(ierr);
>   ierr = VecGetLocalSize(u,&nloc_u);CHKERRQ(ierr);
>   ierr = VecGetSize(p,&nglob_p);CHKERRQ(ierr);
>   ierr = VecGetLocalSize(p,&nloc_p);CHKERRQ(ierr);
>   ierr = DMCompositeRestoreAccess(da,X,&u,&p);CHKERRQ(ierr);
>   ierr = VecDestroy(&X);CHKERRQ(ierr);
>
>   /* off-diagonal blocks: sizes and block size */
>   ierr = MatSetSizes(Aup,nloc_u,nloc_p,nglob_u,nglob_p);CHKERRQ(ierr);
>   ierr = MatSetSizes(Apu,nloc_p,nloc_u,nglob_p,nglob_u);CHKERRQ(ierr);
>   ierr = MatSetBlockSize(Aup,1);CHKERRQ(ierr);
>   ierr = MatSetBlockSize(Apu,1);CHKERRQ(ierr);
>
>   /* off-diagonal blocks: same type as Auu, then let the options database override */
>   ierr = MatGetType(Auu,&diag_type);CHKERRQ(ierr);
>   ierr = MatSetType(Aup,diag_type);CHKERRQ(ierr);
>   ierr = MatSetType(Apu,diag_type);CHKERRQ(ierr);
>   ierr = MatSetFromOptions(Aup);CHKERRQ(ierr);
>   ierr = MatSetFromOptions(Apu);CHKERRQ(ierr);
>
>   /* off-diagonal blocks: fixed per-row preallocation (12 for Aup, 18 for Apu) */
>   ierr = MatSeqAIJSetPreallocation(Aup,12,PETSC_NULL);CHKERRQ(ierr);
>   ierr = MatSeqAIJSetPreallocation(Apu,18,PETSC_NULL);CHKERRQ(ierr);
>
>   /* request MAT_KEEP_NONZERO_PATTERN before the pattern is inserted below */
>   ierr = MatSetOption(Auu,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
>   ierr = MatSetOption(Aup,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
>   ierr = MatSetOption(Apu,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
>
>   ierr = _Stokes2DAssembleOperatorsNNZ_Q1mP1(Aup,Apu,dav,dap);CHKERRQ(ierr);
>
>   /* "A" uses a DM-created pressure block, "B" uses Spp */
>   ierr = DMCreateMatrix(dap,MATAIJ,&App);CHKERRQ(ierr);
>   ierr = MatSetOptionsPrefix(App,"App");CHKERRQ(ierr);
>   ierr = _Stokes2DAssembleMatNest("A",da,Auu,Aup,Apu,App,_A);CHKERRQ(ierr);
>   ierr = _Stokes2DAssembleMatNest("B",da,Auu,Aup,Apu,Spp,_B);CHKERRQ(ierr);
>
>   /* NOTE(review): extra references on the blocks shared by A and B; presumably
>      this balances a destroy performed elsewhere - confirm against
>      _Stokes2DAssembleMatNest() */
>   ierr = PetscObjectReference((PetscObject)Auu);CHKERRQ(ierr);
>   ierr = PetscObjectReference((PetscObject)Aup);CHKERRQ(ierr);
>   ierr = PetscObjectReference((PetscObject)Apu);CHKERRQ(ierr);
>
>   PetscFunctionReturn(0);
> }
>
> /*
>  Stokes2DCreateOperators_Q1mP0 - creates the blocks of the Stokes system and
>  passes them to _Stokes2DAssembleMatNest() twice: once together with App
>  (result returned through _A) and once together with Spp (result returned
>  through _B).
>
>  Auu, App and Spp are obtained from DMCreateMatrix() on the velocity/pressure
>  DMs.  Aup and Apu are created by hand: their sizes are read from the
>  sub-vectors of a temporary composite global vector and their non-zero
>  pattern is inserted by _Stokes2DAssembleOperatorsNNZ_Q1mP0().
>  */
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DCreateOperators_Q1mP0"
> PetscErrorCode Stokes2DCreateOperators_Q1mP0(DM da,Mat *_A,Mat *_B)
> {
>   PetscErrorCode ierr;
>   MPI_Comm       comm;
>   DM             dav,dap;
>   Mat            Auu,Aup,Apu,App,Spp;
>   MatType        diag_type;
>   Vec            X,u,p;
>   PetscInt       nloc_u,nglob_u,nloc_p,nglob_p;
>
>   PetscFunctionBeginUser;
>
>   ierr = DMCompositeGetEntries(da,&dav,&dap);CHKERRQ(ierr);
>   comm = ((PetscObject)da)->comm;
>
>   /* create the blocks; only the diagonal ones come from a DM */
>   ierr = DMCreateMatrix(dav,MATAIJ,&Auu);CHKERRQ(ierr);
>   ierr = MatSetOptionsPrefix(Auu,"Auu");CHKERRQ(ierr);
>   ierr = MatCreate(comm,&Aup);CHKERRQ(ierr);
>   ierr = MatSetOptionsPrefix(Aup,"Aup");CHKERRQ(ierr);
>   ierr = MatCreate(comm,&Apu);CHKERRQ(ierr);
>   ierr = MatSetOptionsPrefix(Apu,"Apu");CHKERRQ(ierr);
>   ierr = DMCreateMatrix(dap,MATAIJ,&Spp);CHKERRQ(ierr);
>   ierr = MatSetOptionsPrefix(Spp,"Spp");CHKERRQ(ierr);
>
>   /* local/global sizes of the velocity and pressure parts of a composite vector */
>   ierr = DMCreateGlobalVector(da,&X);CHKERRQ(ierr);
>   ierr = DMCompositeGetAccess(da,X,&u,&p);CHKERRQ(ierr);
>   ierr = VecGetSize(u,&nglob_u);CHKERRQ(ierr);
>   ierr = VecGetLocalSize(u,&nloc_u);CHKERRQ(ierr);
>   ierr = VecGetSize(p,&nglob_p);CHKERRQ(ierr);
>   ierr = VecGetLocalSize(p,&nloc_p);CHKERRQ(ierr);
>   ierr = DMCompositeRestoreAccess(da,X,&u,&p);CHKERRQ(ierr);
>   ierr = VecDestroy(&X);CHKERRQ(ierr);
>
>   /* off-diagonal blocks: sizes and block size */
>   ierr = MatSetSizes(Aup,nloc_u,nloc_p,nglob_u,nglob_p);CHKERRQ(ierr);
>   ierr = MatSetSizes(Apu,nloc_p,nloc_u,nglob_p,nglob_u);CHKERRQ(ierr);
>   ierr = MatSetBlockSize(Aup,1);CHKERRQ(ierr);
>   ierr = MatSetBlockSize(Apu,1);CHKERRQ(ierr);
>
>   /* off-diagonal blocks: same type as Auu, then let the options database override */
>   ierr = MatGetType(Auu,&diag_type);CHKERRQ(ierr);
>   ierr = MatSetType(Aup,diag_type);CHKERRQ(ierr);
>   ierr = MatSetType(Apu,diag_type);CHKERRQ(ierr);
>   ierr = MatSetFromOptions(Aup);CHKERRQ(ierr);
>   ierr = MatSetFromOptions(Apu);CHKERRQ(ierr);
>
>   /* off-diagonal blocks: fixed per-row preallocation (12 for Aup, 18 for Apu) */
>   ierr = MatSeqAIJSetPreallocation(Aup,12,PETSC_NULL);CHKERRQ(ierr);
>   ierr = MatSeqAIJSetPreallocation(Apu,18,PETSC_NULL);CHKERRQ(ierr);
>
>   /* request MAT_KEEP_NONZERO_PATTERN before the pattern is inserted below */
>   ierr = MatSetOption(Auu,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
>   ierr = MatSetOption(Aup,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
>   ierr = MatSetOption(Apu,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
>
>   ierr = _Stokes2DAssembleOperatorsNNZ_Q1mP0(Aup,Apu,dav,dap);CHKERRQ(ierr);
>
>   /* "A" uses a DM-created pressure block, "B" uses Spp */
>   ierr = DMCreateMatrix(dap,MATAIJ,&App);CHKERRQ(ierr);
>   ierr = MatSetOptionsPrefix(App,"App");CHKERRQ(ierr);
>   ierr = _Stokes2DAssembleMatNest("A",da,Auu,Aup,Apu,App,_A);CHKERRQ(ierr);
>   ierr = _Stokes2DAssembleMatNest("B",da,Auu,Aup,Apu,Spp,_B);CHKERRQ(ierr);
>
>   /* NOTE(review): extra references on the blocks shared by A and B; presumably
>      this balances a destroy performed elsewhere - confirm against
>      _Stokes2DAssembleMatNest() */
>   ierr = PetscObjectReference((PetscObject)Auu);CHKERRQ(ierr);
>   ierr = PetscObjectReference((PetscObject)Aup);CHKERRQ(ierr);
>   ierr = PetscObjectReference((PetscObject)Apu);CHKERRQ(ierr);
>
>   PetscFunctionReturn(0);
> }
>
> /*
>  Fills s_u[0..7] with the stencil entries of the velocity unknowns of the
>  element with indices (i,j).  The nodes are visited in the order
>  (i,j), (i,j+1), (i+1,j+1), (i+1,j); for every node the component field .c
>  selects the degree of freedom: 0 (Vx) first, then 1 (Vy).
>  */
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DDMDAGetElementEqnumsU_Q1"
> PetscErrorCode Stokes2DDMDAGetElementEqnumsU_Q1(MatStencil s_u[],PetscInt i,PetscInt j)
> {
>   /* (i,j) offsets of local nodes 0..3 relative to the element origin */
>   const PetscInt off_i[4] = {0,0,1,1};
>   const PetscInt off_j[4] = {0,1,1,0};
>   PetscInt       node,comp;
>
>   PetscFunctionBeginUser;
>   for (node = 0; node < 4; node++) {
>     for (comp = 0; comp < 2; comp++) {
>       MatStencil *s = &s_u[2*node + comp];
>
>       s->i = i + off_i[node];
>       s->j = j + off_j[node];
>       s->c = comp;
>     }
>   }
>   PetscFunctionReturn(0);
> }
>
> /*
>  Fills s_p[0] with the stencil entry of the single pressure unknown of the
>  element with indices (i,j): component 0 at (i,j).
>  */
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DDMDAGetElementEqnumsP_P0"
> PetscErrorCode Stokes2DDMDAGetElementEqnumsP_P0(MatStencil s_p[],PetscInt i,PetscInt j)
> {
>   MatStencil *only = &s_p[0];
>
>   PetscFunctionBeginUser;
>   only->i = i;
>   only->j = j;
>   only->c = 0;
>   PetscFunctionReturn(0);
> }
>
> /*
>  Fills s_p[0..2] with the stencil entries of the three pressure unknowns
>  stored at (i,j): all share the same (i,j) and differ only in the component
>  field .c = 0,1,2.
>  */
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DDMDAGetElementEqnumsP_P1"
> PetscErrorCode Stokes2DDMDAGetElementEqnumsP_P1(MatStencil s_p[],PetscInt i,PetscInt j)
> {
>   PetscInt comp;
>
>   PetscFunctionBeginUser;
>   for (comp = 0; comp < 3; comp++) {
>     s_p[comp].i = i;
>     s_p[comp].j = j;
>     s_p[comp].c = comp;
>   }
>   PetscFunctionReturn(0);
> }
>
> /*
>  Element: Local basis function ordering
>  1-----2
>  |     |
>  |     |
>  0-----3
>
>  Copies into eqn[0..7] the entries of gidx[] belonging to the two velocity
>  unknowns of each of the four nodes of element (i,j), in the node order shown
>  above.  gidx[] is indexed as 2*(node_i + node_j*M) + component, i.e. M is
>  the number of nodes per row of the index array.
>  */
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DDMDAGetDofEqnumU_Q1"
> PetscErrorCode Stokes2DDMDAGetDofEqnumU_Q1(PetscInt eqn[],PetscInt i,PetscInt j,PetscInt gidx[],PetscInt M)
> {
>   /* (i,j) offsets of local nodes 0..3 relative to the element origin */
>   const PetscInt off_i[4] = {0,0,1,1};
>   const PetscInt off_j[4] = {0,1,1,0};
>   PetscInt       node;
>
>   PetscFunctionBeginUser;
>   for (node = 0; node < 4; node++) {
>     const PetscInt base = 2*((i + off_i[node]) + (j + off_j[node])*M);
>
>     eqn[2*node]   = gidx[base];
>     eqn[2*node+1] = gidx[base+1];
>   }
>   PetscFunctionReturn(0);
> }
>
> /*
>  Copies into eqn[0] the entry of gidx[] at position i + j*M, i.e. the single
>  pressure unknown stored at (i,j) when the index array has M entries per row.
>  */
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DDMDAGetDofEqnumP_P0"
> PetscErrorCode Stokes2DDMDAGetDofEqnumP_P0(PetscInt eqn[],PetscInt i,PetscInt j,PetscInt gidx[],PetscInt M)
> {
>   PetscFunctionBeginUser;
>   eqn[0] = gidx[i + j*M];
>   PetscFunctionReturn(0);
> }
>
> /*
>  Copies into eqn[0..2] the three consecutive entries of gidx[] starting at
>  3*(i + j*M), i.e. the three pressure unknowns stored at (i,j) when the index
>  array has M points per row.
>  */
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DDMDAGetDofEqnumP_P1"
> PetscErrorCode Stokes2DDMDAGetDofEqnumP_P1(PetscInt eqn[],PetscInt i,PetscInt j,PetscInt gidx[],PetscInt M)
> {
>   const PetscInt base = 3*(i + j*M);
>   PetscInt       comp;
>
>   PetscFunctionBeginUser;
>   for (comp = 0; comp < 3; comp++) {
>     eqn[comp] = gidx[base + comp];
>   }
>   PetscFunctionReturn(0);
> }
>
> /*
>  Inserts explicit zeros into Aup and Apu at every (velocity,pressure) index
>  pair coupled by an element, then performs the final assembly of both
>  matrices.  One pressure unknown per element is used, taken at the element's
>  own (ei,ej).
>
>  Aup,Apu - off-diagonal blocks to receive the entries
>  dav,dap - velocity and pressure DMDA providing index arrays and element ranges
>  */
> #undef __FUNCT__
> #define __FUNCT__ "_Stokes2DAssembleOperatorsNNZ_Q1P0"
> PetscErrorCode _Stokes2DAssembleOperatorsNNZ_Q1P0(Mat Aup,Mat Apu,DM dav,DM dap)
> {
>   PetscErrorCode ierr;
>   PetscInt       ex0,ey0,nex,ney;
>   PetscInt       ex,ey;
>   PetscInt       *gidx_u,*gidx_p,stride_u,stride_p;
>   PetscInt       rows_u[NODES_PER_EL*U_DOFS];
>   PetscInt       rows_p[1*P_DOFS];
>   PetscScalar    zeros_up[4*U_DOFS*1*P_DOFS];
>   PetscScalar    zeros_pu[1*P_DOFS*4*U_DOFS];
>
>   PetscFunctionBeginUser;
>
>   /* index arrays and the ghosted width (x-direction) used to address them */
>   ierr = DMDAGetGlobalIndices(dav,PETSC_NULL,&gidx_u);CHKERRQ(ierr);
>   ierr = DMDAGetGlobalIndices(dap,PETSC_NULL,&gidx_p);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(dav,PETSC_NULL,PETSC_NULL,PETSC_NULL,&stride_u,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(dap,PETSC_NULL,PETSC_NULL,PETSC_NULL,&stride_p,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
>
>   ierr = DMDAGetElementCorners(dav,&ex0,&ey0,PETSC_NULL,&nex,&ney,PETSC_NULL);CHKERRQ(ierr);
>   for (ey = ey0; ey < ey0+ney; ey++) {
>     for (ex = ex0; ex < ex0+nex; ex++) {
>       /* zero-valued element blocks: only the pattern matters here */
>       ierr = PetscMemzero(zeros_up,sizeof(zeros_up));CHKERRQ(ierr);
>       ierr = PetscMemzero(zeros_pu,sizeof(zeros_pu));CHKERRQ(ierr);
>
>       /* indices of the element unknowns */
>       ierr = Stokes2DDMDAGetDofEqnumU_Q1(rows_u,ex,ey,gidx_u,stride_u);CHKERRQ(ierr);
>       ierr = Stokes2DDMDAGetDofEqnumP_P0(rows_p,ex,ey,gidx_p,stride_p);CHKERRQ(ierr);
>
>       ierr = MatSetValues(Aup,4*U_DOFS,rows_u,1*P_DOFS,rows_p,zeros_up,INSERT_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValues(Apu,1*P_DOFS,rows_p,4*U_DOFS,rows_u,zeros_pu,INSERT_VALUES);CHKERRQ(ierr);
>     }
>   }
>
>   /* final assembly of both blocks */
>   ierr = MatAssemblyBegin(Aup,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Aup,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = MatAssemblyBegin(Apu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Apu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   PetscFunctionReturn(0);
> }
>
> /*
>  Inserts explicit zeros into Aup and Apu at every (velocity,pressure) index
>  pair coupled by an element, then performs the final assembly of both
>  matrices.  Three pressure unknowns are used per element, taken at the macro
>  indices (ei/2,ej/2), so the elements of a 2x2 group share their pressure
>  unknowns.
>
>  Aup,Apu - off-diagonal blocks to receive the entries
>  dav,dap - velocity and pressure DMDA providing index arrays and element ranges
>  */
> #undef __FUNCT__
> #define __FUNCT__ "_Stokes2DAssembleOperatorsNNZ_Q1mP1"
> PetscErrorCode _Stokes2DAssembleOperatorsNNZ_Q1mP1(Mat Aup,Mat Apu,DM dav,DM dap)
> {
>   PetscErrorCode ierr;
>   PetscInt       ex0,ey0,nex,ney;
>   PetscInt       ex,ey,macro_x,macro_y;
>   PetscInt       *gidx_u,*gidx_p,stride_u,stride_p;
>   PetscInt       rows_u[NODES_PER_EL*U_DOFS];
>   PetscInt       rows_p[3*P_DOFS];
>   PetscScalar    zeros_up[4*U_DOFS*3*P_DOFS];
>   PetscScalar    zeros_pu[3*P_DOFS*4*U_DOFS];
>
>   PetscFunctionBeginUser;
>
>   /* index arrays and the ghosted width (x-direction) used to address them */
>   ierr = DMDAGetGlobalIndices(dav,PETSC_NULL,&gidx_u);CHKERRQ(ierr);
>   ierr = DMDAGetGlobalIndices(dap,PETSC_NULL,&gidx_p);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(dav,PETSC_NULL,PETSC_NULL,PETSC_NULL,&stride_u,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(dap,PETSC_NULL,PETSC_NULL,PETSC_NULL,&stride_p,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
>
>   ierr = DMDAGetElementCorners(dav,&ex0,&ey0,PETSC_NULL,&nex,&ney,PETSC_NULL);CHKERRQ(ierr);
>   for (ey = ey0; ey < ey0+ney; ey++) {
>     macro_y = ey/2;
>     for (ex = ex0; ex < ex0+nex; ex++) {
>       macro_x = ex/2;
>
>       /* zero-valued element blocks: only the pattern matters here */
>       ierr = PetscMemzero(zeros_up,sizeof(zeros_up));CHKERRQ(ierr);
>       ierr = PetscMemzero(zeros_pu,sizeof(zeros_pu));CHKERRQ(ierr);
>
>       /* velocity indices from the element, pressure indices from its macro */
>       ierr = Stokes2DDMDAGetDofEqnumU_Q1(rows_u,ex,ey,gidx_u,stride_u);CHKERRQ(ierr);
>       ierr = Stokes2DDMDAGetDofEqnumP_P1(rows_p,macro_x,macro_y,gidx_p,stride_p);CHKERRQ(ierr);
>
>       ierr = MatSetValues(Aup,4*U_DOFS,rows_u,3*P_DOFS,rows_p,zeros_up,INSERT_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValues(Apu,3*P_DOFS,rows_p,4*U_DOFS,rows_u,zeros_pu,INSERT_VALUES);CHKERRQ(ierr);
>     }
>   }
>
>   /* final assembly of both blocks */
>   ierr = MatAssemblyBegin(Aup,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Aup,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = MatAssemblyBegin(Apu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Apu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   PetscFunctionReturn(0);
> }
>
> /*
>  Inserts explicit zeros into Aup and Apu at every (velocity,pressure) index
>  pair coupled by an element, then performs the final assembly of both
>  matrices.  One pressure unknown is used per element, taken at the macro
>  indices (ei/2,ej/2), so the elements of a 2x2 group share their pressure
>  unknown.
>
>  Aup,Apu - off-diagonal blocks to receive the entries
>  dav,dap - velocity and pressure DMDA providing index arrays and element ranges
>  */
> #undef __FUNCT__
> #define __FUNCT__ "_Stokes2DAssembleOperatorsNNZ_Q1mP0"
> PetscErrorCode _Stokes2DAssembleOperatorsNNZ_Q1mP0(Mat Aup,Mat Apu,DM dav,DM dap)
> {
>   PetscErrorCode ierr;
>   PetscInt       ex0,ey0,nex,ney;
>   PetscInt       ex,ey,macro_x,macro_y;
>   PetscInt       *gidx_u,*gidx_p,stride_u,stride_p;
>   PetscInt       rows_u[NODES_PER_EL*U_DOFS];
>   PetscInt       rows_p[1*P_DOFS];
>   PetscScalar    zeros_up[4*U_DOFS*1*P_DOFS];
>   PetscScalar    zeros_pu[1*P_DOFS*4*U_DOFS];
>
>   PetscFunctionBeginUser;
>
>   /* index arrays and the ghosted width (x-direction) used to address them */
>   ierr = DMDAGetGlobalIndices(dav,PETSC_NULL,&gidx_u);CHKERRQ(ierr);
>   ierr = DMDAGetGlobalIndices(dap,PETSC_NULL,&gidx_p);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(dav,PETSC_NULL,PETSC_NULL,PETSC_NULL,&stride_u,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(dap,PETSC_NULL,PETSC_NULL,PETSC_NULL,&stride_p,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
>
>   ierr = DMDAGetElementCorners(dav,&ex0,&ey0,PETSC_NULL,&nex,&ney,PETSC_NULL);CHKERRQ(ierr);
>   for (ey = ey0; ey < ey0+ney; ey++) {
>     macro_y = ey/2;
>     for (ex = ex0; ex < ex0+nex; ex++) {
>       macro_x = ex/2;
>
>       /* zero-valued element blocks: only the pattern matters here */
>       ierr = PetscMemzero(zeros_up,sizeof(zeros_up));CHKERRQ(ierr);
>       ierr = PetscMemzero(zeros_pu,sizeof(zeros_pu));CHKERRQ(ierr);
>
>       /* velocity indices from the element, pressure index from its macro */
>       ierr = Stokes2DDMDAGetDofEqnumU_Q1(rows_u,ex,ey,gidx_u,stride_u);CHKERRQ(ierr);
>       ierr = Stokes2DDMDAGetDofEqnumP_P0(rows_p,macro_x,macro_y,gidx_p,stride_p);CHKERRQ(ierr);
>
>       ierr = MatSetValues(Aup,4*U_DOFS,rows_u,1*P_DOFS,rows_p,zeros_up,INSERT_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValues(Apu,1*P_DOFS,rows_p,4*U_DOFS,rows_u,zeros_pu,INSERT_VALUES);CHKERRQ(ierr);
>     }
>   }
>
>   /* final assembly of both blocks */
>   ierr = MatAssemblyBegin(Aup,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Aup,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = MatAssemblyBegin(Apu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Apu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   PetscFunctionReturn(0);
> }
>
> /*
>  Stokes2DAssembleOperators_Q1P0 - element loop that adds the element matrices
>  into the blocks of A and B and assembles them.
>
>  A         - operator; its (u,u), (u,p) and (p,u) blocks are filled
>  B         - second matrix; only its (p,p) block (Spp) is filled here
>  dm_stokes - composite DM holding the velocity DM followed by the pressure DM
>  dm_coeff  - DMDA describing the layout of coeff
>  coeff     - vector of GaussPointCoefficients, indexed per element as [ej][ei]
>
>  The blocks are pulled out of A and B with MatGetSubMatrix() using the global
>  index sets of the composite DM and are destroyed again before returning.
>  */
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DAssembleOperators_Q1P0"
> PetscErrorCode Stokes2DAssembleOperators_Q1P0(Mat A,Mat B,DM dm_stokes,DM dm_coeff,Vec coeff)
> {
>   PetscErrorCode         ierr;
>   DM                     dav,dap,cda;
>   Vec                    coords;
>   DMDACoor2d             **xy;
>   GaussPointCoefficients **gp_coeff;
>   PetscScalar            *eta_gp;
>   IS                     *fields;
>   Mat                    Auu,Aup,Apu,Spp;
>   PetscInt               ex0,ey0,nex,ney;
>   PetscInt               ex,ey;
>   PetscInt               *gidx_u,*gidx_p,stride_u,stride_p;
>   MatStencil             sten_u[NODES_PER_EL*U_DOFS]; /* 2 degrees of freedom */
>   MatStencil             sten_p[NODES_PER_EL*P_DOFS]; /* 1 degrees of freedom */
>   PetscInt               rows_u[NODES_PER_EL*U_DOFS];
>   PetscInt               rows_p[1*P_DOFS];
>   PetscScalar            Ae[4*U_DOFS*4*U_DOFS];
>   PetscScalar            Ge[4*U_DOFS*1*P_DOFS];
>   PetscScalar            De[1*P_DOFS*4*U_DOFS];
>   PetscScalar            Se[1*P_DOFS*1*P_DOFS];
>   PetscScalar            el_xy[4*NSD];
>
>   PetscFunctionBeginUser;
>
>   /* velocity and pressure DMs */
>   ierr = DMCompositeGetEntries(dm_stokes,&dav,&dap);CHKERRQ(ierr);
>
>   /* array access to the local coordinates of the velocity DM */
>   ierr = DMGetCoordinateDM(dav,&cda);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(dav,&coords);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(cda,coords,&xy);CHKERRQ(ierr);
>
>   /* array access to the coefficients */
>   ierr = DMDAVecGetArray(dm_coeff,coeff,&gp_coeff);CHKERRQ(ierr);
>
>   /* blocks of A and B: fields[0] is velocity, fields[1] is pressure */
>   ierr = DMCompositeGetGlobalISs(dm_stokes,&fields);CHKERRQ(ierr);
>   ierr = MatGetSubMatrix(A,fields[0],fields[0],MAT_INITIAL_MATRIX,&Auu);CHKERRQ(ierr);
>   ierr = MatGetSubMatrix(A,fields[0],fields[1],MAT_INITIAL_MATRIX,&Aup);CHKERRQ(ierr);
>   ierr = MatGetSubMatrix(A,fields[1],fields[0],MAT_INITIAL_MATRIX,&Apu);CHKERRQ(ierr);
>   ierr = MatGetSubMatrix(B,fields[1],fields[1],MAT_INITIAL_MATRIX,&Spp);CHKERRQ(ierr);
>
>   /* index arrays and the ghosted width (x-direction) used to address them */
>   ierr = DMDAGetGlobalIndices(dav,PETSC_NULL,&gidx_u);CHKERRQ(ierr);
>   ierr = DMDAGetGlobalIndices(dap,PETSC_NULL,&gidx_p);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(dav,PETSC_NULL,PETSC_NULL,PETSC_NULL,&stride_u,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(dap,PETSC_NULL,PETSC_NULL,PETSC_NULL,&stride_p,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
>
>   ierr = DMDAGetElementCorners(dav,&ex0,&ey0,PETSC_NULL,&nex,&ney,PETSC_NULL);CHKERRQ(ierr);
>   for (ey = ey0; ey < ey0+ney; ey++) {
>     for (ex = ex0; ex < ex0+nex; ex++) {
>       /* geometry and viscosity of this element */
>       ierr   = GetElementCoords(xy,ex,ey,el_xy);CHKERRQ(ierr);
>       eta_gp = gp_coeff[ey][ex].eta;
>
>       /* element matrices start from zero */
>       ierr = PetscMemzero(Ae,sizeof(Ae));CHKERRQ(ierr);
>       ierr = PetscMemzero(Ge,sizeof(Ge));CHKERRQ(ierr);
>       ierr = PetscMemzero(De,sizeof(De));CHKERRQ(ierr);
>       ierr = PetscMemzero(Se,sizeof(Se));CHKERRQ(ierr);
>
>       /* element matrices; De is formed from Ge */
>       FormStressOperatorQ1(Ae,el_xy,eta_gp);
>       FormGradientOperatorQ1P0(Ge,el_xy);
>       FormDivergenceOperator(De,Ge,4,1);
>       FormScaledMassMatrixOperatorP0P0(Se,el_xy,eta_gp);
>
>       /* stencil addressing for the diagonal blocks ... */
>       ierr = Stokes2DDMDAGetElementEqnumsU_Q1(sten_u,ex,ey);CHKERRQ(ierr);
>       ierr = Stokes2DDMDAGetElementEqnumsP_P0(sten_p,ex,ey);CHKERRQ(ierr);
>
>       /* ... and plain index addressing for the off-diagonal blocks */
>       ierr = Stokes2DDMDAGetDofEqnumU_Q1(rows_u,ex,ey,gidx_u,stride_u);CHKERRQ(ierr);
>       ierr = Stokes2DDMDAGetDofEqnumP_P0(rows_p,ex,ey,gidx_p,stride_p);CHKERRQ(ierr);
>
>       ierr = MatSetValuesStencil(Auu,4*U_DOFS,sten_u,4*U_DOFS,sten_u,Ae,ADD_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValues(Aup,4*U_DOFS,rows_u,1*P_DOFS,rows_p,Ge,ADD_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValues(Apu,1*P_DOFS,rows_p,4*U_DOFS,rows_u,De,ADD_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValuesStencil(Spp,1*P_DOFS,sten_p,1*P_DOFS,sten_p,Se,ADD_VALUES);CHKERRQ(ierr);
>     }
>   }
>
>   /* assemble the blocks */
>   ierr = MatAssemblyBegin(Auu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Auu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = MatAssemblyBegin(Aup,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Aup,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = MatAssemblyBegin(Apu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Apu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = MatAssemblyBegin(Spp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Spp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   /* assemble the enclosing matrices */
>   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   /* give the blocks back to A and B */
>   ierr = MatDestroy(&Auu);CHKERRQ(ierr);
>   ierr = MatDestroy(&Aup);CHKERRQ(ierr);
>   ierr = MatDestroy(&Apu);CHKERRQ(ierr);
>   ierr = MatDestroy(&Spp);CHKERRQ(ierr);
>
>   /* release array access and the index sets */
>   ierr = DMDAVecRestoreArray(cda,coords,&xy);CHKERRQ(ierr);
>   ierr = DMDAVecRestoreArray(dm_coeff,coeff,&gp_coeff);CHKERRQ(ierr);
>   ierr = ISDestroy(&fields[0]);CHKERRQ(ierr);
>   ierr = ISDestroy(&fields[1]);CHKERRQ(ierr);
>   ierr = PetscFree(fields);CHKERRQ(ierr);
>
>   PetscFunctionReturn(0);
> }
>
> /*
>  Stokes2DAssembleOperators_Q1P0_stab - same element loop as
>  Stokes2DAssembleOperators_Q1P0, plus a pressure stabilisation term that is
>  added to the (p,p) block of A once per 2x2 group of elements.
>
>  A         - operator; its (u,u), (u,p), (p,u) and (p,p) blocks are filled
>  B         - second matrix; only its (p,p) block (Spp) is filled here
>  dm_stokes - composite DM holding the velocity DM followed by the pressure DM
>  dm_coeff  - DMDA describing the layout of coeff
>  coeff     - vector of GaussPointCoefficients, indexed per element as [ej][ei]
>
>  Options:
>    -add_stab_to_schurpc  also add the stabilisation term to Spp (default: off)
>
>  The blocks are pulled out of A and B with MatGetSubMatrix() using the global
>  index sets of the composite DM and are destroyed again before returning.
>  */
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DAssembleOperators_Q1P0_stab"
> PetscErrorCode Stokes2DAssembleOperators_Q1P0_stab(Mat A,Mat B,DM dm_stokes,DM dm_coeff,Vec coeff)
> {
>   DM                     dav,dap,cda;
>   Vec                    coords;
>   DMDACoor2d             **LA_coords;
>   MatStencil             u_eqn[NODES_PER_EL*U_DOFS]; /* 2 degrees of freedom */
>   MatStencil             p_eqn[NODES_PER_EL*P_DOFS]; /* 1 degrees of freedom */
>   PetscInt               sex,sey,mx,my;
>   PetscInt               ei,ej;
>   PetscScalar            Ae[4*U_DOFS*4*U_DOFS];
>   PetscScalar            Ge[4*U_DOFS*1*P_DOFS];
>   PetscScalar            De[1*P_DOFS*4*U_DOFS];
>   PetscScalar            Se[1*P_DOFS*1*P_DOFS];
>   PetscScalar            Ce[4*4];
>   PetscScalar            el_coords[4*NSD];
>   GaussPointCoefficients **LA_coeff;
>   PetscScalar            *coeff_eta;
>   IS                     *is;
>   Mat                    Auu,Aup,Apu,App,Spp;
>   PetscInt               u_idx[NODES_PER_EL*U_DOFS];
>   PetscInt               p_idx[1*P_DOFS];
>   PetscInt               *LA_gidx_u,*LA_gidx_p,mg_u,mg_p;
>   PetscBool              add_stab2_pc = PETSC_FALSE;
>   PetscErrorCode         ierr;
>
>   PetscFunctionBeginUser;
>
>   /* the return code was previously discarded; propagate failures like every other call */
>   ierr = PetscOptionsGetBool(PETSC_NULL,"-add_stab_to_schurpc",&add_stab2_pc,0);CHKERRQ(ierr);
>
>   /* access dm for u,p */
>   ierr = DMCompositeGetEntries(dm_stokes,&dav,&dap);CHKERRQ(ierr);
>
>   /* access for coords */
>   ierr = DMGetCoordinateDM(dav,&cda);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(dav,&coords);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(cda,coords,&LA_coords);CHKERRQ(ierr);
>
>   /* access for coefficients */
>   ierr = DMDAVecGetArray(dm_coeff,coeff,&LA_coeff);CHKERRQ(ierr);
>
>   /* access mat nest: is[0] is velocity, is[1] is pressure */
>   ierr = DMCompositeGetGlobalISs(dm_stokes,&is);CHKERRQ(ierr);
>
>   ierr = MatGetSubMatrix(A,is[0],is[0],MAT_INITIAL_MATRIX,&Auu);CHKERRQ(ierr);
>   ierr = MatGetSubMatrix(A,is[0],is[1],MAT_INITIAL_MATRIX,&Aup);CHKERRQ(ierr);
>   ierr = MatGetSubMatrix(A,is[1],is[0],MAT_INITIAL_MATRIX,&Apu);CHKERRQ(ierr);
>   ierr = MatGetSubMatrix(A,is[1],is[1],MAT_INITIAL_MATRIX,&App);CHKERRQ(ierr);
>
>   ierr = MatGetSubMatrix(B,is[1],is[1],MAT_INITIAL_MATRIX,&Spp);CHKERRQ(ierr);
>
>   /* get indices */
>   ierr = DMDAGetGlobalIndices(dav,0,&LA_gidx_u);CHKERRQ(ierr);
>   ierr = DMDAGetGlobalIndices(dap,0,&LA_gidx_p);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(dav,0,0,0,&mg_u,0,0);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(dap,0,0,0,&mg_p,0,0);CHKERRQ(ierr);
>
>   ierr = DMDAGetElementCorners(dav,&sex,&sey,0,&mx,&my,0);CHKERRQ(ierr);
>   for (ej = sey; ej < sey+my; ej++) {
>     for (ei = sex; ei < sex+mx; ei++) {
>
>       /* get coords for the element */
>       ierr = GetElementCoords(LA_coords,ei,ej,el_coords);CHKERRQ(ierr);
>
>       /* get coefficients for the element */
>       coeff_eta = LA_coeff[ej][ei].eta;
>
>       /* initialise element stiffness matrix */
>       ierr = PetscMemzero(Ae,sizeof(PetscScalar)*4*U_DOFS*4*U_DOFS);CHKERRQ(ierr);
>       ierr = PetscMemzero(Ge,sizeof(PetscScalar)*4*U_DOFS*1*P_DOFS);CHKERRQ(ierr);
>       ierr = PetscMemzero(De,sizeof(PetscScalar)*1*P_DOFS*4*U_DOFS);CHKERRQ(ierr);
>       ierr = PetscMemzero(Se,sizeof(PetscScalar)*1*P_DOFS*1*P_DOFS);CHKERRQ(ierr);
>
>       /* form element stiffness matrix; De is formed from Ge */
>       FormStressOperatorQ1(Ae,el_coords,coeff_eta);
>       FormGradientOperatorQ1P0(Ge,el_coords);
>       FormDivergenceOperator(De,Ge,4,1);
>       FormScaledMassMatrixOperatorP0P0(Se,el_coords,coeff_eta);
>
>       /* stencil addressing for the diagonal blocks, plain indices for the off-diagonal ones */
>       ierr = Stokes2DDMDAGetElementEqnumsU_Q1(u_eqn,ei,ej);CHKERRQ(ierr);
>       ierr = Stokes2DDMDAGetElementEqnumsP_P0(p_eqn,ei,ej);CHKERRQ(ierr);
>
>       ierr = Stokes2DDMDAGetDofEqnumU_Q1(u_idx,ei,ej,LA_gidx_u,mg_u);CHKERRQ(ierr);
>       ierr = Stokes2DDMDAGetDofEqnumP_P0(p_idx,ei,ej,LA_gidx_p,mg_p);CHKERRQ(ierr);
>
>       /* the element with even (ei,ej) is the lower-left member of a 2x2 macro:
>          add the stabilisation for the whole macro exactly once */
>       if ( (ei%2 == 0) && (ej%2==0) ) {
>         PetscInt    stab_idx[4],mi;
>         PetscScalar el_coords_macro[4*NSD];
>         PetscScalar coeff_eta_macro[4] = { 0.0, 0.0, 0.0, 0.0 };
>
>         /* get indices for the macro */
>         /* NOTE(review): these are computed from the element count mx returned by
>            DMDAGetElementCorners(), not looked up in LA_gidx_p - presumably only
>            valid when that numbering coincides with the rows of App; confirm */
>         stab_idx[0] = ei + ej * mx;
>         stab_idx[1] = (ei+1) + (ej) * mx;
>         stab_idx[2] = (ei+1) + (ej+1) * mx;
>         stab_idx[3] = (ei  ) + (ej+1) * mx;
>
>         /* get coords for the macro */
>         ierr = GetElementCoordsMacro(LA_coords,ei,ej,el_coords_macro);CHKERRQ(ierr);
>
>         /* get coefficients for the macro element: mean of eta over the
>            4 quadrature values of each of the 4 member elements */
>         /* NOTE(review): reads elements ei+1 and ej+1; assumes they are
>            accessible through LA_coeff - confirm for odd element counts */
>         for (mi=0; mi<4; mi++) {
>           coeff_eta_macro[0] += LA_coeff[ej][ei].eta[mi];
>           coeff_eta_macro[0] += LA_coeff[ej][ei+1].eta[mi];
>           coeff_eta_macro[0] += LA_coeff[ej+1][ei+1].eta[mi];
>           coeff_eta_macro[0] += LA_coeff[ej+1][ei].eta[mi];
>         }
>         coeff_eta_macro[0] = (1.0/16.0)*coeff_eta_macro[0];
>         coeff_eta_macro[1] = coeff_eta_macro[0];
>         coeff_eta_macro[2] = coeff_eta_macro[0];
>         coeff_eta_macro[3] = coeff_eta_macro[0];
>
>         ierr = PetscMemzero(Ce,sizeof(PetscScalar)*4*4);CHKERRQ(ierr);
>         FormEdgeStabP0(Ce,el_coords_macro,coeff_eta_macro,PETSC_FALSE);
>
>         /* not great for vv stokes unless you use A10.approx(inv(A00)).A01 as the schur pc ...*/
>         //FormNullspaceStabP0(Ce,el_coords_macro,coeff_eta_macro);
>
>         ierr = MatSetValues(App,4,stab_idx,4,stab_idx,Ce,ADD_VALUES);CHKERRQ(ierr);
>
>         /* optionally add the same term to the Schur complement preconditioner block */
>         if (add_stab2_pc) {
>           ierr = MatSetValues(Spp,4,stab_idx,4,stab_idx,Ce,ADD_VALUES);CHKERRQ(ierr);
>         }
>       }
>
>       ierr = MatSetValuesStencil(Auu,4*U_DOFS,u_eqn,4*U_DOFS,u_eqn,Ae,ADD_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValues(Aup,4*U_DOFS,u_idx,1*P_DOFS,p_idx,Ge,ADD_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValues(Apu,1*P_DOFS,p_idx,4*U_DOFS,u_idx,De,ADD_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValuesStencil(Spp,1*P_DOFS,p_eqn,1*P_DOFS,p_eqn,Se,ADD_VALUES);CHKERRQ(ierr);
>     }
>   }
>
>   /* assemble sub mats */
>   ierr = MatAssemblyBegin(Auu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Auu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = MatAssemblyBegin(Aup,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Aup,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = MatAssemblyBegin(Apu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Apu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = MatAssemblyBegin(App,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(App,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = MatAssemblyBegin(Spp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Spp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   /* assemble nest */
>   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   /* return destroy to A,B */
>   ierr = MatDestroy(&Auu);CHKERRQ(ierr);
>   ierr = MatDestroy(&Aup);CHKERRQ(ierr);
>   ierr = MatDestroy(&Apu);CHKERRQ(ierr);
>   ierr = MatDestroy(&App);CHKERRQ(ierr);
>   ierr = MatDestroy(&Spp);CHKERRQ(ierr);
>
>   /* release array access and the index sets */
>   ierr = DMDAVecRestoreArray(cda,coords,&LA_coords);CHKERRQ(ierr);
>   ierr = DMDAVecRestoreArray(dm_coeff,coeff,&LA_coeff);CHKERRQ(ierr);
>   ierr = ISDestroy(&is[0]);CHKERRQ(ierr);
>   ierr = ISDestroy(&is[1]);CHKERRQ(ierr);
>   ierr = PetscFree(is);CHKERRQ(ierr);
>
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DAssembleOperators_Q1mP1"
> PetscErrorCode Stokes2DAssembleOperators_Q1mP1(Mat A,Mat B,DM dm_stokes,DM dm_coeff,Vec coeff)
> {
>   DM                     dav,dap,cda;
>   Vec                    coords;
>   DMDACoor2d             **LA_coords;
>   MatStencil             u_eqn[NODES_PER_EL*U_DOFS]; /* 2 degrees of freedom */
>   MatStencil             p_eqn[NODES_PER_EL*P_DOFS]; /* 1 degrees of freedom */
>   PetscInt               sex,sey,mx,my;
>   PetscInt               ei,ej,mei,mej,lmei,lmej,qp,kk;
>   PetscScalar            Ae[4*U_DOFS*4*U_DOFS];
>   PetscScalar            Ge[4*U_DOFS*3*P_DOFS];
>   PetscScalar            De[3*P_DOFS*4*U_DOFS];
>   PetscScalar            Se[3*P_DOFS*3*P_DOFS];
>   PetscScalar            el_coords[4*NSD],el_coords_macro[4*NSD];
>   GaussPointCoefficients **LA_coeff;
>   PetscScalar            *coeff_eta;
> 	IS                     *is;
> 	Mat                    Auu,Aup,Apu,Spp;
> 	PetscInt               u_idx[NODES_PER_EL*U_DOFS];
> 	PetscInt               p_idx[3*P_DOFS];
> 	PetscInt               *LA_gidx_u,*LA_gidx_p,mg_u,mg_p;
> 	PetscScalar            Ni_p[4],Mi_p[GAUSS_POINTS][3],qxi_macro[2][2][4*2],qw_macro[2][2][4];
>   PetscErrorCode         ierr;
> 	
>   PetscFunctionBeginUser;
> 	
> 	{
> 		PetscInt    ngp,ii,jj,p;
> 		PetscScalar gp_xi[GAUSS_POINTS][2];
> 		PetscScalar gp_weight[GAUSS_POINTS];
> 		PetscScalar sti,stj;
>
> 		/* define quadrature rule */
> 		ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
> 		
> 		/* scale coords and weights for sub cell integration over macro */
> 		//
> 		// -1 <= xi,eta <= 1   --->>   sti <= xi,eta <= sti+1
> 		// [xi0 - (-1)]/2 = [xin - (sti)]/1
> 		//
> 		//
> 		
> 		for (ii=0; ii<2; ii++) {
> 			for (jj=0; jj<2; jj++) {
> 				sti = -1.0 + 1.0 * ii;
> 				stj = -1.0 + 1.0 * jj;
> 				
> 				for (p=0; p<GAUSS_POINTS; p++) {
> 					PetscScalar xi0,eta0;
> 					
> 					xi0  = gp_xi[p][0];
> 					eta0 = gp_xi[p][1];
> 					
> 					qxi_macro[ii][jj][2*p+0] = 0.5*(xi0-(-1.0)) + sti;
> 					qxi_macro[ii][jj][2*p+1] = 0.5*(eta0-(-1.0)) + stj;
> 				}
> 				
> 			}
> 		}
> 		
> 		
> 		for (ii=0; ii<2; ii++) {
> 			for (jj=0; jj<2; jj++) {
> 				qw_macro[ii][jj][0] = (1.0/16.0) * gp_weight[0];
> 				qw_macro[ii][jj][1] = (1.0/16.0) * gp_weight[1];
> 				qw_macro[ii][jj][2] = (1.0/16.0) * gp_weight[2];
> 				qw_macro[ii][jj][3] = (1.0/16.0) * gp_weight[3];
> 			}
> 		}
> 		
> 		for (ii=0; ii<2; ii++) {
> 			for (jj=0; jj<2; jj++) {
> 				printf("[%d][%d] - xi:[(%+1.4e %+1.4e) (%+1.4e %+1.4e) (%+1.4e %+1.4e) (%+1.4e %+1.4e)] w:[%1.4e %1.4e %1.4e %1.4e] \n",
> 							 ii,jj,qxi_macro[ii][jj][2*0+0],qxi_macro[ii][jj][2*0+1], qxi_macro[ii][jj][2*1+0],qxi_macro[ii][jj][2*1+1], qxi_macro[ii][jj][2*2+0],qxi_macro[ii][jj][2*2+1], qxi_macro[ii][jj][2*3+0],qxi_macro[ii][jj][2*3+1],
> 							 qw_macro[ii][jj][0],qw_macro[ii][jj][1],qw_macro[ii][jj][2],qw_macro[ii][jj][3] );
> 			}
> 		}
> 		
> 	}
> 	
> 	
> 	/* access dm for u,p */
> 	ierr = DMCompositeGetEntries(dm_stokes,&dav,&dap);CHKERRQ(ierr);
> 	
>   /* access for coords */
>   ierr = DMGetCoordinateDM(dav,&cda);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(dav,&coords);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(cda,coords,&LA_coords);CHKERRQ(ierr);
> 	
>   /* access for coefficients */
>   ierr = DMDAVecGetArray(dm_coeff,coeff,&LA_coeff);CHKERRQ(ierr);
> 	
> 	/* access mat nest */
> 	ierr = DMCompositeGetGlobalISs(dm_stokes,&is);CHKERRQ(ierr);
> 	
> 	ierr = MatGetSubMatrix(A,is[0],is[0],MAT_INITIAL_MATRIX,&Auu);CHKERRQ(ierr);
> 	ierr = MatGetSubMatrix(A,is[0],is[1],MAT_INITIAL_MATRIX,&Aup);CHKERRQ(ierr);
> 	ierr = MatGetSubMatrix(A,is[1],is[0],MAT_INITIAL_MATRIX,&Apu);CHKERRQ(ierr);
> 	
> 	ierr = MatGetSubMatrix(B,is[1],is[1],MAT_INITIAL_MATRIX,&Spp);CHKERRQ(ierr);
> 	
> 	/* get indices */
> 	ierr = DMDAGetGlobalIndices(dav,0,&LA_gidx_u);CHKERRQ(ierr);
> 	ierr = DMDAGetGlobalIndices(dap,0,&LA_gidx_p);CHKERRQ(ierr);
> 	ierr = DMDAGetGhostCorners(dav,0,0,0,&mg_u,0,0);CHKERRQ(ierr);
> 	ierr = DMDAGetGhostCorners(dap,0,0,0,&mg_p,0,0);CHKERRQ(ierr);
> 	
>   ierr = DMDAGetElementCorners(dav,&sex,&sey,0,&mx,&my,0);CHKERRQ(ierr);
>   for (ej = sey; ej < sey+my; ej++) {
> 		mej = ej/2;
>     for (ei = sex; ei < sex+mx; ei++) {
> 			mei = ei/2;
>
> 			if (ei%2 == 0) { lmei = 0; } else { lmei = 1; }
> 			if (ej%2 == 0) { lmej = 0; } else { lmej = 1; }
> 			
> 			/* get indices for the element and macro */
> 			ierr = Stokes2DDMDAGetDofEqnumU_Q1(u_idx,ei,ej,LA_gidx_u,mg_u);CHKERRQ(ierr);
> 			ierr = Stokes2DDMDAGetDofEqnumP_P1(p_idx,mei,mej,LA_gidx_p,mg_p);CHKERRQ(ierr);
>
>       /* get stencil indices for element and macro */
>       ierr = Stokes2DDMDAGetElementEqnumsU_Q1(u_eqn,ei,ej);CHKERRQ(ierr);
>       ierr = Stokes2DDMDAGetElementEqnumsP_P1(p_eqn,mei,mej);CHKERRQ(ierr);
> 			
> 			//printf("e[%d,%d] - u = { %d,%d,%d,%d } \n",ei,ej,u_idx[2*0],u_idx[2*1],u_idx[2*2],u_idx[2*3]);
> 			//printf("e[%d,%d] - v = { %d,%d,%d,%d } \n",ei,ej,u_idx[2*0+1],u_idx[2*1+1],u_idx[2*2+1],u_idx[2*3+1]);
> 			//printf("e[%d,%d];me[%d,%d] - p = { %d,%d,%d } \n",ei,ej,mei,mej,p_idx[0],p_idx[1],p_idx[2]);
> 			
>       /* get coords for the element */
>       ierr = GetElementCoords(LA_coords,ei,ej,el_coords);CHKERRQ(ierr);
> 			
> 			/* get coords for the macro */
> 			ierr = GetElementCoordsMacro(LA_coords,2*mei,2*mej,el_coords_macro);CHKERRQ(ierr);
> 			//printf("  [%1.4e--%1.4e] x [%1.4e--%1.4e] \n", el_coords_macro[2*0+0],el_coords_macro[2*3+0],el_coords_macro[2*0+1],el_coords_macro[2*1+1]);
> 			
>       /* get coefficients for the element */
>       coeff_eta = LA_coeff[ej][ei].eta;
>
>       /* get averaged coefficients for the macro element */
> 			
> 			
>       /* initialise element stiffness matrix */
>       ierr = PetscMemzero(Ae,sizeof(PetscScalar)*4*U_DOFS*4*U_DOFS);CHKERRQ(ierr);
>       ierr = PetscMemzero(Ge,sizeof(PetscScalar)*4*U_DOFS*3*P_DOFS);CHKERRQ(ierr);
>       ierr = PetscMemzero(De,sizeof(PetscScalar)*3*P_DOFS*4*U_DOFS);CHKERRQ(ierr);
>       ierr = PetscMemzero(Se,sizeof(PetscScalar)*3*P_DOFS*3*P_DOFS);CHKERRQ(ierr);
> 			
>       /* form element stiffness matrix */
>       FormStressOperatorQ1(Ae,el_coords,coeff_eta);
> 			
> #if 1
> 			/* compute pressure basis functions */
> 			for (qp=0; qp<GAUSS_POINTS; qp++) {
> 				PetscScalar qpX[2];
> 				
> 				/* interpolate global coords to qp */
> 				FEBasisQ1Evaluate_Ni(&qxi_macro[lmei][lmej][2*qp],Ni_p);
> 				qpX[0] = qpX[1] = 0.0;
> 				for (kk=0; kk<4; kk++) {
> 					qpX[0] += Ni_p[kk]*el_coords[2*kk+0];
> 					qpX[1] += Ni_p[kk]*el_coords[2*kk+1];
> 				}
> 				
> 				//FEBasisQ1Evaluate_Mi_P1rel(qpX,el_coords_macro,Mi_p[qp]);
> 				//FEBasisQ1Evaluate_Mi_P1(qpX,Mi_p[qp]); // BUG - WRONG COORD!
> 				//FEBasisQ1Evaluate_Mi_P1(&qxi_macro[lmei][lmej][2*qp],Mi_p[qp]);
> 				__FEBasisQ1Evaluate_Mi_P1rel(&qxi_macro[lmei][lmej][2*qp],el_coords_macro,Mi_p[qp]);
> 			}
> #endif
>
> #if 0
> 			/* compute pressure basis functions */
> 			{
> 				PetscScalar qpX[2];
> 				PetscScalar gp_xi[GAUSS_POINTS][2];
> 				PetscScalar gp_weight[GAUSS_POINTS];
> 				PetscScalar sti,stj;
> 				PetscInt ngp;
>
> 				/* define quadrature rule */
> 				ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
> 				
> 				for (qp=0; qp<GAUSS_POINTS; qp++) {
> 					
> 					/* interpolate global coords to qp */
> 					FEBasisQ1Evaluate_Ni(gp_xi[qp],Ni_p);
> 					qpX[0] = qpX[1] = 0.0;
> 					for (kk=0; kk<4; kk++) {
> 						qpX[0] += Ni_p[kk]*el_coords[2*kk+0];
> 						qpX[1] += Ni_p[kk]*el_coords[2*kk+1];
> 					}
> 					
> 					FEBasisQ1Evaluate_Mi_P1rel(qpX,el_coords_macro,Mi_p[qp]);
> 				}
> 			}
> #endif
> 			
>       FormGradientOperatorQ1mP1(Ge,el_coords,Mi_p);
>       FormDivergenceOperator(De,Ge,4,3);
>
> 			
> 			FormScaledMassMatrixOperatorP1P1(Se,el_coords,coeff_eta,Mi_p);
> 			/*
> 			for (qp=0; qp<3*3; qp++) {
> 				printf("  %1.4e ",Se[qp]);
> 				if ((qp+1)%3==0) { printf("\n");}
> 			}
> 			printf("\n");
> 			*/
> 			/*
> 			{
> 				PetscScalar _Se[3*3];
>
> 				ierr = PetscMemzero(_Se,sizeof(PetscScalar)*3*P_DOFS*3*P_DOFS);CHKERRQ(ierr);
> 				_FormScaledMassMatrixOperatorP1P1(_Se,el_coords_macro,coeff_eta);
> 				
> 				printf("_Se[] = \n");
> 				for (qp=0; qp<3*3; qp++) {
> 					printf("  %1.4e ",_Se[qp]);
> 					if ((qp+1)%3==0) { printf("\n");}
> 				}
> 				printf("\n");
> 				
> 			}
> 			*/
> 			 
> 			if (ei==0 && ej==0 ) {
> 				PetscInt I,J,N;
> 				
> 				N = 3*P_DOFS;
> 				printf("Ge = \n");
> 				for( I=0; I<4*U_DOFS; I++ ) {
> 					for( J=0; J<3*P_DOFS; J++ ) {
> 						printf("%+1.4e ", Ge[I*N+J] );
> 					} printf("\n");
> 				} printf("\n");
> 				
> 				N = 4*U_DOFS;
> 				printf("De = \n");
> 				for( I=0; I<4*P_DOFS; I++ ) {
> 					for( J=0; J<3*U_DOFS; J++ ) {
> 						printf("%+1.4e ", De[I*N+J] );
> 					} printf("\n");
> 				} printf("\n");
> 				
> 			}
> 			
>       ierr = MatSetValuesStencil(Auu,4*U_DOFS,u_eqn,4*U_DOFS,u_eqn,Ae,ADD_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValues(Aup,4*U_DOFS,u_idx,3*P_DOFS,p_idx,Ge,ADD_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValues(Apu,3*P_DOFS,p_idx,4*U_DOFS,u_idx,De,ADD_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValuesStencil(Spp,3*P_DOFS,p_eqn,3*P_DOFS,p_eqn,Se,ADD_VALUES);CHKERRQ(ierr);
>     }
>   }
>
> 	
> 	
> 	
> 	/* assemble sub mats */
>   ierr = MatAssemblyBegin(Auu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Auu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
> 	
>   ierr = MatAssemblyBegin(Aup,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Aup,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
> 	
>   ierr = MatAssemblyBegin(Apu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Apu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
> 	
>   ierr = MatAssemblyBegin(Spp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Spp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
> 	
> 	//	MatView(Auu,PETSC_VIEWER_STDOUT_WORLD);
> 	//  MatView(Aup,PETSC_VIEWER_STDOUT_WORLD);
> 	//	MatView(Apu,PETSC_VIEWER_STDOUT_WORLD);
> 	//	MatView(Spp,PETSC_VIEWER_STDOUT_WORLD);
> 	
> 	/* assemble nest */
>   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
> 	
>   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
> 	
> 	/* return destroy to A,B */
> 	ierr = MatDestroy(&Auu);CHKERRQ(ierr);
> 	ierr = MatDestroy(&Aup);CHKERRQ(ierr);
> 	ierr = MatDestroy(&Apu);CHKERRQ(ierr);
> 	ierr = MatDestroy(&Spp);CHKERRQ(ierr);
> 	
>   ierr = DMDAVecRestoreArray(cda,coords,&LA_coords);CHKERRQ(ierr);
>   ierr = DMDAVecRestoreArray(dm_coeff,coeff,&LA_coeff);CHKERRQ(ierr);
> 	ierr = ISDestroy(&is[0]);CHKERRQ(ierr);
> 	ierr = ISDestroy(&is[1]);CHKERRQ(ierr);
> 	ierr = PetscFree(is);CHKERRQ(ierr);
> 	
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DAssembleOperators_Q1mP1_nearly_incompressible"
> /*
>   Stokes2DAssembleOperators_Q1mP1_nearly_incompressible - Adds the element matrices of the
>   Q1 velocity / macro-element P1 pressure discretisation into the blocks of A and B.
>
>   Input parameters:
>     A         - matrix whose (u,u), (u,p) and (p,u) sub-matrices receive Ae, Ge and De
>     B         - matrix whose (p,p) sub-matrix receives Se
>                 (presumably the preconditioning operator - confirm with the caller)
>     dm_stokes - DMComposite from which the velocity DM and the pressure DM are extracted
>     dm_coeff  - DMDA used to index the per-element GaussPointCoefficients
>     coeff     - vector of coefficients accessed through dm_coeff
>
>   Options:
>     -kappa <value> - scalar handed to FormScaledMassMatrixOperatorP1P1_const() (default 1.0e6)
>
>   Notes:
>   Velocity dofs are indexed by the element (ei,ej); pressure dofs are indexed by the
>   macro element (ei/2,ej/2). The pressure basis is evaluated at quadrature points that
>   have been rescaled onto the quadrant of the macro reference square selected by
>   (ei%2,ej%2).
>
>   The quadrature table, and Ge/De of element (0,0), are written to stdout with printf().
> */
> PetscErrorCode Stokes2DAssembleOperators_Q1mP1_nearly_incompressible(Mat A,Mat B,DM dm_stokes,DM dm_coeff,Vec coeff)
> {
>   DM                     dav,dap,cda;
>   Vec                    coords;
>   DMDACoor2d             **LA_coords;
>   MatStencil             u_eqn[NODES_PER_EL*U_DOFS]; /* 2 degrees of freedom */
>   MatStencil             p_eqn[NODES_PER_EL*P_DOFS]; /* 1 degrees of freedom */
>   PetscInt               sex,sey,mx,my;
>   PetscInt               ei,ej,mei,mej,lmei,lmej,qp,kk;
>   PetscScalar            Ae[4*U_DOFS*4*U_DOFS];
>   PetscScalar            Ge[4*U_DOFS*3*P_DOFS];
>   PetscScalar            De[3*P_DOFS*4*U_DOFS];
>   PetscScalar            Se[3*P_DOFS*3*P_DOFS];
>   PetscScalar            el_coords[4*NSD],el_coords_macro[4*NSD];
>   GaussPointCoefficients **LA_coeff;
>   PetscScalar            *coeff_eta;
>   IS                     *is;
>   Mat                    Auu,Aup,Apu,Spp;
>   PetscInt               u_idx[NODES_PER_EL*U_DOFS];
>   PetscInt               p_idx[3*P_DOFS];
>   PetscInt               *LA_gidx_u,*LA_gidx_p,mg_u,mg_p;
>   PetscScalar            Ni_p[4],Mi_p[GAUSS_POINTS][3],qxi_macro[2][2][4*2],qw_macro[2][2][4];
>   PetscScalar            kappa = 1.0e6;
>   PetscErrorCode         ierr;
>
>   PetscFunctionBeginUser;
>
>   ierr = PetscOptionsGetScalar(PETSC_NULL,"-kappa",&kappa,0);CHKERRQ(ierr);
>
>   {
>     PetscInt    ngp,ii,jj,p;
>     PetscScalar gp_xi[GAUSS_POINTS][2];
>     PetscScalar gp_weight[GAUSS_POINTS];
>     PetscScalar sti,stj;
>
>     /* define quadrature rule */
>     ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
>
>     /*
>       scale coords and weights for sub cell integration over macro
>
>         -1 <= xi,eta <= 1   --->>   sti <= xi,eta <= sti+1
>         [xi0 - (-1)]/2 = [xin - (sti)]/1
>     */
>     for (ii=0; ii<2; ii++) {
>       for (jj=0; jj<2; jj++) {
>         sti = -1.0 + 1.0 * ii;
>         stj = -1.0 + 1.0 * jj;
>
>         for (p=0; p<GAUSS_POINTS; p++) {
>           PetscScalar xi0,eta0;
>
>           xi0  = gp_xi[p][0];
>           eta0 = gp_xi[p][1];
>
>           qxi_macro[ii][jj][2*p+0] = 0.5*(xi0-(-1.0)) + sti;
>           qxi_macro[ii][jj][2*p+1] = 0.5*(eta0-(-1.0)) + stj;
>
>           qw_macro[ii][jj][p] = (1.0/16.0) * gp_weight[p];
>         }
>       }
>     }
>
>     /* debug dump of the rescaled rule; PetscInt is cast because it need not be an int */
>     for (ii=0; ii<2; ii++) {
>       for (jj=0; jj<2; jj++) {
>         printf("[%d][%d] - xi:[(%+1.4e %+1.4e) (%+1.4e %+1.4e) (%+1.4e %+1.4e) (%+1.4e %+1.4e)] w:[%1.4e %1.4e %1.4e %1.4e] \n",
>                (int)ii,(int)jj,
>                qxi_macro[ii][jj][2*0+0],qxi_macro[ii][jj][2*0+1],
>                qxi_macro[ii][jj][2*1+0],qxi_macro[ii][jj][2*1+1],
>                qxi_macro[ii][jj][2*2+0],qxi_macro[ii][jj][2*2+1],
>                qxi_macro[ii][jj][2*3+0],qxi_macro[ii][jj][2*3+1],
>                qw_macro[ii][jj][0],qw_macro[ii][jj][1],qw_macro[ii][jj][2],qw_macro[ii][jj][3] );
>       }
>     }
>   }
>
>   /* access dm for u,p */
>   ierr = DMCompositeGetEntries(dm_stokes,&dav,&dap);CHKERRQ(ierr);
>
>   /* access for coords */
>   ierr = DMGetCoordinateDM(dav,&cda);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(dav,&coords);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(cda,coords,&LA_coords);CHKERRQ(ierr);
>
>   /* access for coefficients */
>   ierr = DMDAVecGetArray(dm_coeff,coeff,&LA_coeff);CHKERRQ(ierr);
>
>   /* access the sub-matrices through the index sets of the composite DM */
>   ierr = DMCompositeGetGlobalISs(dm_stokes,&is);CHKERRQ(ierr);
>
>   ierr = MatGetSubMatrix(A,is[0],is[0],MAT_INITIAL_MATRIX,&Auu);CHKERRQ(ierr);
>   ierr = MatGetSubMatrix(A,is[0],is[1],MAT_INITIAL_MATRIX,&Aup);CHKERRQ(ierr);
>   ierr = MatGetSubMatrix(A,is[1],is[0],MAT_INITIAL_MATRIX,&Apu);CHKERRQ(ierr);
>
>   ierr = MatGetSubMatrix(B,is[1],is[1],MAT_INITIAL_MATRIX,&Spp);CHKERRQ(ierr);
>
>   /* get indices */
>   ierr = DMDAGetGlobalIndices(dav,0,&LA_gidx_u);CHKERRQ(ierr);
>   ierr = DMDAGetGlobalIndices(dap,0,&LA_gidx_p);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(dav,0,0,0,&mg_u,0,0);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(dap,0,0,0,&mg_p,0,0);CHKERRQ(ierr);
>
>   ierr = DMDAGetElementCorners(dav,&sex,&sey,0,&mx,&my,0);CHKERRQ(ierr);
>   for (ej = sey; ej < sey+my; ej++) {
>     /* macro element index and position of this element inside the macro (j direction) */
>     mej  = ej/2;
>     lmej = (ej%2 == 0) ? 0 : 1;
>
>     for (ei = sex; ei < sex+mx; ei++) {
>       /* macro element index and position of this element inside the macro (i direction) */
>       mei  = ei/2;
>       lmei = (ei%2 == 0) ? 0 : 1;
>
>       /* get indices for the element and macro */
>       ierr = Stokes2DDMDAGetDofEqnumU_Q1(u_idx,ei,ej,LA_gidx_u,mg_u);CHKERRQ(ierr);
>       ierr = Stokes2DDMDAGetDofEqnumP_P1(p_idx,mei,mej,LA_gidx_p,mg_p);CHKERRQ(ierr);
>
>       /* get stencil indices for element and macro */
>       ierr = Stokes2DDMDAGetElementEqnumsU_Q1(u_eqn,ei,ej);CHKERRQ(ierr);
>       ierr = Stokes2DDMDAGetElementEqnumsP_P1(p_eqn,mei,mej);CHKERRQ(ierr);
>
>       /* get coords for the element */
>       ierr = GetElementCoords(LA_coords,ei,ej,el_coords);CHKERRQ(ierr);
>
>       /* get coords for the macro */
>       ierr = GetElementCoordsMacro(LA_coords,2*mei,2*mej,el_coords_macro);CHKERRQ(ierr);
>
>       /* get coefficients for the element */
>       coeff_eta = LA_coeff[ej][ei].eta;
>
>       /* initialise element matrices */
>       ierr = PetscMemzero(Ae,sizeof(PetscScalar)*4*U_DOFS*4*U_DOFS);CHKERRQ(ierr);
>       ierr = PetscMemzero(Ge,sizeof(PetscScalar)*4*U_DOFS*3*P_DOFS);CHKERRQ(ierr);
>       ierr = PetscMemzero(De,sizeof(PetscScalar)*3*P_DOFS*4*U_DOFS);CHKERRQ(ierr);
>       ierr = PetscMemzero(Se,sizeof(PetscScalar)*3*P_DOFS*3*P_DOFS);CHKERRQ(ierr);
>
>       /* form element stiffness matrix */
>       FormStressOperatorQ1(Ae,el_coords,coeff_eta);
>
>       /* compute pressure basis functions */
>       for (qp=0; qp<GAUSS_POINTS; qp++) {
>         PetscScalar qpX[2];
>
>         /*
>           interpolate global coords to qp; qpX is only consumed by the disabled
>           FEBasisQ1Evaluate_Mi_P1rel(qpX,el_coords_macro,Mi_p[qp]) variant
>         */
>         FEBasisQ1Evaluate_Ni(&qxi_macro[lmei][lmej][2*qp],Ni_p);
>         qpX[0] = qpX[1] = 0.0;
>         for (kk=0; kk<4; kk++) {
>           qpX[0] += Ni_p[kk]*el_coords[2*kk+0];
>           qpX[1] += Ni_p[kk]*el_coords[2*kk+1];
>         }
>
>         FEBasisQ1Evaluate_Mi_P1(&qxi_macro[lmei][lmej][2*qp],Mi_p[qp]);
>       }
>
>       FormGradientOperatorQ1mP1(Ge,el_coords,Mi_p);
>       FormDivergenceOperator(De,Ge,4,3);
>
>       FormScaledMassMatrixOperatorP1P1_const(Se,el_coords,kappa,Mi_p);
>
>       /* debug dump of the coupling blocks of the first element */
>       if (ei==0 && ej==0) {
>         /* not named I/J: I is a macro when <complex.h> is in scope */
>         PetscInt row,col,ncols;
>
>         /* Ge is stored row-major as (4*U_DOFS) x (3*P_DOFS) */
>         ncols = 3*P_DOFS;
>         printf("Ge = \n");
>         for (row=0; row<4*U_DOFS; row++) {
>           for (col=0; col<ncols; col++) {
>             printf("%+1.4e ", Ge[row*ncols+col] );
>           }
>           printf("\n");
>         }
>         printf("\n");
>
>         /* De is stored row-major as (3*P_DOFS) x (4*U_DOFS); stay inside its 24 entries */
>         ncols = 4*U_DOFS;
>         printf("De = \n");
>         for (row=0; row<3*P_DOFS; row++) {
>           for (col=0; col<ncols; col++) {
>             printf("%+1.4e ", De[row*ncols+col] );
>           }
>           printf("\n");
>         }
>         printf("\n");
>       }
>
>       ierr = MatSetValuesStencil(Auu,4*U_DOFS,u_eqn,4*U_DOFS,u_eqn,Ae,ADD_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValues(Aup,4*U_DOFS,u_idx,3*P_DOFS,p_idx,Ge,ADD_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValues(Apu,3*P_DOFS,p_idx,4*U_DOFS,u_idx,De,ADD_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValuesStencil(Spp,3*P_DOFS,p_eqn,3*P_DOFS,p_eqn,Se,ADD_VALUES);CHKERRQ(ierr);
>     }
>   }
>
>   /* assemble sub mats */
>   ierr = MatAssemblyBegin(Auu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Auu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = MatAssemblyBegin(Aup,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Aup,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = MatAssemblyBegin(Apu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Apu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = MatAssemblyBegin(Spp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Spp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   /* assemble the parent matrices */
>   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   /* release the sub-matrix handles obtained from A,B */
>   ierr = MatDestroy(&Auu);CHKERRQ(ierr);
>   ierr = MatDestroy(&Aup);CHKERRQ(ierr);
>   ierr = MatDestroy(&Apu);CHKERRQ(ierr);
>   ierr = MatDestroy(&Spp);CHKERRQ(ierr);
>
>   ierr = DMDAVecRestoreArray(cda,coords,&LA_coords);CHKERRQ(ierr);
>   ierr = DMDAVecRestoreArray(dm_coeff,coeff,&LA_coeff);CHKERRQ(ierr);
>
>   /* the index sets and the array holding them are destroyed/freed here */
>   ierr = ISDestroy(&is[0]);CHKERRQ(ierr);
>   ierr = ISDestroy(&is[1]);CHKERRQ(ierr);
>   ierr = PetscFree(is);CHKERRQ(ierr);
>
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DAssembleOperators_Q1mP0"
> /*
>   Stokes2DAssembleOperators_Q1mP0 - Adds the element matrices of the Q1 velocity /
>   macro-element P0 pressure discretisation into the blocks of A and B.
>
>   Input parameters:
>     A         - matrix whose (u,u), (u,p) and (p,u) sub-matrices receive Ae, Ge and De
>     B         - matrix whose (p,p) sub-matrix receives Se
>                 (presumably the preconditioning operator - confirm with the caller)
>     dm_stokes - DMComposite from which the velocity DM and the pressure DM are extracted
>     dm_coeff  - DMDA used to index the per-element GaussPointCoefficients
>     coeff     - vector of coefficients accessed through dm_coeff
>
>   Notes:
>   Velocity dofs are indexed by the element (ei,ej); the single pressure dof is indexed
>   by the macro element (ei/2,ej/2). The pressure basis is evaluated at quadrature points
>   that have been rescaled onto the quadrant of the macro reference square selected by
>   (ei%2,ej%2).
>
>   The quadrature table, and Ge/De of element (0,0), are written to stdout with printf().
> */
> PetscErrorCode Stokes2DAssembleOperators_Q1mP0(Mat A,Mat B,DM dm_stokes,DM dm_coeff,Vec coeff)
> {
>   DM                     dav,dap,cda;
>   Vec                    coords;
>   DMDACoor2d             **LA_coords;
>   MatStencil             u_eqn[NODES_PER_EL*U_DOFS]; /* 2 degrees of freedom */
>   MatStencil             p_eqn[NODES_PER_EL*P_DOFS]; /* 1 degrees of freedom */
>   PetscInt               sex,sey,mx,my;
>   PetscInt               ei,ej,mei,mej,lmei,lmej,qp,kk;
>   PetscScalar            Ae[4*U_DOFS*4*U_DOFS];
>   PetscScalar            Ge[4*U_DOFS*1*P_DOFS];
>   PetscScalar            De[1*P_DOFS*4*U_DOFS];
>   PetscScalar            Se[1*P_DOFS*1*P_DOFS];
>   PetscScalar            el_coords[4*NSD],el_coords_macro[4*NSD];
>   GaussPointCoefficients **LA_coeff;
>   PetscScalar            *coeff_eta;
>   IS                     *is;
>   Mat                    Auu,Aup,Apu,Spp;
>   PetscInt               u_idx[NODES_PER_EL*U_DOFS];
>   PetscInt               p_idx[1*P_DOFS];
>   PetscInt               *LA_gidx_u,*LA_gidx_p,mg_u,mg_p;
>   PetscScalar            Ni_p[4],Mi_p[GAUSS_POINTS][1],qxi_macro[2][2][4*2],qw_macro[2][2][4];
>   PetscErrorCode         ierr;
>
>   PetscFunctionBeginUser;
>
>   {
>     PetscInt    ngp,ii,jj,p;
>     PetscScalar gp_xi[GAUSS_POINTS][2];
>     PetscScalar gp_weight[GAUSS_POINTS];
>     PetscScalar sti,stj;
>
>     /* define quadrature rule */
>     ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
>
>     /*
>       scale coords and weights for sub cell integration over macro
>
>         -1 <= xi,eta <= 1   --->>   sti <= xi,eta <= sti+1
>         [xi0 - (-1)]/2 = [xin - (sti)]/1
>     */
>     for (ii=0; ii<2; ii++) {
>       for (jj=0; jj<2; jj++) {
>         sti = -1.0 + 1.0 * ii;
>         stj = -1.0 + 1.0 * jj;
>
>         for (p=0; p<GAUSS_POINTS; p++) {
>           PetscScalar xi0,eta0;
>
>           xi0  = gp_xi[p][0];
>           eta0 = gp_xi[p][1];
>
>           qxi_macro[ii][jj][2*p+0] = 0.5*(xi0-(-1.0)) + sti;
>           qxi_macro[ii][jj][2*p+1] = 0.5*(eta0-(-1.0)) + stj;
>
>           qw_macro[ii][jj][p] = (1.0/16.0) * gp_weight[p];
>         }
>       }
>     }
>
>     /* debug dump of the rescaled rule; PetscInt is cast because it need not be an int */
>     for (ii=0; ii<2; ii++) {
>       for (jj=0; jj<2; jj++) {
>         printf("[%d][%d] - xi:[(%+1.4e %+1.4e) (%+1.4e %+1.4e) (%+1.4e %+1.4e) (%+1.4e %+1.4e)] w:[%1.4e %1.4e %1.4e %1.4e] \n",
>                (int)ii,(int)jj,
>                qxi_macro[ii][jj][2*0+0],qxi_macro[ii][jj][2*0+1],
>                qxi_macro[ii][jj][2*1+0],qxi_macro[ii][jj][2*1+1],
>                qxi_macro[ii][jj][2*2+0],qxi_macro[ii][jj][2*2+1],
>                qxi_macro[ii][jj][2*3+0],qxi_macro[ii][jj][2*3+1],
>                qw_macro[ii][jj][0],qw_macro[ii][jj][1],qw_macro[ii][jj][2],qw_macro[ii][jj][3] );
>       }
>     }
>   }
>
>   /* access dm for u,p */
>   ierr = DMCompositeGetEntries(dm_stokes,&dav,&dap);CHKERRQ(ierr);
>
>   /* access for coords */
>   ierr = DMGetCoordinateDM(dav,&cda);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(dav,&coords);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(cda,coords,&LA_coords);CHKERRQ(ierr);
>
>   /* access for coefficients */
>   ierr = DMDAVecGetArray(dm_coeff,coeff,&LA_coeff);CHKERRQ(ierr);
>
>   /* access the sub-matrices through the index sets of the composite DM */
>   ierr = DMCompositeGetGlobalISs(dm_stokes,&is);CHKERRQ(ierr);
>
>   ierr = MatGetSubMatrix(A,is[0],is[0],MAT_INITIAL_MATRIX,&Auu);CHKERRQ(ierr);
>   ierr = MatGetSubMatrix(A,is[0],is[1],MAT_INITIAL_MATRIX,&Aup);CHKERRQ(ierr);
>   ierr = MatGetSubMatrix(A,is[1],is[0],MAT_INITIAL_MATRIX,&Apu);CHKERRQ(ierr);
>
>   ierr = MatGetSubMatrix(B,is[1],is[1],MAT_INITIAL_MATRIX,&Spp);CHKERRQ(ierr);
>
>   /* get indices */
>   ierr = DMDAGetGlobalIndices(dav,0,&LA_gidx_u);CHKERRQ(ierr);
>   ierr = DMDAGetGlobalIndices(dap,0,&LA_gidx_p);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(dav,0,0,0,&mg_u,0,0);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(dap,0,0,0,&mg_p,0,0);CHKERRQ(ierr);
>
>   ierr = DMDAGetElementCorners(dav,&sex,&sey,0,&mx,&my,0);CHKERRQ(ierr);
>   for (ej = sey; ej < sey+my; ej++) {
>     /* macro element index and position of this element inside the macro (j direction) */
>     mej  = ej/2;
>     lmej = (ej%2 == 0) ? 0 : 1;
>
>     for (ei = sex; ei < sex+mx; ei++) {
>       /* macro element index and position of this element inside the macro (i direction) */
>       mei  = ei/2;
>       lmei = (ei%2 == 0) ? 0 : 1;
>
>       /* get indices for the element and macro */
>       ierr = Stokes2DDMDAGetDofEqnumU_Q1(u_idx,ei,ej,LA_gidx_u,mg_u);CHKERRQ(ierr);
>       ierr = Stokes2DDMDAGetDofEqnumP_P0(p_idx,mei,mej,LA_gidx_p,mg_p);CHKERRQ(ierr);
>
>       /* get stencil indices for element and macro */
>       ierr = Stokes2DDMDAGetElementEqnumsU_Q1(u_eqn,ei,ej);CHKERRQ(ierr);
>       ierr = Stokes2DDMDAGetElementEqnumsP_P0(p_eqn,mei,mej);CHKERRQ(ierr);
>
>       /* get coords for the element */
>       ierr = GetElementCoords(LA_coords,ei,ej,el_coords);CHKERRQ(ierr);
>
>       /* get coords for the macro */
>       ierr = GetElementCoordsMacro(LA_coords,2*mei,2*mej,el_coords_macro);CHKERRQ(ierr);
>
>       /* get coefficients for the element */
>       coeff_eta = LA_coeff[ej][ei].eta;
>
>       /* initialise element matrices */
>       ierr = PetscMemzero(Ae,sizeof(PetscScalar)*4*U_DOFS*4*U_DOFS);CHKERRQ(ierr);
>       ierr = PetscMemzero(Ge,sizeof(PetscScalar)*4*U_DOFS*1*P_DOFS);CHKERRQ(ierr);
>       ierr = PetscMemzero(De,sizeof(PetscScalar)*1*P_DOFS*4*U_DOFS);CHKERRQ(ierr);
>       ierr = PetscMemzero(Se,sizeof(PetscScalar)*1*P_DOFS*1*P_DOFS);CHKERRQ(ierr);
>
>       /* form element stiffness matrix */
>       FormStressOperatorQ1(Ae,el_coords,coeff_eta);
>
>       /* compute pressure basis functions */
>       for (qp=0; qp<GAUSS_POINTS; qp++) {
>         PetscScalar qpX[2];
>
>         /*
>           interpolate global coords to qp; qpX is only consumed by the disabled
>           FEBasisQ1Evaluate_Mi_P1rel(qpX,el_coords_macro,Mi_p[qp]) variant
>         */
>         FEBasisQ1Evaluate_Ni(&qxi_macro[lmei][lmej][2*qp],Ni_p);
>         qpX[0] = qpX[1] = 0.0;
>         for (kk=0; kk<4; kk++) {
>           qpX[0] += Ni_p[kk]*el_coords[2*kk+0];
>           qpX[1] += Ni_p[kk]*el_coords[2*kk+1];
>         }
>
>         FEBasisQ1Evaluate_Mi_P0(&qxi_macro[lmei][lmej][2*qp],Mi_p[qp]);
>       }
>
>       FormGradientOperatorQ1mP0(Ge,el_coords,Mi_p);
>       FormDivergenceOperator(De,Ge,4,1);
>
>       FormScaledMassMatrixOperatorP0P0(Se,el_coords,coeff_eta);
>
>       /* debug dump of the coupling blocks of the first element */
>       if (ei==0 && ej==0) {
>         /* not named I/J: I is a macro when <complex.h> is in scope */
>         PetscInt row,col,ncols;
>
>         /* Ge is stored row-major as (4*U_DOFS) x (1*P_DOFS) */
>         ncols = 1*P_DOFS;
>         printf("Ge = \n");
>         for (row=0; row<4*U_DOFS; row++) {
>           for (col=0; col<ncols; col++) {
>             printf("%+1.4e ", Ge[row*ncols+col] );
>           }
>           printf("\n");
>         }
>         printf("\n");
>
>         /* De is stored row-major as (1*P_DOFS) x (4*U_DOFS); stay inside its 8 entries */
>         ncols = 4*U_DOFS;
>         printf("De = \n");
>         for (row=0; row<1*P_DOFS; row++) {
>           for (col=0; col<ncols; col++) {
>             printf("%+1.4e ", De[row*ncols+col] );
>           }
>           printf("\n");
>         }
>         printf("\n");
>       }
>
>       ierr = MatSetValuesStencil(Auu,4*U_DOFS,u_eqn,4*U_DOFS,u_eqn,Ae,ADD_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValues(Aup,4*U_DOFS,u_idx,1*P_DOFS,p_idx,Ge,ADD_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValues(Apu,1*P_DOFS,p_idx,4*U_DOFS,u_idx,De,ADD_VALUES);CHKERRQ(ierr);
>       ierr = MatSetValuesStencil(Spp,1*P_DOFS,p_eqn,1*P_DOFS,p_eqn,Se,ADD_VALUES);CHKERRQ(ierr);
>     }
>   }
>
>   /* assemble sub mats */
>   ierr = MatAssemblyBegin(Auu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Auu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = MatAssemblyBegin(Aup,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Aup,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = MatAssemblyBegin(Apu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Apu,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = MatAssemblyBegin(Spp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(Spp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   /* assemble the parent matrices */
>   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
>
>   /* release the sub-matrix handles obtained from A,B */
>   ierr = MatDestroy(&Auu);CHKERRQ(ierr);
>   ierr = MatDestroy(&Aup);CHKERRQ(ierr);
>   ierr = MatDestroy(&Apu);CHKERRQ(ierr);
>   ierr = MatDestroy(&Spp);CHKERRQ(ierr);
>
>   ierr = DMDAVecRestoreArray(cda,coords,&LA_coords);CHKERRQ(ierr);
>   ierr = DMDAVecRestoreArray(dm_coeff,coeff,&LA_coeff);CHKERRQ(ierr);
>
>   /* the index sets and the array holding them are destroyed/freed here */
>   ierr = ISDestroy(&is[0]);CHKERRQ(ierr);
>   ierr = ISDestroy(&is[1]);CHKERRQ(ierr);
>   ierr = PetscFree(is);CHKERRQ(ierr);
>
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DBillinearizeMesh"
> /*
>   Stokes2DBillinearizeMesh - Overwrites the coordinates of the non-corner nodes of every
>   2x2 macro element of the velocity DM with values interpolated from the macro element.
>
>   Input parameter:
>     dm_stokes - DMComposite from which the velocity DM and the pressure DM are extracted
>
>   Notes:
>   For each macro element (mi,mj) the coordinates returned by GetElementCoordsMacro() are
>   combined with the weights from FEBasisQ1Evaluate_Ni() at five reference points (the four
>   edge mid-points and the centre of the [-1,1]^2 square). The results are stored in the
>   local coordinate array at nodes (2mi,2mj+1), (2mi+1,2mj+2), (2mi+2,2mj+1), (2mi+1,2mj)
>   and (2mi+1,2mj+1). Afterwards the global coordinate vector is zeroed and refilled from
>   the local one with INSERT_VALUES.
> */
> PetscErrorCode Stokes2DBillinearizeMesh(DM dm_stokes)
> {
>   /* reference points: left, top, right, bottom edge mid-points, then the centre */
>   PetscScalar    ref_xi[5][2] = { {-1.0, 0.0}, { 0.0, 1.0}, { 1.0, 0.0}, { 0.0,-1.0}, { 0.0, 0.0} };
>   /* (i,j) node offsets inside the macro element matching ref_xi[] entry by entry */
>   const PetscInt node_shift[5][2] = { {0,1}, {1,2}, {2,1}, {1,0}, {1,1} };
>   DM             dav,dap,cda,dac;
>   Vec            coords,global_coords;
>   DMDACoor2d     **LA_coords;
>   PetscInt       sex,sey,mx,my;
>   PetscInt       mi,mj,n,kk;
>   PetscInt       *LA_gidx_u,*LA_gidx_p,mg_u,mg_p;
>   PetscScalar    el_coords_macro[4*NSD];
>   PetscScalar    Ni[4],new_xy[5][2];
>   PetscErrorCode ierr;
>
>   PetscFunctionBeginUser;
>
>   /* velocity and pressure DMs */
>   ierr = DMCompositeGetEntries(dm_stokes,&dav,&dap);CHKERRQ(ierr);
>
>   /* local coordinate array of the velocity DM */
>   ierr = DMGetCoordinateDM(dav,&cda);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(dav,&coords);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(cda,coords,&LA_coords);CHKERRQ(ierr);
>
>   /* index queries kept from the original routine; their results are not used below */
>   ierr = DMDAGetGlobalIndices(dav,0,&LA_gidx_u);CHKERRQ(ierr);
>   ierr = DMDAGetGlobalIndices(dap,0,&LA_gidx_p);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(dav,0,0,0,&mg_u,0,0);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(dap,0,0,0,&mg_p,0,0);CHKERRQ(ierr);
>
>   /* walk over macro elements: element range halved in each direction */
>   ierr = DMDAGetElementCorners(dav,&sex,&sey,0,&mx,&my,0);CHKERRQ(ierr);
>   for (mj = sey/2; mj < (sey+my)/2; mj++) {
>     for (mi = sex/2; mi < (sex+mx)/2; mi++) {
>
>       ierr = GetElementCoordsMacro(LA_coords,2*mi,2*mj,el_coords_macro);CHKERRQ(ierr);
>
>       /* first evaluate all five interpolated positions ... */
>       for (n = 0; n < 5; n++) {
>         new_xy[n][0] = new_xy[n][1] = 0.0;
>
>         FEBasisQ1Evaluate_Ni(ref_xi[n],Ni);
>
>         for (kk = 0; kk < 4; kk++) {
>           new_xy[n][0] += Ni[kk]*el_coords_macro[2*kk+0];
>           new_xy[n][1] += Ni[kk]*el_coords_macro[2*kk+1];
>         }
>       }
>
>       /* ... then store them at the matching nodes of the macro element */
>       for (n = 0; n < 5; n++) {
>         DMDACoor2d *node = &LA_coords[2*mj + node_shift[n][1]][2*mi + node_shift[n][0]];
>
>         node->x = new_xy[n][0];
>         node->y = new_xy[n][1];
>       }
>     }
>   }
>
>   ierr = DMDAVecRestoreArray(cda,coords,&LA_coords);CHKERRQ(ierr);
>
>   /* scatter the modified local coordinates into the (zeroed) global coordinate vector */
>   ierr = DMGetCoordinateDM(dav,&dac);CHKERRQ(ierr);
>   ierr = DMGetCoordinates(dav,&global_coords);CHKERRQ(ierr);
>   ierr = VecZeroEntries(global_coords);CHKERRQ(ierr);
>   ierr = DMLocalToGlobalBegin(dac,coords,INSERT_VALUES,global_coords);CHKERRQ(ierr);
>   ierr = DMLocalToGlobalEnd(dac,coords,INSERT_VALUES,global_coords);CHKERRQ(ierr);
>
>   PetscFunctionReturn(0);
> }
>
>
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DDMDAAddValuesLocalStencil_U_Q1"
> /*
>   Stokes2DDMDAAddValuesLocalStencil_U_Q1 - Accumulates an element vector into a 2D array
>   of velocity dofs.
>
>   Input parameters:
>     fields_F - array indexed as fields_F[j][i]; its u_dof/v_dof members are incremented
>     u_eqn    - 8 stencils; entry 2*n addresses the u dof and entry 2*n+1 the v dof of node n
>     Fe_u     - 8 element values, laid out like u_eqn
> */
> PetscErrorCode Stokes2DDMDAAddValuesLocalStencil_U_Q1(StokesUDOF **fields_F,MatStencil u_eqn[],PetscScalar Fe_u[])
> {
>   PetscInt node;
>
>   PetscFunctionBeginUser;
>   for (node = 0; node < 4; node++) {
>     const MatStencil *su = &u_eqn[2*node];
>     const MatStencil *sv = &u_eqn[2*node+1];
>
>     fields_F[su->j][su->i].u_dof += Fe_u[2*node];
>     fields_F[sv->j][sv->i].v_dof += Fe_u[2*node+1];
>   }
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DAssembleRHS_Q1"
> /*
>   Stokes2DAssembleRHS_Q1 - Assembles the velocity part of the right-hand side vector.
>
>   Input parameters:
>     F         - vector on dm_stokes; zeroed on entry, then its velocity part is filled
>     dm_stokes - DMComposite from which the velocity DM and the pressure DM are extracted
>     dm_coeff  - DMDA used to index the per-element GaussPointCoefficients
>     coeff     - vector of coefficients accessed through dm_coeff
>
>   Notes:
>   Element vectors from FormMomentumRhsQ1() are summed into a local velocity vector which
>   is then added into the velocity sub-vector of F. Nothing is written to the pressure
>   sub-vector, so it keeps the zeros set at the start of the routine.
> */
> PetscErrorCode Stokes2DAssembleRHS_Q1(Vec F,DM dm_stokes,DM dm_coeff,Vec coeff)
> {
>   DM                     dav,dap,cda;
>   Vec                    coords;
>   DMDACoor2d             **LA_coords;
>   MatStencil             u_eqn[NODES_PER_EL*U_DOFS]; /* 2 degrees of freedom */
>   PetscInt               sex,sey,mx,my;
>   PetscInt               ei,ej;
>   PetscScalar            Fe[NODES_PER_EL*U_DOFS];
>   PetscScalar            el_coords[NODES_PER_EL*NSD];
>   GaussPointCoefficients **LA_coeff;
>   PetscScalar            *coeff_fx,*coeff_fy;
>   Vec                    Fu_local,Fu,Fp;
>   StokesUDOF             **LA_ff;
>   PetscErrorCode         ierr;
>
>   PetscFunctionBeginUser;
>
>   ierr = VecZeroEntries(F);CHKERRQ(ierr);
>
>   /* access dm for u,p */
>   ierr = DMCompositeGetEntries(dm_stokes,&dav,&dap);CHKERRQ(ierr);
>
>   /* setup for coords */
>   ierr = DMGetCoordinateDM(dav,&cda);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(dav,&coords);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(cda,coords,&LA_coords);CHKERRQ(ierr);
>
>   /* setup for coefficients */
>   ierr = DMDAVecGetArray(dm_coeff,coeff,&LA_coeff);CHKERRQ(ierr);
>
>   /* get access to the velocity and pressure sub-vectors of F */
>   ierr = DMCompositeGetAccess(dm_stokes,F,&Fu,&Fp);CHKERRQ(ierr);
>
>   /* zeroed local work vector that collects the element contributions */
>   ierr = DMGetLocalVector(dav,&Fu_local);CHKERRQ(ierr);
>   ierr = VecZeroEntries(Fu_local);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(dav,Fu_local,&LA_ff);CHKERRQ(ierr);
>
>   /* sum */
>   ierr = DMDAGetElementCorners(dav,&sex,&sey,0,&mx,&my,0);CHKERRQ(ierr);
>   for (ej = sey; ej < sey+my; ej++) {
>     for (ei = sex; ei < sex+mx; ei++) {
>       /* get coords for the element; the error code is checked as in the operator assembly routines */
>       ierr = GetElementCoords(LA_coords,ei,ej,el_coords);CHKERRQ(ierr);
>
>       /* get coefficients for the element */
>       coeff_fx = LA_coeff[ej][ei].fx;
>       coeff_fy = LA_coeff[ej][ei].fy;
>
>       /* initialise element rhs vector */
>       ierr = PetscMemzero(Fe,sizeof(PetscScalar)*NODES_PER_EL*U_DOFS);CHKERRQ(ierr);
>
>       /* form element rhs vector */
>       FormMomentumRhsQ1(Fe,el_coords,coeff_fx,coeff_fy);
>
>       /* add element vector into the local velocity array */
>       ierr = Stokes2DDMDAGetElementEqnumsU_Q1(u_eqn,ei,ej);CHKERRQ(ierr);
>       ierr = Stokes2DDMDAAddValuesLocalStencil_U_Q1(LA_ff,u_eqn,Fe);CHKERRQ(ierr);
>     }
>   }
>
>   /* add the local contributions into the velocity sub-vector and release everything */
>   ierr = DMDAVecRestoreArray(dav,Fu_local,&LA_ff);CHKERRQ(ierr);
>   ierr = DMLocalToGlobalBegin(dav,Fu_local,ADD_VALUES,Fu);CHKERRQ(ierr);
>   ierr = DMLocalToGlobalEnd(dav,Fu_local,ADD_VALUES,Fu);CHKERRQ(ierr);
>   ierr = DMRestoreLocalVector(dav,&Fu_local);CHKERRQ(ierr);
>   ierr = DMCompositeRestoreAccess(dm_stokes,F,&Fu,&Fp);CHKERRQ(ierr);
>
>   ierr = DMDAVecRestoreArray(cda,coords,&LA_coords);CHKERRQ(ierr);
>   ierr = DMDAVecRestoreArray(dm_coeff,coeff,&LA_coeff);CHKERRQ(ierr);
>
>   PetscFunctionReturn(0);
> }
>
> /*
>   Scratch data shared by the Stokes2DBCApply* routines below.
>
>   Auu,Aup,Apu   - sub-matrices extracted from the operator (Apu stays NULL when not requested)
>   is            - global index sets of the composite DM (two entries are used)
>   si,sj,nx,ny   - ghost corners of the velocity DMDA
>   M,N           - global sizes of the velocity DMDA
>   n_dofs        - degrees of freedom per node of the velocity DMDA
>   g_idx         - index array returned by DMDAGetGlobalIndices for the velocity DMDA
>   bc_global_ids - constrained indices collected so far
>   bc_vals       - value imposed for each collected index
>   nbcs,max_bcs  - number of collected entries and capacity of the two arrays
> */
> typedef struct {
>   Mat         Auu,Aup,Apu;
>   IS          *is;
>   PetscInt    si,sj,nx,ny;
>   PetscInt    M,N;
>   PetscInt    n_dofs;
>   PetscInt    *g_idx;
>   PetscInt    *bc_global_ids;
>   PetscScalar *bc_vals;
>   PetscInt    nbcs,max_bcs;
> } StokesBCWork;
>
> #undef __FUNCT__
> #define __FUNCT__ "StokesBCWorkCreate_Private"
> /* Extracts the sub-matrices, queries the velocity DMDA layout and allocates the index/value arrays. */
> static PetscErrorCode StokesBCWorkCreate_Private(DM dm_stokes,Mat A,PetscBool need_Apu,StokesBCWork *w)
> {
>   DM             dav,dap;
>   PetscInt       k;
>   PetscErrorCode ierr;
>
>   PetscFunctionBeginUser;
>   w->Auu  = NULL;
>   w->Aup  = NULL;
>   w->Apu  = NULL;
>   w->nbcs = 0;
>
>   /* access dm for u,p */
>   ierr = DMCompositeGetEntries(dm_stokes,&dav,&dap);CHKERRQ(ierr);
>
>   /* access mat nest */
>   ierr = DMCompositeGetGlobalISs(dm_stokes,&w->is);CHKERRQ(ierr);
>   ierr = MatGetSubMatrix(A,w->is[0],w->is[0],MAT_INITIAL_MATRIX,&w->Auu);CHKERRQ(ierr);
>   ierr = MatGetSubMatrix(A,w->is[0],w->is[1],MAT_INITIAL_MATRIX,&w->Aup);CHKERRQ(ierr);
>   if (need_Apu) {
>     ierr = MatGetSubMatrix(A,w->is[1],w->is[0],MAT_INITIAL_MATRIX,&w->Apu);CHKERRQ(ierr);
>   }
>
>   /* layout of the velocity DMDA */
>   ierr = DMDAGetGlobalIndices(dav,NULL,&w->g_idx);CHKERRQ(ierr);
>   ierr = DMDAGetGhostCorners(dav,&w->si,&w->sj,0,&w->nx,&w->ny,0);CHKERRQ(ierr);
>   ierr = DMDAGetInfo(dav,0,&w->M,&w->N,0,0,0,0,&w->n_dofs,0,0,0,0,0);CHKERRQ(ierr);
>
>   w->max_bcs = w->n_dofs*(2*w->nx+2*w->ny);
>   ierr = PetscMalloc(sizeof(PetscInt)*w->max_bcs,&w->bc_global_ids);CHKERRQ(ierr);
>   ierr = PetscMalloc(sizeof(PetscScalar)*w->max_bcs,&w->bc_vals);CHKERRQ(ierr);
>
>   /* unused slots hold -1; only the first nbcs entries are ever handed to PETSc */
>   for (k = 0; k < w->max_bcs; k++) {
>     w->bc_global_ids[k] = -1;
>   }
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "StokesBCWorkAppendSide_Private"
> /*
>   Appends, for every node on one side of the local ghosted patch, the indices of the
>   components dof_first .. dof_first+ndof-1 together with the value to impose.
>   side: 0 = top, 1 = right, 2 = bottom, 3 = left. Nothing is appended when the patch
>   does not touch that side of the global grid, or when side is not one of 0..3.
> */
> static PetscErrorCode StokesBCWorkAppendSide_Private(StokesBCWork *w,PetscInt side,PetscInt dof_first,PetscInt ndof,PetscScalar value)
> {
>   PetscInt start = 0,stride = 0,count = 0;
>   PetscInt k,d;
>
>   PetscFunctionBeginUser;
>   /* node k of the side has local id start + k*stride (row-major, nx nodes per row) */
>   switch (side) {
>   case 0: /* top */
>     if (w->sj+w->ny == w->N) { start = (w->ny-1)*w->nx; stride = 1; count = w->nx; }
>     break;
>   case 1: /* right */
>     if (w->si+w->nx == w->M) { start = w->nx-1; stride = w->nx; count = w->ny; }
>     break;
>   case 2: /* bottom */
>     if (w->sj == 0) { start = 0; stride = 1; count = w->nx; }
>     break;
>   case 3: /* left */
>     if (w->si == 0) { start = 0; stride = w->nx; count = w->ny; }
>     break;
>   default: /* unknown side: nothing is constrained */
>     break;
>   }
>
>   for (k = 0; k < count; k++) {
>     const PetscInt local_id = start+k*stride;
>
>     for (d = dof_first; d < dof_first+ndof; d++) {
>       if (w->nbcs >= w->max_bcs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Boundary condition scratch arrays are full");
>       w->bc_global_ids[w->nbcs] = w->g_idx[w->n_dofs*local_id+d];
>       w->bc_vals[w->nbcs]       = value;
>       w->nbcs++;
>     }
>   }
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "StokesBCWorkApply_Private"
> /*
>   Imposes the collected constraints: inserts the values into b (when b is non-NULL) and
>   zeroes the constrained rows of Auu (unit diagonal) and Aup (zero diagonal).
>   With symmetric set, the columns of Auu are zeroed as well and block (1,0) of both A and B
>   is replaced by a MatCreateTranspose() of the modified Aup.
> */
> static PetscErrorCode StokesBCWorkApply_Private(StokesBCWork *w,Mat A,Mat B,Vec b,PetscBool symmetric)
> {
>   PetscErrorCode ierr;
>
>   PetscFunctionBeginUser;
>   if (b) {
>     ierr = VecSetValues(b,w->nbcs,w->bc_global_ids,w->bc_vals,INSERT_VALUES);CHKERRQ(ierr);
>     ierr = VecAssemblyBegin(b);CHKERRQ(ierr);
>     ierr = VecAssemblyEnd(b);CHKERRQ(ierr);
>   }
>
>   if (symmetric) {
>     PetscInt ref;
>     Mat      Apu_2;
>
>     ierr = MatZeroRowsColumns(w->Auu,w->nbcs,w->bc_global_ids,1.0,0,0);CHKERRQ(ierr);
>     ierr = MatZeroRows(w->Aup,w->nbcs,w->bc_global_ids,0.0,0,0);CHKERRQ(ierr);
>
>     ierr = PetscObjectGetReference((PetscObject)w->Apu,&ref);CHKERRQ(ierr);
>     printf("ref(Apu) = %d \n",(int)ref);
>
>     /* one call is enough: MatDestroy() clears the handle, so repeated calls do nothing */
>     ierr = MatDestroy(&w->Apu);CHKERRQ(ierr);
>
>     ierr = MatCreateTranspose(w->Aup,&Apu_2);CHKERRQ(ierr);
>     /* NOTE(review): these two extra references are kept from the original code and are never
>        released here; confirm whether they are needed or whether they leak the transpose */
>     ierr = PetscObjectReference((PetscObject)Apu_2);CHKERRQ(ierr);
>     ierr = PetscObjectReference((PetscObject)Apu_2);CHKERRQ(ierr);
>     ierr = PetscObjectGetReference((PetscObject)Apu_2,&ref);CHKERRQ(ierr);
>     printf("ref(Apu_2) = %d \n",(int)ref);
>
>     ierr = MatNestSetSubMat(A,1,0,Apu_2);CHKERRQ(ierr);
>     ierr = MatNestSetSubMat(B,1,0,Apu_2);CHKERRQ(ierr);
>     ierr = MatDestroy(&Apu_2);CHKERRQ(ierr);
>   } else {
>     if (w->Auu) {
>       ierr = MatZeroRows(w->Auu,w->nbcs,w->bc_global_ids,1.0,0,0);CHKERRQ(ierr);
>     }
>     if (w->Aup) {
>       ierr = MatZeroRows(w->Aup,w->nbcs,w->bc_global_ids,0.0,0,0);CHKERRQ(ierr);
>     }
>   }
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "StokesBCWorkDestroy_Private"
> /* Releases everything acquired by StokesBCWorkCreate_Private. */
> static PetscErrorCode StokesBCWorkDestroy_Private(StokesBCWork *w)
> {
>   PetscErrorCode ierr;
>
>   PetscFunctionBeginUser;
>   ierr = MatDestroy(&w->Auu);CHKERRQ(ierr);
>   ierr = MatDestroy(&w->Aup);CHKERRQ(ierr);
>   ierr = MatDestroy(&w->Apu);CHKERRQ(ierr); /* no-op when Apu was never extracted or already released */
>
>   ierr = ISDestroy(&w->is[0]);CHKERRQ(ierr);
>   ierr = ISDestroy(&w->is[1]);CHKERRQ(ierr);
>   ierr = PetscFree(w->is);CHKERRQ(ierr);
>
>   ierr = PetscFree(w->bc_vals);CHKERRQ(ierr);
>   ierr = PetscFree(w->bc_global_ids);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DBCApply_FreeSlip"
> /*
>   Stokes2DBCApply_FreeSlip - zero normal velocity on all four sides, rows only.
>
>   Component 0 is constrained on the left and right sides and component 1 on the bottom
>   and top sides. B is not used by this routine. b may be NULL.
> */
> PetscErrorCode Stokes2DBCApply_FreeSlip(DM dm_stokes,Mat A,Mat B,Vec b)
> {
>   StokesBCWork   w;
>   PetscErrorCode ierr;
>
>   PetscFunctionBeginUser;
>   ierr = PetscPrintf(PETSC_COMM_WORLD,"Stokes2DBCApply_FreeSlip \n");CHKERRQ(ierr);
>   ierr = StokesBCWorkCreate_Private(dm_stokes,A,PETSC_FALSE,&w);CHKERRQ(ierr);
>
>   ierr = StokesBCWorkAppendSide_Private(&w,3,0,1,0.0);CHKERRQ(ierr); /* left   */
>   ierr = StokesBCWorkAppendSide_Private(&w,1,0,1,0.0);CHKERRQ(ierr); /* right  */
>   ierr = StokesBCWorkAppendSide_Private(&w,2,1,1,0.0);CHKERRQ(ierr); /* bottom */
>   ierr = StokesBCWorkAppendSide_Private(&w,0,1,1,0.0);CHKERRQ(ierr); /* top    */
>
>   ierr = StokesBCWorkApply_Private(&w,A,B,b,PETSC_FALSE);CHKERRQ(ierr);
>   ierr = StokesBCWorkDestroy_Private(&w);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DBCApplySym_FreeSlip"
> /*
>   Stokes2DBCApplySym_FreeSlip - same constraints as Stokes2DBCApply_FreeSlip, imposed on rows and
>   columns of Auu, with block (1,0) of A and B replaced by the transpose of the modified Aup.
>   b may be NULL.
> */
> PetscErrorCode Stokes2DBCApplySym_FreeSlip(DM dm_stokes,Mat A,Mat B,Vec b)
> {
>   StokesBCWork   w;
>   PetscErrorCode ierr;
>
>   PetscFunctionBeginUser;
>   ierr = PetscPrintf(PETSC_COMM_WORLD,"Stokes2DBCApplySym_FreeSlip \n");CHKERRQ(ierr);
>   ierr = StokesBCWorkCreate_Private(dm_stokes,A,PETSC_TRUE,&w);CHKERRQ(ierr);
>
>   ierr = StokesBCWorkAppendSide_Private(&w,3,0,1,0.0);CHKERRQ(ierr); /* left   */
>   ierr = StokesBCWorkAppendSide_Private(&w,1,0,1,0.0);CHKERRQ(ierr); /* right  */
>   ierr = StokesBCWorkAppendSide_Private(&w,2,1,1,0.0);CHKERRQ(ierr); /* bottom */
>   ierr = StokesBCWorkAppendSide_Private(&w,0,1,1,0.0);CHKERRQ(ierr); /* top    */
>
>   ierr = StokesBCWorkApply_Private(&w,A,B,b,PETSC_TRUE);CHKERRQ(ierr);
>   ierr = StokesBCWorkDestroy_Private(&w);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DBCApply_FreeSlip_g"
> /*
>   Stokes2DBCApply_FreeSlip_g - per-side free-slip or no-slip constraints with value zero.
>
>   sides[] is indexed 0 = top, 1 = right, 2 = bottom, 3 = left. BCFreeSlip constrains
>   component 0 on left/right and component 1 on bottom/top; BCNoSlip constrains
>   components 0 and 1; any other entry leaves that side untouched.
>   symmetric selects between row-only and row+column elimination. b may be NULL.
> */
> PetscErrorCode Stokes2DBCApply_FreeSlip_g(DM dm_stokes,Mat A,Mat B,Vec b,PetscBool symmetric,BCType sides[])
> {
>   /* sides are visited left, right, bottom, top */
>   static const PetscInt visit_order[4] = {3,1,2,0};
>   StokesBCWork          w;
>   PetscInt              s;
>   PetscErrorCode        ierr;
>
>   PetscFunctionBeginUser;
>   ierr = PetscPrintf(PETSC_COMM_WORLD,"Stokes2DBCApply_FreeSlip_g \n");CHKERRQ(ierr);
>   if (symmetric) {
>     ierr = PetscPrintf(PETSC_COMM_WORLD,"  + using symmetric bcs\n");CHKERRQ(ierr);
>   } else {
>     ierr = PetscPrintf(PETSC_COMM_WORLD,"  + using non-symmetric bcs\n");CHKERRQ(ierr);
>   }
>
>   ierr = StokesBCWorkCreate_Private(dm_stokes,A,PETSC_TRUE,&w);CHKERRQ(ierr);
>
>   for (s = 0; s < 4; s++) {
>     const PetscInt side = visit_order[s];
>
>     if (sides[side] == BCFreeSlip) {
>       /* only the component normal to the side is constrained */
>       const PetscInt normal_dof = (side == 1 || side == 3) ? 0 : 1;
>
>       ierr = StokesBCWorkAppendSide_Private(&w,side,normal_dof,1,0.0);CHKERRQ(ierr);
>     } else if (sides[side] == BCNoSlip) {
>       ierr = StokesBCWorkAppendSide_Private(&w,side,0,2,0.0);CHKERRQ(ierr);
>     }
>   }
>
>   ierr = StokesBCWorkApply_Private(&w,A,B,b,symmetric);CHKERRQ(ierr);
>   ierr = StokesBCWorkDestroy_Private(&w);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DBCApply_g"
> /*
>   Stokes2DBCApply_g - constrains one velocity component on one side to a given value.
>
>   side  - 0 = top, 1 = right, 2 = bottom, 3 = left; any other number constrains nothing
>   dof   - component offset within a node
>   value - value written into b for the constrained entries
>   symmetric selects between row-only and row+column elimination. b may be NULL.
> */
> PetscErrorCode Stokes2DBCApply_g(DM dm_stokes,Mat A,Mat B,Vec b,PetscBool symmetric,PetscInt side,PetscInt dof,PetscScalar value)
> {
>   StokesBCWork   w;
>   PetscErrorCode ierr;
>
>   PetscFunctionBeginUser;
>   ierr = PetscPrintf(PETSC_COMM_WORLD,"Stokes2DBCApply_g \n");CHKERRQ(ierr);
>   if (symmetric) {
>     ierr = PetscPrintf(PETSC_COMM_WORLD,"  + using symmetric bcs\n");CHKERRQ(ierr);
>   } else {
>     ierr = PetscPrintf(PETSC_COMM_WORLD,"  + using non-symmetric bcs\n");CHKERRQ(ierr);
>   }
>
>   ierr = StokesBCWorkCreate_Private(dm_stokes,A,PETSC_TRUE,&w);CHKERRQ(ierr);
>   ierr = StokesBCWorkAppendSide_Private(&w,side,dof,1,value);CHKERRQ(ierr);
>   ierr = StokesBCWorkApply_Private(&w,A,B,b,symmetric);CHKERRQ(ierr);
>   ierr = StokesBCWorkDestroy_Private(&w);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
>
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DSetPDECoefficients_SolCx"
> /*
>   Stokes2DSetPDECoefficients_SolCx - sets coordinates and Gauss-point coefficients for the SolCx problem.
>
>   dm_stokes - DMComposite whose two entries are read as the velocity and pressure DMs
>   dm_coeff  - DMDA holding one GaussPointCoefficients record per element
>   coeff     - vector on dm_coeff; gp_coords, eta, fx and fy of every local (ghosted) element are written
>
>   The velocity DM gets uniform coordinates on [0,1]x[0,1]; the coefficient and pressure DMs
>   get uniform coordinates inset by half a cell. Options read: -solcx_eta0, -solcx_eta1,
>   -solcx_xc, -solcx_nz (defaults 1.0, 1.0, 0.5, 1).
> */
> PetscErrorCode Stokes2DSetPDECoefficients_SolCx(DM dm_stokes,DM dm_coeff,Vec coeff)
> {
>   DM                     dav,dap,cdav;
>   Vec                    vel_coords;
>   DMDACoor2d             **LA_vel_coords;
>   GaussPointCoefficients **LA_coeff;
>   PetscReal              dx,dy; /* real-valued cell sizes, passed on as real coordinate bounds */
>   PetscReal              opts_eta0 = 1.0,opts_eta1 = 1.0,opts_xc = 0.5;
>   PetscInt               opts_nz = 1;
>   PetscInt               M,N;
>   PetscInt               i,j,si,sj,nx,ny,p;
>   PetscErrorCode         ierr;
>
>   PetscFunctionBeginUser;
>
>   ierr = PetscPrintf(PETSC_COMM_WORLD,"ProblemType: SolCx \n");CHKERRQ(ierr);
>
>   /* access dm for u,p */
>   ierr = DMCompositeGetEntries(dm_stokes,&dav,&dap);CHKERRQ(ierr);
>
>   /* unit box [0,1] x [0,1] */
>   ierr = DMDASetUniformCoordinates(dav,0.0,1.0,0.0,1.0,0.0,0.0);CHKERRQ(ierr);
>
>   /* coefficient points sit half a cell inside the box */
>   ierr = DMDAGetInfo(dm_coeff,0,&M,&N,0,0,0,0,0,0,0,0,0,0);CHKERRQ(ierr);
>   dx   = 1.0/((PetscReal)(M));
>   dy   = 1.0/((PetscReal)(N));
>   ierr = DMDASetUniformCoordinates(dm_coeff,0.0+0.5*dx,1.0-0.5*dx,0.0+0.5*dy,1.0-0.5*dy,0.0,0.0);CHKERRQ(ierr);
>
>   /* same inset for the pressure points */
>   ierr = DMDAGetInfo(dap,0,&M,&N,0,0,0,0,0,0,0,0,0,0);CHKERRQ(ierr);
>   dx   = 1.0/((PetscReal)(M));
>   dy   = 1.0/((PetscReal)(N));
>   ierr = DMDASetUniformCoordinates(dap,0.0+0.5*dx,1.0-0.5*dx,0.0+0.5*dy,1.0-0.5*dy,0.0,0.0);CHKERRQ(ierr);
>
>   /* user overrides of the coefficient parameters */
>   ierr = PetscOptionsGetReal(PETSC_NULL,"-solcx_eta0",&opts_eta0,0);CHKERRQ(ierr);
>   ierr = PetscOptionsGetReal(PETSC_NULL,"-solcx_eta1",&opts_eta1,0);CHKERRQ(ierr);
>   ierr = PetscOptionsGetReal(PETSC_NULL,"-solcx_xc",&opts_xc,0);CHKERRQ(ierr);
>   ierr = PetscOptionsGetInt(PETSC_NULL,"-solcx_nz",&opts_nz,0);CHKERRQ(ierr);
>
>   ierr = DMGetCoordinateDM(dav,&cdav);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(dav,&vel_coords);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(cdav,vel_coords,&LA_vel_coords);CHKERRQ(ierr);
>
>   ierr = DMDAVecGetArray(dm_coeff,coeff,&LA_coeff);CHKERRQ(ierr);
>
>   ierr = DMDAGetGhostCorners(dm_coeff,&si,&sj,0,&nx,&ny,0);CHKERRQ(ierr);
>
>   for (j = sj; j < sj+ny; j++) {
>     for (i = si; i < si+nx; i++) {
>       PetscInt    ngp;
>       PetscScalar gp_xi[GAUSS_POINTS][2],gp_weight[GAUSS_POINTS];
>       PetscScalar el_coords[8];
>
>       ierr = GetElementCoords(LA_vel_coords,i,j,el_coords);CHKERRQ(ierr);
>       ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
>
>       for (p = 0; p < GAUSS_POINTS; p++) {
>         PetscScalar gp_x = 0.0,gp_y = 0.0;
>         PetscScalar xi_p[2],Ni_p[4];
>         PetscReal   coord_x,coord_y;
>         PetscInt    n;
>
>         /* interpolate the nodal coordinates to the quadrature point */
>         xi_p[0] = gp_xi[p][0];
>         xi_p[1] = gp_xi[p][1];
>         FEBasisQ1Evaluate_Ni(xi_p,Ni_p);
>
>         for (n = 0; n < NODES_PER_EL; n++) {
>           gp_x = gp_x+Ni_p[n]*el_coords[2*n];
>           gp_y = gp_y+Ni_p[n]*el_coords[2*n+1];
>         }
>         LA_coeff[j][i].gp_coords[2*p]   = gp_x;
>         LA_coeff[j][i].gp_coords[2*p+1] = gp_y;
>
>         /* define the coefficients: viscosity jumps at x = opts_xc */
>         coord_x = PetscRealPart(gp_x);
>         coord_y = PetscRealPart(gp_y);
>
>         LA_coeff[j][i].eta[p] = opts_eta0;
>         if (coord_x > opts_xc) {
>           LA_coeff[j][i].eta[p] = opts_eta1;
>         }
>
>         LA_coeff[j][i].fx[p] = 0.0;
>         LA_coeff[j][i].fy[p] = sin((PetscScalar)opts_nz*PETSC_PI*coord_y)*cos(1.0*PETSC_PI*coord_x);
>       }
>     }
>   }
>
>   ierr = DMDAVecRestoreArray(dm_coeff,coeff,&LA_coeff);CHKERRQ(ierr);
>   ierr = DMDAVecRestoreArray(cdav,vel_coords,&LA_vel_coords);CHKERRQ(ierr);
>
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "_DefineMeshGeomRT"
> /*
>   _DefineMeshGeomRT - shifts the y coordinate of every locally owned node of da by a sine in x.
>
>   da    - DMDA whose global coordinate vector is modified in place
>   Amax  - amplitude of the shift at y = y_ref
>   y_ref - height at which the amplitude is largest; the amplitude falls off linearly
>           with |y - y_ref| / y_ref on both sides
>   m     - multiplier of pi*x inside the sine
>
>   Only the vector returned by DMGetCoordinates() is written.
> */
> PetscErrorCode _DefineMeshGeomRT(DM da,PetscScalar Amax,PetscScalar y_ref,PetscScalar m)
> {
>   DM             cda;
>   Vec            coords;
>   DMDACoor2d     **LA_coords;
>   PetscInt       si,sj,nx,ny,i,j;
>   PetscErrorCode ierr;
>
>   PetscFunctionBeginUser;
>
>   /* deform the mesh with a sin */
>   ierr = DMGetCoordinateDM(da,&cda);CHKERRQ(ierr);
>   ierr = DMGetCoordinates(da,&coords);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(cda,coords,&LA_coords);CHKERRQ(ierr);
>   ierr = DMDAGetCorners(da,&si,&sj,0,&nx,&ny,0);CHKERRQ(ierr);
>
>   for (j = sj; j < sj+ny; j++) {
>     for (i = si; i < si+nx; i++) {
>       const PetscScalar x = LA_coords[j][i].x;
>       const PetscScalar y = LA_coords[j][i].y;
>       PetscScalar       a;
>
>       /* amplitude decays linearly away from y_ref */
>       if (y < y_ref) {
>         a = Amax * (1.0 - (y_ref-y)/y_ref);
>       } else {
>         a = Amax * (1.0 - (y-y_ref)/y_ref );
>       }
>
>       /* PETSC_PI instead of M_PI: M_PI is not part of ISO C */
>       LA_coords[j][i].y = y + a*sin(PETSC_PI*x*m);
>     }
>   }
>
>   ierr = DMDAVecRestoreArray(cda,coords,&LA_coords);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DSetPDECoefficients_RT"
> /*
>   Stokes2DSetPDECoefficients_RT - sets coordinates and Gauss-point coefficients for the RT problem.
>
>   dm_stokes - DMComposite whose two entries are read as the velocity and pressure DMs
>   dm_coeff  - DMDA holding one GaussPointCoefficients record per element
>   coeff     - vector on dm_coeff; gp_coords, eta, fx and fy of every local (ghosted) element are written
>
>   Steps: uniform coordinates on the unit box, sinusoidal mesh deformation, interpolation of
>   the Gauss-point coordinates, two-valued coefficients split by the curve
>   y = y_ref + Amax*sin(pi*m*x), values made constant per element (taken from Gauss point 0),
>   then averaged over 2x2 groups of elements anchored at even (i,j).
>   Options read: -rt_eta0, -rt_eta1 (both default to 1.0).
> */
> PetscErrorCode Stokes2DSetPDECoefficients_RT(DM dm_stokes,DM dm_coeff,Vec coeff)
> {
>   DM                     dav,dap,cdav;
>   Vec                    vel_coords;
>   DMDACoor2d             **LA_vel_coords;
>   GaussPointCoefficients **LA_coeff;
>   PetscReal              dx,dy; /* real-valued cell sizes, passed on as real coordinate bounds */
>   PetscReal              opts_eta0 = 1.0,opts_eta1 = 1.0;
>   PetscScalar            Amax = 0.01,m = 7.4,y_ref = 0.5;
>   PetscInt               M,N;
>   PetscInt               i,j,si,sj,nx,ny,p;
>   PetscErrorCode         ierr;
>
>   PetscFunctionBeginUser;
>
>   ierr = PetscPrintf(PETSC_COMM_WORLD,"ProblemType: RT \n");CHKERRQ(ierr);
>
>   /* access dm for u,p */
>   ierr = DMCompositeGetEntries(dm_stokes,&dav,&dap);CHKERRQ(ierr);
>
>   /* unit box [0,1] x [0,1] */
>   ierr = DMDASetUniformCoordinates(dav,0.0,1.0,0.0,1.0,0.0,0.0);CHKERRQ(ierr);
>
>   ierr = DMDAGetInfo(dm_coeff,0,&M,&N,0,0,0,0,0,0,0,0,0,0);CHKERRQ(ierr);
>   dx   = 1.0/((PetscReal)(M));
>   dy   = 1.0/((PetscReal)(N));
>   ierr = DMDASetUniformCoordinates(dm_coeff,0.0+0.5*dx,1.0-0.5*dx,0.0+0.5*dy,1.0-0.5*dy,0.0,0.0);CHKERRQ(ierr);
>
>   ierr = DMDAGetInfo(dap,0,&M,&N,0,0,0,0,0,0,0,0,0,0);CHKERRQ(ierr);
>   dx   = 1.0/((PetscReal)(M));
>   dy   = 1.0/((PetscReal)(N));
>   ierr = DMDASetUniformCoordinates(dap,0.0+0.5*dx,1.0-0.5*dx,0.0+0.5*dy,1.0-0.5*dy,0.0,0.0);CHKERRQ(ierr);
>
>   /* deform the vel mesh with a sin */
> #if 1
>   ierr = _DefineMeshGeomRT(dav,Amax,y_ref,m);CHKERRQ(ierr);
>   ierr = Stokes2DBillinearizeMesh(dm_stokes);CHKERRQ(ierr);
>   ierr = _DefineMeshGeomRT(dap,Amax,y_ref,m);CHKERRQ(ierr);
>   ierr = _DefineMeshGeomRT(dm_coeff,Amax,y_ref,m);CHKERRQ(ierr);
> #endif
>
>   /* user overrides of the two viscosities */
>   ierr = PetscOptionsGetReal(PETSC_NULL,"-rt_eta0",&opts_eta0,0);CHKERRQ(ierr);
>   ierr = PetscOptionsGetReal(PETSC_NULL,"-rt_eta1",&opts_eta1,0);CHKERRQ(ierr);
>
>   ierr = DMGetCoordinateDM(dav,&cdav);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(dav,&vel_coords);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(cdav,vel_coords,&LA_vel_coords);CHKERRQ(ierr);
>
>   ierr = DMDAGetGhostCorners(dm_coeff,&si,&sj,0,&nx,&ny,0);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(dm_coeff,coeff,&LA_coeff);CHKERRQ(ierr);
>
>   for (j = sj; j < sj+ny; j++) {
>     for (i = si; i < si+nx; i++) {
>       PetscInt    ngp,n;
>       PetscScalar gp_xi[GAUSS_POINTS][2],gp_weight[GAUSS_POINTS];
>       PetscScalar el_coords[8];
>       PetscScalar xi_p[2],Ni_p[4];
>
>       ierr = GetElementCoords(LA_vel_coords,i,j,el_coords);CHKERRQ(ierr);
>       ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
>
>       for (p = 0; p < GAUSS_POINTS; p++) {
>         PetscScalar gp_x = 0.0,gp_y = 0.0;
>         PetscReal   coord_x,coord_y,interface_y;
>
>         /* interpolate the coordinates to gp */
>         xi_p[0] = gp_xi[p][0];
>         xi_p[1] = gp_xi[p][1];
>         FEBasisQ1Evaluate_Ni(xi_p,Ni_p);
>
>         for (n = 0; n < NODES_PER_EL; n++) {
>           gp_x = gp_x+Ni_p[n]*el_coords[2*n];
>           gp_y = gp_y+Ni_p[n]*el_coords[2*n+1];
>         }
>         LA_coeff[j][i].gp_coords[2*p]   = gp_x;
>         LA_coeff[j][i].gp_coords[2*p+1] = gp_y;
>
>         /* define the coefficients on either side of the interface curve */
>         coord_x = PetscRealPart(gp_x);
>         coord_y = PetscRealPart(gp_y);
>
>         interface_y = y_ref + Amax*sin(PETSC_PI*coord_x*m);
>
>         LA_coeff[j][i].eta[p] = opts_eta1;
>         LA_coeff[j][i].fx[p]  = 0.0;
>         LA_coeff[j][i].fy[p]  = 3300.0;
>         if (coord_y < interface_y) {
>           LA_coeff[j][i].eta[p] = opts_eta0;
>           LA_coeff[j][i].fx[p]  = 0.0;
>           LA_coeff[j][i].fy[p]  = 3200.0;
>         }
>         LA_coeff[j][i].fy[p] = -9.8 * LA_coeff[j][i].fy[p] * 1.0e0;
>       }
>
>       /* constant properties over element: every Gauss point takes the values of point 0 */
>       for (p = 0; p < GAUSS_POINTS; p++) {
>         LA_coeff[j][i].eta[p] = LA_coeff[j][i].eta[0];
>         LA_coeff[j][i].fx[p]  = LA_coeff[j][i].fx[0];
>         LA_coeff[j][i].fy[p]  = LA_coeff[j][i].fy[0];
>       }
>     }
>   }
>   ierr = DMDAVecRestoreArray(cdav,vel_coords,&LA_vel_coords);CHKERRQ(ierr);
>
> #if 1
>   /* average properties over macro element */
>   for (j = sj; j < sj+ny; j++) {
>     for (i = si; i < si+nx; i++) {
>
>       if ( (i%2==0) && (j%2==0) ) {
>         /* clamp the 2x2 group to the local ghosted patch so that no entry outside
>            the array is touched when the patch ends on an even index */
>         const PetscInt ii_end = PetscMin(i+2,si+nx);
>         const PetscInt jj_end = PetscMin(j+2,sj+ny);
>         PetscScalar    e = 0.0,fx = 0.0,fy = 0.0;
>         PetscInt       ii,jj,sum = 0;
>
>         for (ii = i; ii < ii_end; ii++) {
>           for (jj = j; jj < jj_end; jj++) {
>             for (p = 0; p < GAUSS_POINTS; p++) {
>               e  += LA_coeff[jj][ii].eta[p];
>               fx += LA_coeff[jj][ii].fx[p];
>               fy += LA_coeff[jj][ii].fy[p];
>               sum++;
>             }
>           }
>         }
>
>         for (ii = i; ii < ii_end; ii++) {
>           for (jj = j; jj < jj_end; jj++) {
>             for (p = 0; p < GAUSS_POINTS; p++) {
>               LA_coeff[jj][ii].eta[p] = e/((PetscScalar)sum);
>               LA_coeff[jj][ii].fx[p]  = fx/((PetscScalar)sum);
>               LA_coeff[jj][ii].fy[p]  = fy/((PetscScalar)sum);
>             }
>           }
>         }
>       }
>
>     }
>   }
> #endif
>
>   ierr = DMDAVecRestoreArray(dm_coeff,coeff,&LA_coeff);CHKERRQ(ierr);
>   PetscFunctionReturn(0);
> }
>
>
> /*
>  Stokes2DSetPDECoefficients_Layer - Defines the coefficients for the "Layer" problem.
>
>  Sets uniform coordinates on the velocity, pressure and coefficient DMDAs for the
>  domain [0,5] x [0,0.5], stores the physical coordinates of every Gauss point in
>  coeff, and assigns a piecewise-constant viscosity by height:
>
>      y <= 0.1 : -l_eta0 (default 1.0e-1)
>      y >  0.1 : -l_eta1 (default 1.0)
>      y >  0.2 : -l_eta2 (default 1.0e-2)
>      y >  0.3 : -l_eta3 (default 1.0)
>
>  Each interface height is shifted, per cell, by a random offset in [-0.01,0.01]
>  drawn from rand(). The body force (fx,fy) is zero everywhere.
>
>  Input:
>    dm_stokes - composite DM holding the velocity and pressure DMDAs
>    dm_coeff  - DMDA describing the layout of coeff
>    coeff     - vector of GaussPointCoefficients, overwritten on output
> */
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DSetPDECoefficients_Layer"
> PetscErrorCode Stokes2DSetPDECoefficients_Layer(DM dm_stokes,DM dm_coeff,Vec coeff)
> {
>   PetscErrorCode         ierr;
>   DM                     dav,dap,cdav;
>   PetscReal              dx,dy; /* PetscReal: these are passed to DMDASetUniformCoordinates() */
>   PetscInt               M,N;
>   Vec                    vel_coords;
>   DMDACoor2d             **LA_vel_coords;
>   GaussPointCoefficients **LA_coeff;
>   PetscReal              opts_eta[4];
>   const PetscReal        lH[5] = { 0.0, 0.1, 0.2, 0.3, 0.5 }; /* layer interface heights; lH[4] is the domain height */
>   PetscReal              Lx,Ly;
>   PetscInt               i,j,k,si,sj,nx,ny,p;
>
>   PetscFunctionBeginUser;
>
>   ierr = PetscPrintf(PETSC_COMM_WORLD,"ProblemType: Layer \n");CHKERRQ(ierr);
>
>   /* access dm for u,p */
>   ierr = DMCompositeGetEntries(dm_stokes,&dav,&dap);CHKERRQ(ierr);
>
>   /* domain [0,5] x [0,0.5] */
>   Lx   = 5.0;
>   Ly   = lH[4];
>   ierr = DMDASetUniformCoordinates(dav,0.0,Lx,0.0,Ly,0.0,0.0);CHKERRQ(ierr);
>
>   /* coefficient and pressure points sit half a cell inside the boundary */
>   ierr = DMDAGetInfo(dm_coeff,0,&M,&N,0,0,0,0,0,0,0,0,0,0);CHKERRQ(ierr);
>   dx   = Lx/((PetscReal)(M));
>   dy   = Ly/((PetscReal)(N));
>   ierr = DMDASetUniformCoordinates(dm_coeff,0.0+0.5*dx,Lx-0.5*dx,0.0+0.5*dy,Ly-0.5*dy,0.0,0.0);CHKERRQ(ierr);
>
>   ierr = DMDAGetInfo(dap,0,&M,&N,0,0,0,0,0,0,0,0,0,0);CHKERRQ(ierr);
>   dx   = Lx/((PetscReal)(M));
>   dy   = Ly/((PetscReal)(N));
>   ierr = DMDASetUniformCoordinates(dap,0.0+0.5*dx,Lx-0.5*dx,0.0+0.5*dy,Ly-0.5*dy,0.0,0.0);CHKERRQ(ierr);
>
>   ierr = DMGetCoordinateDM(dav,&cdav);CHKERRQ(ierr);
>   ierr = DMGetCoordinatesLocal(dav,&vel_coords);CHKERRQ(ierr);
>   ierr = DMDAVecGetArray(cdav,vel_coords,&LA_vel_coords);CHKERRQ(ierr);
>
>   ierr = DMDAVecGetArray(dm_coeff,coeff,&LA_coeff);CHKERRQ(ierr);
>
>   ierr = DMDAGetGhostCorners(dm_coeff,&si,&sj,0,&nx,&ny,0);CHKERRQ(ierr);
>
>   /* interpolate the nodal velocity coordinates to the Gauss points of every cell */
>   for (j = sj; j < sj+ny; j++) {
>     for (i = si; i < si+nx; i++) {
>       PetscInt    ngp;
>       PetscScalar gp_xi[GAUSS_POINTS][2],gp_weight[GAUSS_POINTS];
>       PetscScalar el_coords[8];
>
>       ierr = GetElementCoords(LA_vel_coords,i,j,el_coords);CHKERRQ(ierr);
>       ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
>
>       for (p = 0; p < GAUSS_POINTS; p++) {
>         PetscScalar gp_x = 0.0,gp_y = 0.0;
>         PetscInt    n;
>         PetscScalar xi_p[2],Ni_p[4];
>
>         xi_p[0] = gp_xi[p][0];
>         xi_p[1] = gp_xi[p][1];
>         FEBasisQ1Evaluate_Ni(xi_p,Ni_p);
>
>         for (n = 0; n < NODES_PER_EL; n++) {
>           gp_x += Ni_p[n]*el_coords[2*n];
>           gp_y += Ni_p[n]*el_coords[2*n+1];
>         }
>         LA_coeff[j][i].gp_coords[2*p]   = gp_x;
>         LA_coeff[j][i].gp_coords[2*p+1] = gp_y;
>       }
>     }
>   }
>
>   /* layer viscosities, bottom to top; each may be overridden on the command line */
>   opts_eta[0] = 1.0e-1;
>   opts_eta[1] = 1.0;
>   opts_eta[2] = 1.0e-2;
>   opts_eta[3] = 1.0;
>   ierr = PetscOptionsGetReal(PETSC_NULL,"-l_eta0",&opts_eta[0],0);CHKERRQ(ierr);
>   ierr = PetscOptionsGetReal(PETSC_NULL,"-l_eta1",&opts_eta[1],0);CHKERRQ(ierr);
>   ierr = PetscOptionsGetReal(PETSC_NULL,"-l_eta2",&opts_eta[2],0);CHKERRQ(ierr);
>   ierr = PetscOptionsGetReal(PETSC_NULL,"-l_eta3",&opts_eta[3],0);CHKERRQ(ierr);
>
>   /* define the coefficients */
>   for (j = sj; j < sj+ny; j++) {
>     for (i = si; i < si+nx; i++) {
>       /* one random interface offset per cell, shared by all of its Gauss points */
>       const PetscReal perturb = 1.0e-2*( -1.0+2.0*rand()/((PetscReal)RAND_MAX) );
>
>       for (p = 0; p < GAUSS_POINTS; p++) {
>         const PetscReal coord_y = PetscRealPart(LA_coeff[j][i].gp_coords[2*p+1]);
>         PetscReal       eta_p   = opts_eta[0];
>
>         /* the highest interface lying below the point decides the layer */
>         for (k = 1; k < 4; k++) {
>           if (coord_y > lH[k]+perturb) eta_p = opts_eta[k];
>         }
>
>         LA_coeff[j][i].eta[p] = eta_p;
>         LA_coeff[j][i].fx[p]  = 0.0;
>         LA_coeff[j][i].fy[p]  = 0.0;
>       }
>     }
>   }
>   ierr = DMDAVecRestoreArray(dm_coeff,coeff,&LA_coeff);CHKERRQ(ierr);
>   ierr = DMDAVecRestoreArray(cdav,vel_coords,&LA_vel_coords);CHKERRQ(ierr);
>
>   PetscFunctionReturn(0);
> }
>
>
> /*
>  Stokes2DSolve - Assembles the 2D Stokes problem on an mx x my mesh, solves it with
>  a Schur-complement fieldsplit preconditioner and writes the solution and the
>  coefficients through the Stokes*DMDAView routines.
>
>  Element type (-element_type, default 0)
>  0: Q1-P0
>  1: Q1-P0 (edge stab)
>  2: Q1(macro)-P1
>  3: Q1(macro)-P0
>
>  Problem type (-problem_type, default 1)
>  0: SolCx
>  1: RT
>  2: Layer
>
>  Other options
>  -symm    : passed as the "sym" flag to the free-slip boundary condition routine
>  stokes_  : options prefix of the outer KSP
>
>  An out-of-range element or problem type is reported as an error.
> */
> #undef __FUNCT__
> #define __FUNCT__ "Stokes2DSolve"
> PetscErrorCode Stokes2DSolve(const PetscInt mx,const PetscInt my)
> {
>   PetscErrorCode ierr;
>   PetscInt       et = 0,pt = 1;
>   DM             dm_stokes,dm_stokes_coeff;
>   Vec            x,b,coeff;
>   Mat            A,B;
>   KSP            ksp;
>   char           filename[100];
>
>   PetscFunctionBeginUser;
>
>   ierr = PetscPrintf(PETSC_COMM_WORLD,"Mesh: %D x %D \n",mx,my);CHKERRQ(ierr);
>
>   /* Read and validate both selectors before anything is created: every switch
>      below relies on them being in range, and an unknown element type would
>      otherwise leave dm_stokes, A and B uninitialized. */
>   ierr = PetscOptionsGetInt(PETSC_NULL,"-element_type",&et,PETSC_NULL);CHKERRQ(ierr);
>   if (et < 0 || et > 3) SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_ARG_OUTOFRANGE,"-element_type must be 0, 1, 2 or 3 (got %D)",et);
>   ierr = PetscOptionsGetInt(PETSC_NULL,"-problem_type",&pt,PETSC_NULL);CHKERRQ(ierr);
>   if (pt < 0 || pt > 2) SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_ARG_OUTOFRANGE,"-problem_type must be 0, 1 or 2 (got %D)",pt);
>
>   /* DMs */
>   switch (et) {
>     case 0:
>       ierr = PetscPrintf(PETSC_COMM_WORLD,"ElementType: Q1-P0 \n");CHKERRQ(ierr);
>       ierr = Stokes2DCreateDMDA_Q1P0(mx,my,&dm_stokes,&dm_stokes_coeff);CHKERRQ(ierr);
>       break;
>     case 1:
>       ierr = PetscPrintf(PETSC_COMM_WORLD,"ElementType: Q1-P0 <stab> \n");CHKERRQ(ierr);
>       ierr = Stokes2DCreateDMDA_Q1P0(mx,my,&dm_stokes,&dm_stokes_coeff);CHKERRQ(ierr);
>       break;
>     case 2:
>       ierr = PetscPrintf(PETSC_COMM_WORLD,"ElementType: Q1[macro]-P1 \n");CHKERRQ(ierr);
>       ierr = Stokes2DCreateDMDA_Q1mP1(mx,my,&dm_stokes,&dm_stokes_coeff);CHKERRQ(ierr);
>       break;
>     case 3:
>       ierr = PetscPrintf(PETSC_COMM_WORLD,"ElementType: Q1[macro]-P0 \n");CHKERRQ(ierr);
>       ierr = Stokes2DCreateDMDA_Q1mP0(mx,my,&dm_stokes,&dm_stokes_coeff);CHKERRQ(ierr);
>       break;
>   }
>
>   /* vectors */
>   ierr = DMCreateGlobalVector(dm_stokes,&x);CHKERRQ(ierr);
>   ierr = DMCreateGlobalVector(dm_stokes,&b);CHKERRQ(ierr);
>   ierr = DMCreateGlobalVector(dm_stokes_coeff,&coeff);CHKERRQ(ierr);
>
>   /* operators */
>   switch (et) {
>     case 0:
>     case 1:
>       ierr = Stokes2DCreateOperators_Q1P0(dm_stokes,&A,&B);CHKERRQ(ierr);
>       break;
>     case 2:
>       ierr = Stokes2DCreateOperators_Q1mP1(dm_stokes,&A,&B);CHKERRQ(ierr);
>       break;
>     case 3:
>       ierr = Stokes2DCreateOperators_Q1mP0(dm_stokes,&A,&B);CHKERRQ(ierr);
>       break;
>   }
>
>   /* coefficients */
>   switch (pt) {
>     case 0:
>       ierr = Stokes2DSetPDECoefficients_SolCx(dm_stokes,dm_stokes_coeff,coeff);CHKERRQ(ierr);
>       break;
>     case 1:
>       ierr = Stokes2DSetPDECoefficients_RT(dm_stokes,dm_stokes_coeff,coeff);CHKERRQ(ierr);
>       break;
>     case 2:
>       ierr = Stokes2DSetPDECoefficients_Layer(dm_stokes,dm_stokes_coeff,coeff);CHKERRQ(ierr);
>       break;
>   }
>
>   /* assemble */
>   switch (et) {
>     case 0:
>       ierr = Stokes2DAssembleOperators_Q1P0(A,B,dm_stokes,dm_stokes_coeff,coeff);CHKERRQ(ierr);
>       break;
>     case 1:
>       ierr = Stokes2DAssembleOperators_Q1P0_stab(A,B,dm_stokes,dm_stokes_coeff,coeff);CHKERRQ(ierr);
>       break;
>     case 2:
>       ierr = Stokes2DAssembleOperators_Q1mP1(A,B,dm_stokes,dm_stokes_coeff,coeff);CHKERRQ(ierr);
>       /*ierr = Stokes2DAssembleOperators_Q1mP1_nearly_incompressible(A,B,dm_stokes,dm_stokes_coeff,coeff);CHKERRQ(ierr);*/
>       break;
>     case 3:
>       ierr = Stokes2DAssembleOperators_Q1mP0(A,B,dm_stokes,dm_stokes_coeff,coeff);CHKERRQ(ierr);
>       break;
>   }
>
>   ierr = Stokes2DAssembleRHS_Q1(b,dm_stokes,dm_stokes_coeff,coeff);CHKERRQ(ierr);
>
>   /* boundary conditions */
>   /*ierr = Stokes2DBCApply_FreeSlip(dm_stokes,A,B,b);CHKERRQ(ierr);*/
>   /*ierr = Stokes2DBCApplySym_FreeSlip(dm_stokes,A,B,b);CHKERRQ(ierr);*/
>   {
>     PetscBool sym = PETSC_FALSE;
>     BCType    sides[4];
>
>     ierr = PetscOptionsGetBool(PETSC_NULL,"-symm",&sym,0);CHKERRQ(ierr);
>
>     switch (pt) {
>       case 0:
>         sides[0] = BCFreeSlip;
>         sides[1] = BCFreeSlip;
>         sides[2] = BCFreeSlip;
>         sides[3] = BCFreeSlip;
>
>         ierr = Stokes2DBCApply_FreeSlip_g(dm_stokes,A,B,b,sym,sides);CHKERRQ(ierr);
>         break;
>
>       case 1:
>         sides[0] = BCFreeSurface;
>         sides[1] = BCNoSlip;
>         sides[2] = BCFreeSlip;
>         sides[3] = BCNoSlip;
>
>         ierr = Stokes2DBCApply_FreeSlip_g(dm_stokes,A,B,b,sym,sides);CHKERRQ(ierr);
>         /* apply compression on the left wall: this really makes for a tough problem */
>         /*ierr = Stokes2DBCApply_g(dm_stokes,A,B,b,PETSC_FALSE,1, 0,-16.0);CHKERRQ(ierr);*/
>         break;
>
>       case 2:
>         ierr = Stokes2DBCApply_g(dm_stokes,A,B,b,PETSC_FALSE,1, 0,-1.0);CHKERRQ(ierr);
>         ierr = Stokes2DBCApply_g(dm_stokes,A,B,b,PETSC_FALSE,3, 0,1.0);CHKERRQ(ierr);
>         ierr = Stokes2DBCApply_g(dm_stokes,A,B,b,PETSC_FALSE,2, 1,0.0);CHKERRQ(ierr);
> /*
>         ierr = Stokes2DBCApply_g(dm_stokes,A,B,b,PETSC_FALSE,1, 0,0.0);CHKERRQ(ierr);
>         ierr = Stokes2DBCApply_g(dm_stokes,A,B,b,PETSC_FALSE,1, 1,0.0);CHKERRQ(ierr);
>
>         ierr = Stokes2DBCApply_g(dm_stokes,A,B,b,PETSC_FALSE,2, 0,0.0);CHKERRQ(ierr);
>         ierr = Stokes2DBCApply_g(dm_stokes,A,B,b,PETSC_FALSE,2, 1,0.0);CHKERRQ(ierr);
>
>         ierr = Stokes2DBCApply_g(dm_stokes,A,B,b,PETSC_FALSE,3, 0,0.0);CHKERRQ(ierr);
>         ierr = Stokes2DBCApply_g(dm_stokes,A,B,b,PETSC_FALSE,3, 1,0.0);CHKERRQ(ierr);
>
>         ierr = Stokes2DBCApply_g(dm_stokes,A,B,b,PETSC_FALSE,0, 0,1.0);CHKERRQ(ierr);
>         ierr = Stokes2DBCApply_g(dm_stokes,A,B,b,PETSC_FALSE,0, 1,0.0);CHKERRQ(ierr);
>
>         VecView(b,PETSC_VIEWER_STDOUT_WORLD);
> */
>         break;
>     }
>
>     /* %D is the PetscInt conversion; the bounded formatter cannot overrun filename */
>     ierr = PetscSNPrintf(filename,sizeof(filename),"Mesh%Dx%D",mx,my);CHKERRQ(ierr);
>     ierr = Stokes2DMatViewMatlab(dm_stokes,A,B,filename);CHKERRQ(ierr);
>   }
>
>   /* solve */
>   ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
>   ierr = KSPSetOptionsPrefix(ksp,"stokes_");CHKERRQ(ierr);
>
>   /* A is also the preconditioning matrix for every element type (robust pc).
>      For the nearly incompressible pc with element type 2, use (A,B) instead: */
>   /*ierr = KSPSetOperators(ksp,A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);*/
>   ierr = KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
>
>   /* the DM is attached to the outer KSP but kept inactive (operators are set above) */
>   ierr = KSPSetDM(ksp,dm_stokes);CHKERRQ(ierr);
>   ierr = KSPSetDMActive(ksp,PETSC_FALSE);CHKERRQ(ierr);
>   ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
>
>   /* configure for fieldsplit */
>   {
>     Mat Spp;
>     PC  pc;
>     IS  *is;
>
>     ierr = DMCompositeGetGlobalISs(dm_stokes,&is);CHKERRQ(ierr);
>
>     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
>
>     ierr = PCSetType(pc,PCFIELDSPLIT);CHKERRQ(ierr);
>     ierr = PCFieldSplitSetIS(pc,"u",is[0]);CHKERRQ(ierr);
>     ierr = PCFieldSplitSetIS(pc,"p",is[1]);CHKERRQ(ierr);
>
>     ierr = PCFieldSplitSetType(pc,PC_COMPOSITE_SCHUR);CHKERRQ(ierr);
>     ierr = PCFieldSplitSetSchurFactType(pc,PC_FIELDSPLIT_SCHUR_FACT_UPPER);CHKERRQ(ierr);
>
>     /* the pressure-pressure block of B is the user Schur complement preconditioner */
>     ierr = MatGetSubMatrix(B,is[1],is[1],MAT_INITIAL_MATRIX,&Spp);CHKERRQ(ierr);
>     ierr = PCFieldSplitSchurPrecondition(pc,PC_FIELDSPLIT_SCHUR_PRE_USER,Spp);CHKERRQ(ierr);
>
>     ierr = MatDestroy(&Spp);CHKERRQ(ierr);
>     ierr = ISDestroy(&is[0]);CHKERRQ(ierr);
>     ierr = ISDestroy(&is[1]);CHKERRQ(ierr);
>     ierr = PetscFree(is);CHKERRQ(ierr);
>   }
>
>   /* the sub-KSPs only exist once the fieldsplit PC has been set up */
>   ierr = KSPSetUp(ksp);CHKERRQ(ierr);
>   {
>     PC       pc;
>     DM       dav,dap;
>     PetscInt nsplits;
>     KSP      *ksp_ii;
>
>     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
>
>     ierr = DMCompositeGetEntries(dm_stokes,&dav,&dap);CHKERRQ(ierr);
>     ierr = PCFieldSplitGetSubKSP(pc,&nsplits,&ksp_ii);CHKERRQ(ierr);
>
>     /* Give the velocity split its DM (e.g. for geometric multigrid) but keep it
>        inactive. An active DM lets the KSP replace the operator that fieldsplit
>        already installed on this split, which is the behaviour reported in the
>        petsc-dev thread this file was attached to; the reply there recommends
>        KSPSetDMActive(ksp,PETSC_FALSE). */
>     ierr = KSPSetDM(ksp_ii[0],dav);CHKERRQ(ierr);
>     ierr = KSPSetDMActive(ksp_ii[0],PETSC_FALSE);CHKERRQ(ierr);
>
>     /* the caller owns the array of sub-KSPs (not the KSPs themselves) */
>     ierr = PetscFree(ksp_ii);CHKERRQ(ierr);
>   }
>
>   ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
>
>   ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
>   ierr = MatView(B,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
>
>   /* view */
>   ierr = StokesUPDMDAView(dm_stokes,x,0);CHKERRQ(ierr);
>   ierr = StokesCoeffDMDAView(dm_stokes_coeff,coeff);CHKERRQ(ierr);
>
>   /* release memory */
>   ierr = VecDestroy(&x);CHKERRQ(ierr);
>   ierr = VecDestroy(&b);CHKERRQ(ierr);
>   ierr = VecDestroy(&coeff);CHKERRQ(ierr);
>   ierr = MatDestroy(&A);CHKERRQ(ierr);
>   ierr = MatDestroy(&B);CHKERRQ(ierr);
>   ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
>
>   ierr = DMDestroy(&dm_stokes);CHKERRQ(ierr);
>   ierr = DMDestroy(&dm_stokes_coeff);CHKERRQ(ierr);
>
>   PetscFunctionReturn(0);
> }
>
> /*
>  main - Reads the mesh size and runs the Stokes solve.
>
>  -mx : number of elements in x (default 10)
>  -my : number of elements in y (defaults to the value of -mx)
>
>  Returns the PETSc error code of the first failing call, including PetscFinalize().
> */
> #undef __FUNCT__
> #define __FUNCT__ "main"
> int main(int argc,char **args)
> {
>   PetscErrorCode ierr;
>   PetscInt       mx,my;
>
>   ierr = PetscInitialize(&argc,&args,(char*)0,help);CHKERRQ(ierr);
>
>   mx   = 10;
>   ierr = PetscOptionsGetInt(NULL,"-mx",&mx,NULL);CHKERRQ(ierr);
>   /* my keeps the value of mx unless -my is given: the query leaves it untouched otherwise */
>   my   = mx;
>   ierr = PetscOptionsGetInt(NULL,"-my",&my,NULL);CHKERRQ(ierr);
>
>   /*ierr = solve_stokes_2d_coupled(mx,my);CHKERRQ(ierr);*/
>   ierr = Stokes2DSolve(mx,my);CHKERRQ(ierr);
>
>   /* propagate a failure during finalization instead of discarding it */
>   ierr = PetscFinalize();
>   return ierr;
> }



More information about the petsc-dev mailing list