KSP Object:
  type: fgmres
    GMRES: restart=30, using Classical (unmodified) Gram-Schmidt Orthogonalization with no iterative refinement
    GMRES: happy breakdown tolerance 1e-30
  maximum iterations=10000, initial guess is zero
  tolerances:  relative=1e-05, absolute=1e-50, divergence=10000
  right preconditioning
  using UNPRECONDITIONED norm type for convergence test
PC Object:
  type: mg
    MG: type is FULL, levels=3 cycles=v
  Coarse grid solver -- level 0 presmooths=1 postsmooths=1 -----
    KSP Object:(mg_coarse_)
      type: preonly
      maximum iterations=1, initial guess is zero
      tolerances:  relative=1e-05, absolute=1e-50, divergence=10000
      left preconditioning
      using PRECONDITIONED norm type for convergence test
    PC Object:(mg_coarse_)
      type: redundant
        Redundant preconditioner: First (color=0) of 4 PCs follows
      KSP Object:(mg_coarse_redundant_)
        type: preonly
        maximum iterations=10000, initial guess is zero
        tolerances:  relative=1e-05, absolute=1e-50, divergence=10000
        left preconditioning
        using PRECONDITIONED norm type for convergence test
      PC Object:(mg_coarse_redundant_)
        type: lu
          LU: out-of-place factorization
            tolerance for zero pivot 1e-12
            matrix ordering: nd
            factor fill ratio given 5, needed 44.0819
              Factored matrix follows:
                Matrix Object:
                  type=seqaij, rows=32768, cols=32768
                  package used to perform factorization: petsc
                  total: nonzeros=9840492, allocated nonzeros=9840492
                    not using I-node routines
        linear system matrix = precond matrix:
        Matrix Object:
          type=seqaij, rows=32768, cols=32768
          total: nonzeros=223232, allocated nonzeros=229376
            not using I-node routines
      linear system matrix = precond matrix:
      Matrix Object:
        type=mpiaij, rows=32768, cols=32768
        total: nonzeros=223232, allocated nonzeros=223232
          not using I-node (on process 0) routines
  Down solver (pre-smoother) on level 1 smooths=1 --------------------
    KSP Object:(mg_levels_1_)
      type: gmres
        GMRES: restart=30, using Classical (unmodified) Gram-Schmidt Orthogonalization with no iterative refinement
        GMRES: happy breakdown tolerance 1e-30
      maximum iterations=1
      tolerances:  relative=1e-05, absolute=1e-50, divergence=10000
      left preconditioning
      using nonzero initial guess
      using PRECONDITIONED norm type for convergence test
    PC Object:(mg_levels_1_)
      type: bjacobi
        block Jacobi: number of blocks = 4
        Local solve is same for all blocks, in the following KSP and PC objects:
      KSP Object:(mg_levels_1_sub_)
        type: preonly
        maximum iterations=10000, initial guess is zero
        tolerances:  relative=1e-05, absolute=1e-50, divergence=10000
        left preconditioning
        using PRECONDITIONED norm type for convergence test
      PC Object:(mg_levels_1_sub_)
        type: ilu
          ILU: out-of-place factorization
            0 levels of fill
            tolerance for zero pivot 1e-12
            using diagonal shift to prevent zero pivot
            matrix ordering: natural
            factor fill ratio given 1, needed 1
              Factored matrix follows:
                Matrix Object:
                  type=seqaij, rows=64512, cols=64512
                  package used to perform factorization: petsc
                  total: nonzeros=441472, allocated nonzeros=441472
                    not using I-node routines
        linear system matrix = precond matrix:
        Matrix Object:
          type=seqaij, rows=64512, cols=64512
          total: nonzeros=441472, allocated nonzeros=441472
            not using I-node routines
      linear system matrix = precond matrix:
      Matrix Object:
        type=mpiaij, rows=250047, cols=250047
        total: nonzeros=1726515, allocated nonzeros=1726515
          not using I-node (on process 0) routines
  Up solver (post-smoother) same as down solver (pre-smoother)
  Down solver (pre-smoother) on level 2 smooths=1 --------------------
    KSP Object:(mg_levels_2_)
      type: gmres
        GMRES: restart=30, using Classical (unmodified) Gram-Schmidt Orthogonalization with no iterative refinement
        GMRES: happy breakdown tolerance 1e-30
      maximum iterations=1
      tolerances:  relative=1e-05, absolute=1e-50, divergence=10000
      left preconditioning
      using nonzero initial guess
      using PRECONDITIONED norm type for convergence test
    PC Object:(mg_levels_2_)
      type: bjacobi
        block Jacobi: number of blocks = 4
        Local solve is same for all blocks, in the following KSP and PC objects:
      KSP Object:(mg_levels_2_sub_)
        type: preonly
        maximum iterations=10000, initial guess is zero
        tolerances:  relative=1e-05, absolute=1e-50, divergence=10000
        left preconditioning
        using PRECONDITIONED norm type for convergence test
      PC Object:(mg_levels_2_sub_)
        type: ilu
          ILU: out-of-place factorization
            0 levels of fill
            tolerance for zero pivot 1e-12
            using diagonal shift to prevent zero pivot
            matrix ordering: natural
            factor fill ratio given 1, needed 1
              Factored matrix follows:
                Matrix Object:
                  type=seqaij, rows=496125, cols=496125
                  package used to perform factorization: petsc
                  total: nonzeros=3433437, allocated nonzeros=3433437
                    not using I-node routines
        linear system matrix = precond matrix:
        Matrix Object:
          type=seqaij, rows=496125, cols=496125
          total: nonzeros=3433437, allocated nonzeros=3433437
            not using I-node routines
      linear system matrix = precond matrix:
      Matrix Object:
        type=mpiaij, rows=1953125, cols=1953125
        total: nonzeros=13578125, allocated nonzeros=13578125
          not using I-node (on process 0) routines
  Up solver (post-smoother) same as down solver (pre-smoother)
  linear system matrix = precond matrix:
  Matrix Object:
    type=mpiaij, rows=1953125, cols=1953125
    total: nonzeros=13578125, allocated nonzeros=13578125
      not using I-node (on process 0) routines

************************************************************************************************************************
***             WIDEN YOUR WINDOW TO 120 CHARACTERS.  Use 'enscript -r -fCourier9' to print this document            ***
************************************************************************************************************************

---------------------------------------------- PETSc Performance Summary: ----------------------------------------------

./ex22 on a linux-gnu named saw318 with 4 processors, by fpoulin Thu Feb 23 19:55:14 2012
Using Petsc Release Version 3.1.0, Patch 4, Fri Jul 30 14:42:02 CDT 2010

                         Max       Max/Min        Avg      Total
Time (sec):           1.452e+01      1.00069   1.452e+01
Objects:              1.800e+02      1.00000   1.800e+02
Flops:                5.076e+09      1.00309   5.068e+09  2.027e+10
Flops/sec:            3.496e+08      1.00280   3.491e+08  1.396e+09
MPI Messages:         2.895e+02      1.30700   2.428e+02  9.710e+02
MPI Message Lengths:  8.398e+06      1.03233   3.385e+04  3.287e+07
MPI Reductions:       2.670e+02      1.00000

Flop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)
                            e.g., VecAXPY() for real vectors of length N --> 2N flops
                            and VecAXPY() for complex vectors of length N --> 8N flops

Summary of Stages:   ----- Time ------  ----- Flops -----  --- Messages ---  -- Message Lengths --  -- Reductions --
                        Avg     %Total     Avg     %Total   counts   %Total     Avg         %Total   counts   %Total
 0:      Main Stage: 1.4517e+01 100.0%  2.0273e+10 100.0%  9.710e+02 100.0%  3.385e+04      100.0%  1.990e+02  74.5%

------------------------------------------------------------------------------------------------------------------------
See the 'Profiling' chapter of the users' manual for details on interpreting output.
Phase summary info:
   Count: number of times phase was executed
   Time and Flops: Max - maximum over all processors
                   Ratio - ratio of maximum to minimum over all processors
   Mess: number of messages sent
   Avg. len: average message length
   Reduct: number of global reductions
   Global: entire computation
   Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().
      %T - percent time in this phase         %F - percent flops in this phase
      %M - percent messages in this phase     %L - percent message lengths in this phase
      %R - percent reductions in this phase
   Total Mflop/s: 10e-6 * (sum of flops over all processors)/(max time over all processors)
------------------------------------------------------------------------------------------------------------------------
Event                Count      Time (sec)     Flops                             --- Global ---  --- Stage ---   Total
                   Max Ratio  Max     Ratio   Max  Ratio  Mess   Avg len Reduct  %T %F %M %L %R  %T %F %M %L %R Mflop/s
------------------------------------------------------------------------------------------------------------------------

--- Event Stage 0: Main Stage

VecMDot               21 1.0 1.1765e-01  2.1 1.35e+07 1.0 0.0e+00 0.0e+00 2.1e+01  1  0  0  0  8   1  0  0  0 11   449
VecNorm               55 1.0 2.9007e-01  2.1 2.61e+07 1.0 0.0e+00 0.0e+00 5.5e+01  1  1  0  0 21   1  1  0  0 28   353
VecScale              40 1.0 1.7501e-02  2.1 9.49e+06 1.0 0.0e+00 0.0e+00 0.0e+00  0  0  0  0  0   0  0  0  0  0  2129
VecCopy               19 1.0 2.8604e-02  1.4 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00  0  0  0  0  0   0  0  0  0  0     0
VecSet               123 1.0 7.2754e-02  1.3 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00  0  0  0  0  0   0  0  0  0  0     0
VecAXPY               35 1.0 5.7420e-02  1.4 1.66e+07 1.0 0.0e+00 0.0e+00 0.0e+00  0  0  0  0  0   0  0  0  0  0  1135
VecAYPX                9 1.0 1.3805e-02  1.5 1.88e+06 1.0 0.0e+00 0.0e+00 0.0e+00  0  0  0  0  0   0  0  0  0  0   533
VecMAXPY              40 1.0 6.4681e-02  1.3 2.39e+07 1.0 0.0e+00 0.0e+00 0.0e+00  0  0  0  0  0   0  0  0  0  0  1454
VecScatterBegin       94 1.0 8.3461e-03  1.2 0.00e+00 0.0 6.3e+02 3.4e+04 0.0e+00  0  0 64 64  0   0  0 64 64  0     0
VecScatterEnd         94 1.0 2.1123e+00 19.9 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00  8  0  0  0  0   8  0  0  0  0     0
VecNormalize          36 1.0 1.8364e-01  1.9 2.25e+07 1.0 0.0e+00 0.0e+00 3.6e+01  1  0  0  0 13   1  0  0  0 18   481
MatMult               52 1.0 6.6340e-01  1.1 1.53e+08 1.0 4.0e+02 3.3e+04 0.0e+00  4  3 41 40  0   4  3 41 40  0   909
MatMultAdd             9 1.0 1.2370e-01  2.7 1.25e+07 1.0 4.5e+01 6.5e+03 0.0e+00  1  0  5  1  0   1  0  5  1  0   398
MatMultTranspose      15 1.0 1.8693e+00 19.1 2.37e+07 1.0 7.5e+01 7.1e+03 0.0e+00  7  0  8  2  0   7  0  8  2  0    50
MatSolve              60 1.0 1.0250e+00  1.3 3.19e+08 1.0 0.0e+00 0.0e+00 0.0e+00  6  6  0  0  0   6  6  0  0  0  1233
MatLUFactorSym         1 1.0 2.6189e-01  1.1 0.00e+00 0.0 0.0e+00 0.0e+00 1.0e+00  2  0  0  0  0   2  0  0  0  1     0
MatLUFactorNum         3 1.0 1.0582e+01  1.2 4.48e+09 1.0 0.0e+00 0.0e+00 0.0e+00 67 88  0  0  0  67 88  0  0  0  1692
MatILUFactorSym        2 1.0 9.8679e-02  1.4 0.00e+00 0.0 0.0e+00 0.0e+00 2.0e+00  1  0  0  0  1   1  0  0  0  1     0
MatAssemblyBegin       9 1.0 1.5689e-01 44.0 0.00e+00 0.0 0.0e+00 0.0e+00 1.6e+01  1  0  0  0  6   1  0  0  0  8     0
MatAssemblyEnd         9 1.0 1.1368e-01  1.1 0.00e+00 0.0 6.8e+01 5.4e+03 3.8e+01  1  0  7  1 14   1  0  7  1 19     0
MatGetRowIJ            3 1.0 2.0552e-03  1.2 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00  0  0  0  0  0   0  0  0  0  0     0
MatGetOrdering         3 1.0 4.2291e-02  1.2 0.00e+00 0.0 0.0e+00 0.0e+00 6.0e+00  0  0  0  0  2   0  0  0  0  3     0
MatView                8 1.3 1.9855e-02  3.7 0.00e+00 0.0 0.0e+00 0.0e+00 4.0e+00  0  0  0  0  1   0  0  0  0  2     0
MatGetRedundant        1 1.0 1.5441e-02  1.1 0.00e+00 0.0 3.6e+01 2.5e+05 2.0e+00  0  0  4 27  1   0  0  4 27  1     0
KSPGMRESOrthog        21 1.0 1.4442e-01  1.7 2.69e+07 1.0 0.0e+00 0.0e+00 2.1e+01  1  1  0  0  8   1  1  0  0 11   732
KSPSetup               7 1.0 5.6233e-02  1.2 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00  0  0  0  0  0   0  0  0  0  0     0
KSPSolve               1 1.0 1.3367e+01  1.0 5.07e+09 1.0 6.9e+02 4.5e+04 9.8e+01 92 100 71 95 37  92 100 71 95 49  1514
PCSetUp                3 1.0 1.1011e+01  1.2 4.48e+09 1.0 7.6e+01 1.4e+05 2.3e+01 69 88  8 32  9  69 88  8 32 12  1626
PCSetUpOnBlocks       18 1.0 1.9663e-01  1.2 1.01e+07 1.0 0.0e+00 0.0e+00 6.0e+00  1  0  0  0  2   1  0  0  0  3   200
PCApply                3 1.0 4.0372e+00  1.8 5.61e+08 1.0 5.9e+02 3.2e+04 7.5e+01 22 11 61 58 28  22 11 61 58 38   549
------------------------------------------------------------------------------------------------------------------------

Memory usage is given in bytes:

Object Type          Creations   Destructions   Memory  Descendants' Mem.
Reports information only for process 0.

--- Event Stage 0: Main Stage

           Container     1              1            396     0
   Distributed array     3              3        2373100     0
                 Vec    78             78      164986976     0
         Vec Scatter    13             13          11596     0
           Index Set    35             35       14481360     0
  IS L to G Mapping      3              3        2360656     0
              Matrix    19             19      282936968     0
       Krylov Solver    12             12          62712     0
      Preconditioner    12             12           8720     0
              Viewer     4              4           2208     0
========================================================================================================================
Average time to get PetscTime(): 9.53674e-08
Average time for MPI_Barrier(): 3.62396e-06
Average time for zero size MPI_Send(): 5.48363e-06
#PETSc Option Table entries:
-da_grid_x 32
-da_grid_y 32
-da_grid_z 32
-ksp_view
-log_summary
#End of PETSc Option Table entries
Compiled without FORTRAN kernels
Compiled with full precision matrices (default)
sizeof(short) 2 sizeof(int) 4 sizeof(long) 8 sizeof(void*) 8 sizeof(PetscScalar) 8
Configure run at: Thu Sep 16 10:15:19 2010
Configure options: --prefix=/work/syam/PETSC-3.1 --with-debugging=no --with-cc=icc --with-fc=ifort --with-cxx=icpc --ignoreWarnings=1 --with-shared --with-dynamic=0 --with-scalar-type=real --with-blas-lapack-lib=/opt/sharcnet/acml/4.3.0/ifort-64bit/ifort64/lib/libacml.so --with-mpiexec=/home/syam/bin/mpi_petsc2.run --with-mpi-shared=1 --with-x=0 --with-x11=0 --with-mpi-lib="[/opt/sharcnet/openmpi/1.4.2/intel/lib/libmpi_f90.so,/opt/sharcnet/openmpi/1.4.2/intel/lib/libmpi_f77.so,/opt/sharcnet/openmpi/1.4.2/intel/lib/libmpi_cxx.so,/opt/sharcnet/openmpi/1.4.2/intel/lib/libmpi.so]" --with-mpi-include=/opt/sharcnet/openmpi/1.4.2/intel/include
-----------------------------------------
Libraries compiled on Thu Sep 16 10:29:50 EDT 2010 on hnd50
Machine characteristics: Linux hnd50 2.6.18-164.11.1.el5 #1 SMP Wed Jan 20 07:32:21 EST 2010 x86_64 x86_64 x86_64 GNU/Linux
Using PETSc directory: /work/syam/mpi/petsc-3.1-p4
Using PETSc arch: linux-gnu-c-opt
-----------------------------------------
Using C compiler: icc -fPIC -O
Using Fortran compiler: ifort -fPIC -O
-----------------------------------------
Using include paths: -I/work/syam/mpi/petsc-3.1-p4/linux-gnu-c-opt/include -I/work/syam/mpi/petsc-3.1-p4/include -I/opt/sharcnet/openmpi/1.4.2/intel/include
------------------------------------------
Using C linker: icc -fPIC -O
Using Fortran linker: ifort -fPIC -O
Using libraries: -Wl,-rpath,/work/syam/mpi/petsc-3.1-p4/linux-gnu-c-opt/lib -L/work/syam/mpi/petsc-3.1-p4/linux-gnu-c-opt/lib -lpetsc -Wl,-rpath,/opt/sharcnet/acml/4.3.0/ifort-64bit/ifort64/lib -L/opt/sharcnet/acml/4.3.0/ifort-64bit/ifort64/lib -lacml -Wl,-rpath,/opt/sharcnet/openmpi/1.4.2/intel/lib -L/opt/sharcnet/openmpi/1.4.2/intel/lib -lmpi_f90 -lmpi_f77 -lmpi_cxx -lmpi -ldl -Wl,-rpath,/opt/sharcnet/intel/11.0.083/icc/lib/intel64 -L/opt/sharcnet/intel/11.0.083/icc/lib/intel64 -Wl,-rpath,/usr/lib/gcc/x86_64-redhat-linux/4.1.2 -L/usr/lib/gcc/x86_64-redhat-linux/4.1.2 -limf -lsvml -lipgo -ldecimal -lirc -lgcc_s -lirc_s -Wl,-rpath,/opt/sharcnet/intel/11.0.083/ifc/lib/intel64 -L/opt/sharcnet/intel/11.0.083/ifc/lib/intel64 -lifport -lifcore -lm -lpthread -lm -ldl -limf -lsvml -lipgo -ldecimal -lirc -lgcc_s -lirc_s -ldl
------------------------------------------