[petsc-users] Concatenation of local-to-global matrix

Barry Smith bsmith at petsc.dev
Tue Oct 3 08:07:36 CDT 2023


   Take a look at MatCreateMPIMatConcatenateSeqMat_MPIAIJ() in src/mat/impls/aij/mpi/mpiaij.c. In that file you will find several routines similar to what you are building.

Note the preallocation:

    /* Count the nonzeros of each row: dnz for the diagonal block, onz for the off-diagonal block */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }
    ...
    /* Only the call matching the matrix type takes effect; the other is a no-op */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));


Probably best to reuse the C code rather than reimplement it in slower Python.
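
As for why the batched version fails: MatSetValues() (setValues in petsc4py) with a list of m rows and a list of n columns addresses the logically dense m-by-n block defined by those indices, so it expects m*n values; it does not insert (row[i], col[i]) pairs one at a time. The column loop also assumes every sparse row has the same number of nonzeros, which will not hold in general.

If you prefer to stay in petsc4py, here is a minimal, untested sketch of the same one-shot idea (the helper name is only for illustration), assuming your petsc4py build exposes Mat.getValuesCSR() and the csr= argument of createAIJ(); both are in recent releases, but check your version:

    def concatenate_local_to_global_csr(local_matrix, comm=COMM_WORLD):
        """Stack each rank's sequential AIJ matrix into one MPIAIJ matrix."""
        m_local, n = local_matrix.getSize()
        # CSR triplet (row pointers, column indices, values) of the local rows
        indptr, indices, values = local_matrix.getValuesCSR()
        # Preallocate and fill in a single call; rows are stacked in rank order
        global_matrix = PETSc.Mat().createAIJ(
            size=((m_local, None), (None, n)),
            csr=(indptr, indices, values),
            comm=comm,
        )
        global_matrix.assemble()
        return global_matrix

Because the CSR arrays carry the exact nonzero pattern, this avoids both the row-by-row insertion and the preallocation problem.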


> On Oct 3, 2023, at 6:05 AM, Thanasis Boutsikakis <thanasis.boutsikakis at corintis.com> wrote:
> 
> I am trying to multiply a sequential PETSc matrix with an MPI PETSc matrix in parallel. The final step is to concatenate the product matrices, each a local sequential PETSc matrix that differs per process, so that I get the full MPI matrix as a result. This works, but setting the values one by one in a loop is very inefficient and slow.
> 
> In the following MFE, I am trying to make this concatenation more efficient by setting the values in batches. However, it doesn’t work, and I am wondering why:
> 
> """Experimenting with PETSc mat-mat multiplication"""
> 
> import time
> 
> import numpy as np
> from colorama import Fore
> from firedrake import COMM_SELF, COMM_WORLD
> from firedrake.petsc import PETSc
> from mpi4py import MPI
> from numpy.testing import assert_array_almost_equal
> 
> from utilities import Print
> 
> size = COMM_WORLD.size
> rank = COMM_WORLD.rank
> 
> def create_petsc_matrix(input_array, sparse=True):
>     """Create a PETSc matrix from an input_array
> 
>     Args:
>         input_array (np array): Input array
>         sparse (bool, optional): Toggle for sparse or dense. Defaults to True.
> 
>     Returns:
>         PETSc mat: PETSc matrix
>     """
>     # Ensure input_array is 2-dimensional
>     assert len(input_array.shape) == 2, "Input array should be 2-dimensional"
>     global_rows, global_cols = input_array.shape
> 
>     size = ((None, global_rows), (global_cols, global_cols))
> 
>     # Create a sparse or dense matrix based on the 'sparse' argument
>     if sparse:
>         matrix = PETSc.Mat().createAIJ(size=size, comm=COMM_WORLD)
>     else:
>         matrix = PETSc.Mat().createDense(size=size, comm=COMM_WORLD)
>     matrix.setUp()
> 
>     local_rows_start, local_rows_end = matrix.getOwnershipRange()
> 
>     for i in range(local_rows_start, local_rows_end):
>         # Each locally owned row i takes row i of the input array
>         matrix.setValues(i, range(global_cols), input_array[i, :], addv=False)
> 
>     # Assemble the matrix to compute the final structure
>     matrix.assemblyBegin()
>     matrix.assemblyEnd()
> 
>     return matrix
> 
> def get_local_submatrix(A):
>     """Get the local submatrix of A
> 
>     Args:
>         A (mpi PETSc mat): partitioned PETSc matrix
> 
>     Returns:
>         seq mat: PETSc matrix
>     """
>     local_rows_start, local_rows_end = A.getOwnershipRange()
>     local_rows = local_rows_end - local_rows_start
>     comm = A.getComm()
>     rows = PETSc.IS().createStride(
>         local_rows, first=local_rows_start, step=1, comm=comm
>     )
>     _, k = A.getSize()  # Get the number of columns (k) from A's size
>     cols = PETSc.IS().createStride(k, first=0, step=1, comm=comm)
> 
>     # Getting the local submatrix
>     # TODO: Replace with MatMPIAIJGetLocalMat() in the future (see the petsc-users mailing list). The petsc4py binding is missing and needs to be added (and a merge request opened).
>     A_local = A.createSubMatrices(rows, cols)[0]
>     return A_local
> 
> 
> def create_petsc_matrix_seq(input_array):
>     """Building a sequential PETSc matrix from an array
> 
>     Args:
>         input_array (np array): Input array
> 
>     Returns:
>         seq mat: PETSc matrix
>     """
>     assert len(input_array.shape) == 2
> 
>     m, n = input_array.shape
>     matrix = PETSc.Mat().createAIJ(size=(m, n), comm=COMM_SELF)
>     matrix.setUp()
> 
>     matrix.setValues(range(m), range(n), input_array, addv=False)
> 
>     # Assemble the matrix to compute the final structure
>     matrix.assemblyBegin()
>     matrix.assemblyEnd()
> 
>     return matrix
> 
> 
> def multiply_matrices_seq(A_seq, B_seq):
>     """Multiply 2 sequential matrices
> 
>     Args:
>         A_seq (seqaij): local submatrix of A
>         B_seq (seqaij): sequential matrix B
> 
>     Returns:
>         seq mat: PETSc matrix that is the product of A_seq and B_seq
>     """
>     _, A_seq_cols = A_seq.getSize()
>     B_seq_rows, _ = B_seq.getSize()
>     assert (
>         A_seq_cols == B_seq_rows
>     ), f"Incompatible matrix sizes for multiplication: {A_seq_cols} != {B_seq_rows}"
>     C_local = A_seq.matMult(B_seq)
>     return C_local
> 
> 
> def concatenate_local_to_global_matrix(local_matrix, mat_type=None):
>     """Create the global matrix C from the local submatrix local_matrix
> 
>     Args:
>         local_matrix (seqaij): local submatrix of global_matrix
>         mat_type (str): type of the global matrix. Defaults to None. If None, the type of local_matrix is used.
> 
>     Returns:
>         mpi PETSc mat: partitioned PETSc matrix
>     """
>     local_matrix_rows, local_matrix_cols = local_matrix.getSize()
>     global_rows = COMM_WORLD.allreduce(local_matrix_rows, op=MPI.SUM)
> 
>     # Global matrix sizes; PETSc decides the local row distribution
>     size = ((None, global_rows), (local_matrix_cols, local_matrix_cols))
> 
>     if mat_type is None:
>         mat_type = local_matrix.getType()
> 
>     sparse = "dense" not in mat_type
> 
>     if sparse:
>         global_matrix = PETSc.Mat().createAIJ(size=size, comm=COMM_WORLD)
>     else:
>         global_matrix = PETSc.Mat().createDense(size=size, comm=COMM_WORLD)
>     global_matrix.setUp()
> 
>     # The exscan operation is used to get the starting global row for each process.
>     # The result of the exclusive scan is the sum of the local rows from previous ranks.
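>     # e.g. with local row counts [4, 4, 3] on 3 ranks, exscan yields [None, 4, 8]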
>     global_row_start = COMM_WORLD.exscan(local_matrix_rows, op=MPI.SUM)
>     if rank == 0:
>         global_row_start = 0
> 
>     concatenate_start = time.time()
> 
>     # This works but is very inefficient
>     # for i in range(local_matrix_rows):
>     #     cols, values = local_matrix.getRow(i)
>     #     global_row = i + global_row_start
>     #     global_matrix.setValues(global_row, cols, values)
> 
>     all_cols = []
>     all_values = []
>     all_global_rows = [i + global_row_start for i in range(local_matrix_rows)]
> 
>     for i in range(len(all_global_rows)):
>         cols, values = local_matrix.getRow(i)
>         # print(f"cols: {cols}, values: {values}")
>         all_cols.append(cols)
>         all_values.append(values)
> 
>     for j in range(local_matrix_cols):
>         columns = [all_cols[i][j] for i in range(len(all_cols))]
>         values = [all_values[i][j] for i in range(len(all_values))]
> 
>         global_matrix.setValues(all_global_rows, columns, values)
> 
>     concatenate_end = time.time()
>     Print(
>         f"  -Setting values: {concatenate_end - concatenate_start: 2.2f} s",
>         Fore.GREEN,
>     )
> 
>     global_matrix.assemblyBegin()
>     global_matrix.assemblyEnd()
> 
>     return global_matrix
> 
> 
> # --------------------------------------------
> # EXP: Multiplication of an mpi PETSc matrix with a sequential PETSc matrix
> #  C = A * B
> # [m x k] = [m x k] * [k x k]
> # --------------------------------------------
> 
> m, k = 11, 7
> # Generate the random numpy matrices
> np.random.seed(0)  # sets the seed to 0
> A_np = np.random.randint(low=0, high=6, size=(m, k))
> B_np = np.random.randint(low=0, high=6, size=(k, k))
> 
> # Create B as a sequential matrix on each process
> B_seq = create_petsc_matrix_seq(B_np)
> 
> A = create_petsc_matrix(A_np)
> 
> # Getting the correct local submatrix to be multiplied by B_seq
> A_local = get_local_submatrix(A)
> 
> # Multiplication of 2 sequential matrices
> C_local = multiply_matrices_seq(A_local, B_seq)
> 
> # Creating the global C matrix
> C = concatenate_local_to_global_matrix(C_local) if size > 1 else C_local
> 
> # --------------------------------------------
> # TEST: Multiplication of 2 numpy matrices
> # --------------------------------------------
> AB_np = np.dot(A_np, B_np)
> Print(f"MATRIX AB_np [{AB_np.shape[0]}x{AB_np.shape[1]}]")
> Print(f"{AB_np}")
> 
> # Get the local values from C
> local_rows_start, local_rows_end = C.getOwnershipRange()
> C_values = C.getValues(range(local_rows_start, local_rows_end), range(k))
> 
> # Assert the correctness of the multiplication for the local subset
> assert_array_almost_equal(C_values, AB_np[local_rows_start:local_rows_end, :], decimal=5)
> 
> 
> 
> Any idea how to fix this?
> 
> Thanks,
> Thanos
> 
