[petsc-users] Concatenation of local-to-global matrix
Barry Smith
bsmith at petsc.dev
Tue Oct 3 08:07:36 CDT 2023
Take a look at MatCreateMPIMatConcatenateSeqMat_MPIAIJ() in src/mat/impls/aij/mpi/mpiaij.c. In that file you will find several routines similar to what you are building.
Note the preallocation:
MatPreallocateBegin(comm, m, n, dnz, onz);
/* dnz[]/onz[] count the nonzeros per row that fall in the diagonal and
   off-diagonal blocks of the MPIAIJ matrix */
for (i = 0; i < m; i++) {
  PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
  PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
  PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
}
...
PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
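For reference, a rough petsc4py analogue of that counting loop (an untested sketch; local_matrix and global_matrix refer to the objects in the MFE quoted below, and re-preallocating is only legal before any values have been inserted):

col_start, col_end = global_matrix.getOwnershipRangeColumn()
local_rows, _ = local_matrix.getSize()
d_nnz, o_nnz = [], []
for i in range(local_rows):
    cols, _ = local_matrix.getRow(i)
    in_diag = sum(1 for c in cols if col_start <= c < col_end)
    d_nnz.append(in_diag)               # nonzeros inside the diagonal block
    o_nnz.append(len(cols) - in_diag)   # nonzeros outside it
global_matrix.setPreallocationNNZ((d_nnz, o_nnz))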
Probably best to reuse the C code rather than write slower Python code.
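
That said, if you stay in Python, note that MatSetValues() with m rows and n columns addresses the logically dense m-by-n block, so it expects m*n values; there is no way to insert arbitrary (row, col) pairs in a single call, which is most likely why the batched version in your MFE fails. A simpler route is to pull the local rows out in CSR form and hand them straight to createAIJ(), which preallocates and fills in one shot. A rough, untested sketch (concatenate_local_to_global_csr is a hypothetical replacement for the concatenation routine in your MFE):

def concatenate_local_to_global_csr(local_matrix, comm):
    # local_matrix is this rank's seqaij block of the result
    local_rows, global_cols = local_matrix.getSize()
    # CSR of this rank's rows; the column indices are already global (0..k-1)
    indptr, indices, data = local_matrix.getValuesCSR()
    global_matrix = PETSc.Mat().createAIJ(
        size=((local_rows, None), (None, global_cols)),  # PETSc sums the local row counts
        csr=(indptr, indices, data),
        comm=comm,
    )
    global_matrix.assemblyBegin()
    global_matrix.assemblyEnd()
    return global_matrix

This is essentially what MatCreateMPIMatConcatenateSeqMat() does on the C side, with the row distribution given by each rank's local row count.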
> On Oct 3, 2023, at 6:05 AM, Thanasis Boutsikakis <thanasis.boutsikakis at corintis.com> wrote:
>
> I am trying to multiply a sequential PETSc matrix with an MPI PETSc matrix in parallel. The final step is to concatenate the product matrix, which is a local sequential PETSc matrix that is different on every proc, so that I get the full MPI matrix as a result. This has proven to work, but setting the values one by one in a loop is very slow.
>
> In the following MFE, I am trying to make this concatenation more efficient by setting the values in batches. However, it doesn't work, and I am wondering why:
>
> """Experimenting with PETSc mat-mat multiplication"""
>
> import time
>
> import numpy as np
> from colorama import Fore
> from firedrake import COMM_SELF, COMM_WORLD
> from firedrake.petsc import PETSc
> from mpi4py import MPI
> from numpy.testing import assert_array_almost_equal
>
> from utilities import Print
>
> size = COMM_WORLD.size
> rank = COMM_WORLD.rank
>
> def create_petsc_matrix(input_array, sparse=True):
>     """Create a PETSc matrix from an input_array
>
>     Args:
>         input_array (np array): Input array
>         sparse (bool, optional): Toggle for sparse or dense. Defaults to True.
>
>     Returns:
>         PETSc mat: PETSc matrix
>     """
>     # The input array must be 2-dimensional
>     assert len(input_array.shape) == 2, "Input array should be 2-dimensional"
>     global_rows, global_cols = input_array.shape
>
>     size = ((None, global_rows), (global_cols, global_cols))
>
>     # Create a sparse or dense matrix based on the 'sparse' argument
>     if sparse:
>         matrix = PETSc.Mat().createAIJ(size=size, comm=COMM_WORLD)
>     else:
>         matrix = PETSc.Mat().createDense(size=size, comm=COMM_WORLD)
>     matrix.setUp()
>
>     local_rows_start, local_rows_end = matrix.getOwnershipRange()
>
>     for counter, i in enumerate(range(local_rows_start, local_rows_end)):
>         # Calculate the correct row in the array for the current process
>         row_in_array = counter + local_rows_start
>         matrix.setValues(
>             i, range(global_cols), input_array[row_in_array, :], addv=False
>         )
>
>     # Assemble the matrix to compute the final structure
>     matrix.assemblyBegin()
>     matrix.assemblyEnd()
>
>     return matrix
>
> def get_local_submatrix(A):
>     """Get the local submatrix of A
>
>     Args:
>         A (mpi PETSc mat): partitioned PETSc matrix
>
>     Returns:
>         seq mat: PETSc matrix
>     """
>     local_rows_start, local_rows_end = A.getOwnershipRange()
>     local_rows = local_rows_end - local_rows_start
>     comm = A.getComm()
>     rows = PETSc.IS().createStride(
>         local_rows, first=local_rows_start, step=1, comm=comm
>     )
>     _, k = A.getSize()  # Get the number of columns (k) from A's size
>     cols = PETSc.IS().createStride(k, first=0, step=1, comm=comm)
>
>     # Getting the local submatrix
>     # TODO: To be replaced by MatMPIAIJGetLocalMat() in the future (see
>     # petsc-users mailing list). There is a missing petsc4py binding;
>     # need to add it myself (and please create a merge request).
>     A_local = A.createSubMatrices(rows, cols)[0]
>     return A_local
>
>
> def create_petsc_matrix_seq(input_array):
>     """Build a sequential PETSc matrix from an array
>
>     Args:
>         input_array (np array): Input array
>
>     Returns:
>         seq mat: PETSc matrix
>     """
>     assert len(input_array.shape) == 2
>
>     m, n = input_array.shape
>     matrix = PETSc.Mat().createAIJ(size=(m, n), comm=COMM_SELF)
>     matrix.setUp()
>
>     matrix.setValues(range(m), range(n), input_array, addv=False)
>
>     # Assemble the matrix to compute the final structure
>     matrix.assemblyBegin()
>     matrix.assemblyEnd()
>
>     return matrix
>
>
> def multiply_matrices_seq(A_seq, B_seq):
>     """Multiply 2 sequential matrices
>
>     Args:
>         A_seq (seqaij): local submatrix of A
>         B_seq (seqaij): sequential matrix B
>
>     Returns:
>         seq mat: PETSc matrix that is the product of A_seq and B_seq
>     """
>     _, A_seq_cols = A_seq.getSize()
>     B_seq_rows, _ = B_seq.getSize()
>     assert (
>         A_seq_cols == B_seq_rows
>     ), f"Incompatible matrix sizes for multiplication: {A_seq_cols} != {B_seq_rows}"
>     C_local = A_seq.matMult(B_seq)
>     return C_local
>
>
> def concatenate_local_to_global_matrix(local_matrix, mat_type=None):
>     """Create the global matrix C from the local submatrix local_matrix
>
>     Args:
>         local_matrix (seqaij): local submatrix of global_matrix
>         mat_type (str): type of the global matrix. Defaults to None.
>             If None, the type of local_matrix is used.
>
>     Returns:
>         mpi PETSc mat: partitioned PETSc matrix
>     """
>     local_matrix_rows, local_matrix_cols = local_matrix.getSize()
>     global_rows = COMM_WORLD.allreduce(local_matrix_rows, op=MPI.SUM)
>
>     # Determine the local portion of the matrix
>     size = ((None, global_rows), (local_matrix_cols, local_matrix_cols))
>
>     if mat_type is None:
>         mat_type = local_matrix.getType()
>
>     if "dense" in mat_type:
>         sparse = False
>     else:
>         sparse = True
>
>     if sparse:
>         global_matrix = PETSc.Mat().createAIJ(size=size, comm=COMM_WORLD)
>     else:
>         global_matrix = PETSc.Mat().createDense(size=size, comm=COMM_WORLD)
>     global_matrix.setUp()
>
>     # The exscan operation is used to get the starting global row for each process.
>     # The result of the exclusive scan is the sum of the local rows from previous ranks.
>     global_row_start = COMM_WORLD.exscan(local_matrix_rows, op=MPI.SUM)
>     if rank == 0:
>         global_row_start = 0
>
>     concatenate_start = time.time()
>
>     # This works but is very inefficient
>     # for i in range(local_matrix_rows):
>     #     cols, values = local_matrix.getRow(i)
>     #     global_row = i + global_row_start
>     #     global_matrix.setValues(global_row, cols, values)
>
>     all_cols = []
>     all_values = []
>     all_global_rows = [i + global_row_start for i in range(local_matrix_rows)]
>
>     for i in range(len(all_global_rows)):
>         cols, values = local_matrix.getRow(i)
>         # print(f"cols: {cols}, values: {values}")
>         all_cols.append(cols)
>         all_values.append(values)
>
>     for j in range(local_matrix_cols):
>         columns = [all_cols[i][j] for i in range(len(all_cols))]
>         values = [all_values[i][j] for i in range(len(all_values))]
>
>         global_matrix.setValues(all_global_rows, columns, values)
>
>     concatenate_end = time.time()
>     Print(
>         f" -Setting values: {concatenate_end - concatenate_start: 2.2f} s",
>         Fore.GREEN,
>     )
>
>     global_matrix.assemblyBegin()
>     global_matrix.assemblyEnd()
>
>     return global_matrix
>
>
> # --------------------------------------------
> # EXP: Multiplication of an mpi PETSc matrix with a sequential PETSc matrix
> # C = A * B
> # [m x k] = [m x k] * [k x k]
> # --------------------------------------------
>
> m, k = 11, 7
> # Generate the random numpy matrices
> np.random.seed(0) # sets the seed to 0
> A_np = np.random.randint(low=0, high=6, size=(m, k))
> B_np = np.random.randint(low=0, high=6, size=(k, k))
>
> # Create B as a sequential matrix on each process
> B_seq = create_petsc_matrix_seq(B_np)
>
> A = create_petsc_matrix(A_np)
>
> # Getting the correct local submatrix to be multiplied by B_seq
> A_local = get_local_submatrix(A)
>
> # Multiplication of 2 sequential matrices
> C_local = multiply_matrices_seq(A_local, B_seq)
>
> # Creating the global C matrix
> C = concatenate_local_to_global_matrix(C_local) if size > 1 else C_local
>
> # --------------------------------------------
> # TEST: Multiplication of 2 numpy matrices
> # --------------------------------------------
> AB_np = np.dot(A_np, B_np)
> Print(f"MATRIX AB_np [{AB_np.shape[0]}x{AB_np.shape[1]}]")
> Print(f"{AB_np}")
>
> # Get the local values from C
> local_rows_start, local_rows_end = C.getOwnershipRange()
> C_local_values = C.getValues(range(local_rows_start, local_rows_end), range(k))
>
> # Assert the correctness of the multiplication for the local subset
> assert_array_almost_equal(C_local_values, AB_np[local_rows_start:local_rows_end, :], decimal=5)
>
>
>
> Any idea how to fix this?
>
> Thanks,
> Thanos
>