[petsc-users] Multiplication of partitioned with non-partitioned (sparse) PETSc matrices

Thanasis Boutsikakis thanasis.boutsikakis at corintis.com
Thu Aug 24 04:58:05 CDT 2023


Thanks a lot Pierre!

I managed to solve my problem for now using the (less scalable) solution that you provided. I also opened an issue about the missing MatMPIAIJGetLocalMat() petsc4py binding here: https://gitlab.com/petsc/petsc/-/issues/1443. If no action is taken, I might take care of it myself soon.

For the sake of completeness, and to help any PETSc user who might run into the same issue, here is my final code, which works nicely in parallel.
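
In short, the script below (1) extracts each rank's locally owned block of the row-partitioned A with createSubMatrices() (the less scalable stand-in for MatMPIAIJGetLocalMat()), (2) multiplies that block with the sequential B on COMM_SELF, and (3) reassembles the resulting local products into a distributed C whose rows line up with A's ownership ranges.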

"""Experimenting with PETSc mat-mat multiplication"""

import numpy as np
from firedrake import COMM_WORLD
from firedrake.petsc import PETSc
from numpy.testing import assert_array_almost_equal

# import pdb

nproc = COMM_WORLD.size
rank = COMM_WORLD.rank


def Print(x: str):
    """Prints the string only on the root process

    Args:
        x (str): String to be printed
    """
    PETSc.Sys.Print(x)


def print_mat_info(mat, name):
    """Prints the matrix information

    Args:
        mat (PETSc mat): PETSc matrix
        name (string): Name of the matrix
    """
    Print(f"MATRIX {name} [{mat.getSize()[0]}x{mat.getSize()[1]}]")
    # print(f"For rank {rank} local {name}: {mat.getSizes()}")
    Print(mat.getType())
    mat.view()
    Print("")
    COMM_WORLD.Barrier()
    Print("")


def create_petsc_matrix_seq(input_array):
    """Building a sequential PETSc matrix from an array

    Args:
        input_array (np array): Input array

    Returns:
        seq mat: PETSc matrix
    """
    assert len(input_array.shape) == 2

    m, n = input_array.shape
    matrix = PETSc.Mat().createAIJ(size=(m, n), comm=PETSc.COMM_SELF)
    matrix.setUp()

    matrix.setValues(range(m), range(n), input_array, addv=False)

    # Assemble the matrix to compute the final structure
    matrix.assemblyBegin()
    matrix.assemblyEnd()

    return matrix


def create_petsc_matrix(input_array, sparse=True):
    """Create a PETSc matrix from an input_array

    Args:
        input_array (np array): Input array
        sparse (bool, optional): Toggle for sparse or dense. Defaults to True.

    Returns:
        PETSc mat: PETSc matrix
    """
    # The input array must be 2-dimensional
    assert len(input_array.shape) == 2, "Input array should be 2-dimensional"
    global_rows, global_cols = input_array.shape

    size = ((None, global_rows), (global_cols, global_cols))

    # Create a sparse or dense matrix based on the 'sparse' argument
    if sparse:
        matrix = PETSc.Mat().createAIJ(size=size, comm=COMM_WORLD)
    else:
        matrix = PETSc.Mat().createDense(size=size, comm=COMM_WORLD)
    matrix.setUp()

    local_rows_start, local_rows_end = matrix.getOwnershipRange()

    for counter, i in enumerate(range(local_rows_start, local_rows_end)):
        # Calculate the correct row in the array for the current process
        row_in_array = counter + local_rows_start
        matrix.setValues(
            i, range(global_cols), input_array[row_in_array, :], addv=False
        )

    # Assemble the matrix to compute the final structure
    matrix.assemblyBegin()
    matrix.assemblyEnd()

    return matrix


def get_local_submatrix(A):
    """Get the local submatrix of A

    Args:
        A (mpi PETSc mat): partitioned PETSc matrix

    Returns:
        seq mat: PETSc matrix
    """
    local_rows_start, local_rows_end = A.getOwnershipRange()
    local_rows = local_rows_end - local_rows_start
    comm = A.getComm()
    rows = PETSc.IS().createStride(
        local_rows, first=local_rows_start, step=1, comm=comm
    )
    _, k = A.getSize()  # Get the number of columns (k) from A's size
    cols = PETSc.IS().createStride(k, first=0, step=1, comm=comm)

    # Getting the local submatrix
    # TODO: Replace with MatMPIAIJGetLocalMat() once a petsc4py binding exists
    # (see https://gitlab.com/petsc/petsc/-/issues/1443 and the petsc-users thread below)
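    # Note: createSubMatrices() is collective over A's communicator and returns
    # a list of sequential (seqaij) matrices, one per (rows, cols) index-set
    # pair; only the first (and only) entry is needed here.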
    A_local = A.createSubMatrices(rows, cols)[0]
    return A_local


def multiply_sequential_matrices(A_seq, B_seq):
    """Multiply 2 sequential matrices

    Args:
        A_seq (seqaij): local submatrix of A
        B_seq (seqaij): sequential matrix B

    Returns:
        seq mat: PETSc matrix that is the product of A_seq and B_seq
    """
    _, A_seq_cols = A_seq.getSize()
    B_seq_rows, _ = B_seq.getSize()
    assert (
        A_seq_cols == B_seq_rows
    ), f"Incompatible matrix sizes for multiplication: {A_seq_cols} != {B_seq_rows}"
    C_local = A_seq.matMult(B_seq)
    return C_local


def create_global_matrix(C_local, A):
    """Create the global matrix C from the local submatrix C_local

    Args:
        C_local (seqaij): local submatrix of C
        A (mpi PETSc mat): PETSc matrix A

    Returns:
        mpi PETSc mat: partitioned PETSc matrix C
    """
    C_local_rows, C_local_cols = C_local.getSize()
    local_rows_start, _ = A.getOwnershipRange()
    m, _ = A.getSize()
    C = PETSc.Mat().createAIJ(
        size=((None, m), (C_local_cols, C_local_cols)), comm=COMM_WORLD
    )
    C.setUp()
    for i in range(C_local_rows):
        cols, values = C_local.getRow(i)
        global_row = i + local_rows_start
        C.setValues(global_row, cols, values)
    C.assemblyBegin()
    C.assemblyEnd()
    return C


m, k = 11, 7
# Generate the random numpy matrices
np.random.seed(0)  # sets the seed to 0
A_np = np.random.randint(low=0, high=6, size=(m, k))
B_np = np.random.randint(low=0, high=6, size=(k, k))

# Create B as a sequential matrix on each process
B_seq = create_petsc_matrix_seq(B_np)
print_mat_info(B_seq, "B")

A = create_petsc_matrix(A_np)
print_mat_info(A, "A")

# Getting the correct local submatrix to be multiplied by B_seq
A_local = get_local_submatrix(A)

# Multiplication of 2 sequential matrices
C_local = multiply_sequential_matrices(A_local, B_seq)

# Creating the global C matrix
C = create_global_matrix(C_local, A)
print_mat_info(C, "C")

# --------------------------------------------
# TEST: Multiplication of 2 numpy matrices
# --------------------------------------------
AB_np = np.dot(A_np, B_np)
Print(f"MATRIX AB_np [{AB_np.shape[0]}x{AB_np.shape[1]}]")
Print(AB_np)

# Get the locally owned rows of C as a dense NumPy block
local_rows_start, local_rows_end = C.getOwnershipRange()
C_local_np = C.getValues(range(local_rows_start, local_rows_end), range(k))

# Assert the correctness of the multiplication for the locally owned rows
assert_array_almost_equal(
    C_local_np, AB_np[local_rows_start:local_rows_end, :], decimal=5
)
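
For completeness, the script is launched in the usual MPI way, e.g. "mpiexec -n 4 python <script>.py", and the final assertion only checks the rows owned by each rank. If a single global pass/fail is preferred, a minimal sketch (assuming mpi4py is importable, which a firedrake installation already provides) could reduce the local comparison across ranks:

from mpi4py import MPI

local_ok = bool(np.allclose(C_local_np, AB_np[local_rows_start:local_rows_end, :]))
all_ok = COMM_WORLD.allreduce(local_ok, op=MPI.LAND)  # logical AND over all ranks
Print(f"Global check passed: {all_ok}")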




> On 23 Aug 2023, at 14:40, Pierre Jolivet <pierre.jolivet at lip6.fr> wrote:
> 
> 
> 
>> On 23 Aug 2023, at 8:59 PM, Thanasis Boutsikakis <thanasis.boutsikakis at corintis.com> wrote:
>> 
>> Thanks for the clarification Mark.
>> 
>> I have tried such an implementation, but since I could not find the equivalent of https://petsc.org/main/manualpages/Mat/MatMPIAIJGetLocalMat/ for petsc4py, I used A.getLocalSubMatrix to do so, which returns a 'localref' object that I cannot then use to get my local 'seqaij' matrix.
> 
> You really need MatMPIAIJGetLocalMat().
> It seems there is a missing petsc4py binding; either add it yourself (and please create a merge request), or post an issue on GitLab and hope that someone adds it.
> Another way to bypass the issue would be to call MatCreateSubMatrices() with an is_row set to the local rows, and an is_col set to all columns, but this will be less scalable than MatMPIAIJGetLocalMat().
> 
> Thanks,
> Pierre
> 
>> I also tried A.getDenseLocalMatrix() but it seems not to be suitable for my case. I couldn’t find an example in the petsc4py source code or something relevant, do you have any ideas?
>> 
>> """Experimenting with petsc mat-mat multiplication"""
>> # import pdb
>> 
>> import numpy as np
>> from firedrake import COMM_WORLD
>> from firedrake.petsc import PETSc
>> from numpy.testing import assert_array_almost_equal
>> import pdb
>> 
>> nproc = COMM_WORLD.size
>> rank = COMM_WORLD.rank
>> 
>> 
>> def Print(x: str):
>>     """Prints the string only on the root process
>> 
>>     Args:
>>         x (str): String to be printed
>>     """
>>     PETSc.Sys.Print(x)
>> 
>> 
>> def create_petsc_matrix_seq(input_array):
>>     """Building a sequential petsc matrix from an array
>> 
>>     Args:
>>         input_array (np array): Input array
>> 
>>     Returns:
>>         seq mat: PETSc matrix
>>     """
>>     assert len(input_array.shape) == 2
>> 
>>     m, n = input_array.shape
>>     matrix = PETSc.Mat().createAIJ(size=(m, n), comm=PETSc.COMM_SELF)
>>     matrix.setUp()
>> 
>>     matrix.setValues(range(m), range(n), input_array, addv=False)
>> 
>>     # Assembly the matrix to compute the final structure
>>     matrix.assemblyBegin()
>>     matrix.assemblyEnd()
>> 
>>     return matrix
>> 
>> 
>> def create_petsc_matrix(input_array, partition_like=None):
>>     """Create a PETSc matrix from an input_array
>> 
>>     Args:
>>         input_array (np array): Input array
>>         partition_like (petsc mat, optional): Petsc matrix. Defaults to None.
>>         sparse (bool, optional): Toggle for sparese or dense. Defaults to True.
>> 
>>     Returns:
>>         petsc mat: PETSc matrix
>>     """
>>     # Check if input_array is 1D and reshape if necessary
>>     assert len(input_array.shape) == 2, "Input array should be 2-dimensional"
>>     global_rows, global_cols = input_array.shape
>> 
>>     comm = COMM_WORLD
>>     if partition_like is not None:
>>         local_rows_start, local_rows_end = partition_like.getOwnershipRange()
>>         local_rows = local_rows_end - local_rows_start
>> 
>>         # No parallelization in the columns, set local_cols = None to parallelize
>>         size = ((local_rows, global_rows), (global_cols, global_cols))
>>     else:
>>         size = ((None, global_rows), (global_cols, global_cols))
>> 
>>     matrix = PETSc.Mat().createAIJ(size=size, comm=comm)
>>     matrix.setUp()
>> 
>>     local_rows_start, local_rows_end = matrix.getOwnershipRange()
>> 
>>     for counter, i in enumerate(range(local_rows_start, local_rows_end)):
>>         # Calculate the correct row in the array for the current process
>>         row_in_array = counter + local_rows_start
>>         matrix.setValues(
>>             i, range(global_cols), input_array[row_in_array, :], addv=False
>>         )
>> 
>>     # Assembly the matrix to compute the final structure
>>     matrix.assemblyBegin()
>>     matrix.assemblyEnd()
>> 
>>     return matrix
>> 
>> 
>> m, k = 10, 3
>> # Generate the random numpy matrices
>> np.random.seed(0)  # sets the seed to 0
>> A_np = np.random.randint(low=0, high=6, size=(m, k))
>> B_np = np.random.randint(low=0, high=6, size=(k, k))
>> 
>> # Create B as a sequential matrix on each process
>> B_seq = create_petsc_matrix_seq(B_np)
>> 
>> Print(B_seq.getType())
>> Print(B_seq.getSizes())
>> 
>> A = create_petsc_matrix(A_np)
>> 
>> print("A type:", A.getType())
>> print("A sizes:", A.getSizes())
>> print("A local ownership range:", A.getOwnershipRange())
>> 
>> # pdb.set_trace()
>> 
>> # Create a local sequential matrix for A using the local submatrix
>> local_rows_start, local_rows_end = A.getOwnershipRange()
>> local_rows = local_rows_end - local_rows_start
>> 
>> print("local_rows_start:", local_rows_start)
>> print("local_rows_end:", local_rows_end)
>> print("local_rows:", local_rows)
>> 
>> local_A = PETSc.Mat().createAIJ(size=(local_rows, k), comm=PETSc.COMM_SELF)
>> 
>> # pdb.set_trace()
>> 
>> comm = A.getComm()
>> rows = PETSc.IS().createStride(local_rows, first=0, step=1, comm=comm)
>> cols = PETSc.IS().createStride(k, first=0, step=1, comm=comm)
>> 
>> print("rows indices:", rows.getIndices())
>> print("cols indices:", cols.getIndices())
>> 
>> # pdb.set_trace()
>> 
>> # Create the local to global mapping for rows and columns
>> l2g_rows = PETSc.LGMap().create(rows.getIndices(), comm=comm)
>> l2g_cols = PETSc.LGMap().create(cols.getIndices(), comm=comm)
>> 
>> print("l2g_rows type:", type(l2g_rows))
>> print("l2g_rows:", l2g_rows.view())
>> print("l2g_rows type:", type(l2g_cols))
>> print("l2g_cols:", l2g_cols.view())
>> 
>> # pdb.set_trace()
>> 
>> # Set the local-to-global mapping for the matrix
>> A.setLGMap(l2g_rows, l2g_cols)
>> 
>> # pdb.set_trace()
>> 
>> # Now you can get the local submatrix
>> local_A = A.getLocalSubMatrix(rows, cols)
>> 
>> # Assembly the matrix to compute the final structure
>> local_A.assemblyBegin()
>> local_A.assemblyEnd()
>> 
>> Print(local_A.getType())
>> Print(local_A.getSizes())
>> 
>> # pdb.set_trace()
>> 
>> # Multiply the two matrices
>> local_C = local_A.matMult(B_seq)
>> 
>> 
>>> On 23 Aug 2023, at 12:56, Mark Adams <mfadams at lbl.gov> wrote:
>>> 
>>> 
>>> 
>>> On Wed, Aug 23, 2023 at 5:36 AM Thanasis Boutsikakis <thanasis.boutsikakis at corintis.com <mailto:thanasis.boutsikakis at corintis.com>> wrote:
>>>> Thanks for the suggestion Pierre.
>>>> 
>>>> Yes B is duplicated by all processes.
>>>> 
>>>> In this case, should B be created as a sequential sparse matrix using COMM_SELF?
>>> 
>>> Yes, that is what Pierre said,
>>> 
>>> Mark
>>>  
>>>> I guess if not, the multiplication of B with the output of https://petsc.org/main/manualpages/Mat/MatMPIAIJGetLocalMat/ would not go through, right? 
>>>> 
>>>> Thanks,
>>>> Thanos
>>>>  
>>>>> On 23 Aug 2023, at 10:47, Pierre Jolivet <pierre.jolivet at lip6.fr <mailto:pierre.jolivet at lip6.fr>> wrote:
>>>>> 
>>>>> 
>>>>> 
>>>>>> On 23 Aug 2023, at 5:35 PM, Thanasis Boutsikakis <thanasis.boutsikakis at corintis.com <mailto:thanasis.boutsikakis at corintis.com>> wrote:
>>>>>> 
>>>>>> Hi all,
>>>>>> 
>>>>>> I am trying to multiply two Petsc matrices as C = A * B, where A is a tall matrix and B is a relatively small matrix.
>>>>>> 
>>>>>> I have taken the decision to create A as a (row-)partitioned matrix and B as a non-partitioned matrix that is entirely shared by all procs (to avoid unnecessary communication).
>>>>>> 
>>>>>> Here is my code:
>>>>>> 
>>>>>> import numpy as np
>>>>>> from firedrake import COMM_WORLD
>>>>>> from firedrake.petsc import PETSc
>>>>>> from numpy.testing import assert_array_almost_equal
>>>>>> 
>>>>>> nproc = COMM_WORLD.size
>>>>>> rank = COMM_WORLD.rank
>>>>>> 
>>>>>> def create_petsc_matrix_non_partitioned(input_array):
>>>>>>     """Building a mpi non-partitioned petsc matrix from an array
>>>>>> 
>>>>>>     Args:
>>>>>>         input_array (np array): Input array
>>>>>>         sparse (bool, optional): Toggle for sparese or dense. Defaults to True.
>>>>>> 
>>>>>>     Returns:
>>>>>>         mpi mat: PETSc matrix
>>>>>>     """
>>>>>>     assert len(input_array.shape) == 2
>>>>>> 
>>>>>>     m, n = input_array.shape
>>>>>> 
>>>>>>     matrix = PETSc.Mat().createAIJ(size=((m, n), (m, n)), comm=COMM_WORLD)
>>>>>> 
>>>>>>     # Set the values of the matrix
>>>>>>     matrix.setValues(range(m), range(n), input_array[:, :], addv=False)
>>>>>> 
>>>>>>     # Assembly the matrix to compute the final structure
>>>>>>     matrix.assemblyBegin()
>>>>>>     matrix.assemblyEnd()
>>>>>> 
>>>>>>     return matrix
>>>>>> 
>>>>>> 
>>>>>> def create_petsc_matrix(input_array, partition_like=None):
>>>>>>     """Create a PETSc matrix from an input_array
>>>>>> 
>>>>>>     Args:
>>>>>>         input_array (np array): Input array
>>>>>>         partition_like (petsc mat, optional): Petsc matrix. Defaults to None.
>>>>>>         sparse (bool, optional): Toggle for sparese or dense. Defaults to True.
>>>>>> 
>>>>>>     Returns:
>>>>>>         petsc mat: PETSc matrix
>>>>>>     """
>>>>>>     # Check if input_array is 1D and reshape if necessary
>>>>>>     assert len(input_array.shape) == 2, "Input array should be 2-dimensional"
>>>>>>     global_rows, global_cols = input_array.shape
>>>>>> 
>>>>>>     comm = COMM_WORLD
>>>>>>     if partition_like is not None:
>>>>>>         local_rows_start, local_rows_end = partition_like.getOwnershipRange()
>>>>>>         local_rows = local_rows_end - local_rows_start
>>>>>> 
>>>>>>         # No parallelization in the columns, set local_cols = None to parallelize
>>>>>>         size = ((local_rows, global_rows), (global_cols, global_cols))
>>>>>>     else:
>>>>>>         size = ((None, global_rows), (global_cols, global_cols))
>>>>>> 
>>>>>>     matrix = PETSc.Mat().createAIJ(size=size, comm=comm)
>>>>>>     matrix.setUp()
>>>>>> 
>>>>>>     local_rows_start, local_rows_end = matrix.getOwnershipRange()
>>>>>> 
>>>>>>     for counter, i in enumerate(range(local_rows_start, local_rows_end)):
>>>>>>         # Calculate the correct row in the array for the current process
>>>>>>         row_in_array = counter + local_rows_start
>>>>>>         matrix.setValues(
>>>>>>             i, range(global_cols), input_array[row_in_array, :], addv=False
>>>>>>         )
>>>>>> 
>>>>>>     # Assembly the matrix to compute the final structure
>>>>>>     matrix.assemblyBegin()
>>>>>>     matrix.assemblyEnd()
>>>>>> 
>>>>>>     return matrix
>>>>>> 
>>>>>> 
>>>>>> m, k = 10, 3
>>>>>> # Generate the random numpy matrices
>>>>>> np.random.seed(0)  # sets the seed to 0
>>>>>> A_np = np.random.randint(low=0, high=6, size=(m, k))
>>>>>> B_np = np.random.randint(low=0, high=6, size=(k, k))
>>>>>> 
>>>>>> 
>>>>>> A = create_petsc_matrix(A_np)
>>>>>> 
>>>>>> B = create_petsc_matrix_non_partitioned(B_np)
>>>>>> 
>>>>>> # Now perform the multiplication
>>>>>> C = A * B
>>>>>> 
>>>>>> The problem with this is that there is a mismatch between the local rows of A (depend on the partitioning) and the global rows of B (3 for all procs), so that the multiplication cannot happen in parallel. Here is the error:
>>>>>> 
>>>>>> [0]PETSC ERROR: ------------------------------------------------------------------------
>>>>>> [0]PETSC ERROR: Caught signal number 11 SEGV: Segmentation Violation, probably memory access out of range
>>>>>> [0]PETSC ERROR: Try option -start_in_debugger or -on_error_attach_debugger
>>>>>> [0]PETSC ERROR: or see https://petsc.org/release/faq/#valgrind and https://petsc.org/release/faq/
>>>>>> [0]PETSC ERROR: configure using --with-debugging=yes, recompile, link, and run
>>>>>> [0]PETSC ERROR: to get more information on the crash.
>>>>>> [0]PETSC ERROR: Run with -malloc_debug to check if memory corruption is causing the crash.
>>>>>> application called MPI_Abort(MPI_COMM_WORLD, 59) - process 0
>>>>>> [unset]: write_line error; fd=-1 buf=:cmd=abort exitcode=59
>>>>>> :
>>>>>> system msg for write_line failure : Bad file descriptor
>>>>>> 
>>>>>> 
>>>>>> Is there a standard way to achieve this?
>>>>> 
>>>>> Your B is duplicated by all processes?
>>>>> If so, then, call https://petsc.org/main/manualpages/Mat/MatMPIAIJGetLocalMat/, do a sequential product with B on COMM_SELF, not COMM_WORLD, and use https://petsc.org/main/manualpages/Mat/MatCreateMPIMatConcatenateSeqMat/ with the output.
>>>>> 
>>>>> Thanks,
>>>>> Pierre
>>>>> 
>>>>>> Thanks,
>>>>>> Thanos
>>>>>> 
>>>>>>   
>>>> 
>> 
> 
