<!-- BaNnErBlUrFlE-BoDy-start -->
<!-- Preheader Text : BEGIN -->
<div style="display:none !important;display:none;visibility:hidden;mso-hide:all;font-size:1px;color:#ffffff;line-height:1px;height:0px;max-height:0px;opacity:0;overflow:hidden;">
 Hi Barry, Thanks, I am interested in this PCMPI solution provided by PETSc! I tried the src/ksp/ksp/tutorials/ex1.c which is configured in CMakelists as follows: ./configure PETSC_ARCH=config-debug --with-scalar-type=complex --with-fortran-kernels=1
</div>
<!-- Preheader Text : END -->

<!-- Email Banner : BEGIN -->
<div style="display:none !important;display:none;visibility:hidden;mso-hide:all;font-size:1px;color:#ffffff;line-height:1px;height:0px;max-height:0px;opacity:0;overflow:hidden;">ZjQcmQRYFpfptBannerStart</div>

<!--[if ((ie)|(mso))]>
  <table border="0" cellspacing="0" cellpadding="0" width="100%" style="padding: 16px 0px 16px 0px; direction: ltr" ><tr><td>
    <table border="0" cellspacing="0" cellpadding="0" style="padding: 0px 10px 5px 6px; width: 100%; border-radius:4px; border-top:4px solid #90a4ae;background-color:#D0D8DC;"><tr><td valign="top">
      <table align="left" border="0" cellspacing="0" cellpadding="0" style="padding: 4px 8px 4px 8px">
        <tr><td style="color:#000000; font-family: 'Arial', sans-serif; font-weight:bold; font-size:14px; direction: ltr">
          This Message Is From an External Sender
        </td></tr>
        <tr><td style="color:#000000; font-weight:normal; font-family: 'Arial', sans-serif; font-size:12px; direction: ltr">
          This message came from outside your organization.
        </td></tr>

      </table>

    </td></tr></table>
  </td></tr></table>
<![endif]-->

<![if !((ie)|(mso))]>
  <div dir="ltr"  id="pfptBannerwfqzir0" style="all: revert !important; display:block !important; text-align: left !important; margin:16px 0px 16px 0px !important; padding:8px 16px 8px 16px !important; border-radius: 4px !important; min-width: 200px !important; background-color: #D0D8DC !important; background-color: #D0D8DC; border-top: 4px solid #90a4ae !important; border-top: 4px solid #90a4ae;">
    <div id="pfptBannerwfqzir0" style="all: unset !important; float:left !important; display:block !important; margin: 0px 0px 1px 0px !important; max-width: 600px !important;">
      <div id="pfptBannerwfqzir0" style="all: unset !important; display:block !important; visibility: visible !important; background-color: #D0D8DC !important; color:#000000 !important; color:#000000; font-family: 'Arial', sans-serif !important; font-family: 'Arial', sans-serif; font-weight:bold !important; font-weight:bold; font-size:14px !important; line-height:18px !important; line-height:18px">
        This Message Is From an External Sender
      </div>
      <div id="pfptBannerwfqzir0" style="all: unset !important; display:block !important; visibility: visible !important; background-color: #D0D8DC !important; color:#000000 !important; color:#000000; font-weight:normal; font-family: 'Arial', sans-serif !important; font-family: 'Arial', sans-serif; font-size:12px !important; line-height:18px !important; line-height:18px; margin-top:2px !important;">
This message came from outside your organization.
      </div>

    </div>

    <div style="clear: both !important; display: block !important; visibility: hidden !important; line-height: 0 !important; font-size: 0.01px !important; height: 0px"> </div>
  </div>
<![endif]>

<div style="display:none !important;display:none;visibility:hidden;mso-hide:all;font-size:1px;color:#ffffff;line-height:1px;height:0px;max-height:0px;opacity:0;overflow:hidden;">ZjQcmQRYFpfptBannerEnd</div>
<!-- Email Banner : END -->

<!-- BaNnErBlUrFlE-BoDy-end -->
<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head><!-- BaNnErBlUrFlE-HeAdEr-start -->
<style>
  #pfptBannerwfqzir0 { all: revert !important; display: block !important; 
    visibility: visible !important; opacity: 1 !important; 
    background-color: #D0D8DC !important; 
    max-width: none !important; max-height: none !important }
  .pfptPrimaryButtonwfqzir0:hover, .pfptPrimaryButtonwfqzir0:focus {
    background-color: #b4c1c7 !important; }
  .pfptPrimaryButtonwfqzir0:active {
    background-color: #90a4ae !important; }
</style>

<!-- BaNnErBlUrFlE-HeAdEr-end -->

<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta name="Generator" content="Microsoft Word 15 (filtered medium)">
<style><!--
/* Font Definitions */
@font-face
        {font-family:"Cambria Math";
        panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
        {font-family:DengXian;
        panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
        {font-family:Aptos;
        panose-1:2 11 0 4 2 2 2 2 2 4;}
@font-face
        {font-family:"Segoe UI";
        panose-1:2 11 5 2 4 2 4 2 2 3;}
@font-face
        {font-family:"\@DengXian";
        panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
        {font-family:"PingFang SC";
        panose-1:2 11 4 0 0 0 0 0 0 0;}
@font-face
        {font-family:"\@PingFang SC";}
@font-face
        {font-family:"Helvetica Neue";
        panose-1:2 0 5 3 0 0 0 2 0 4;}
@font-face
        {font-family:Menlo;
        panose-1:2 11 6 9 3 8 4 2 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0cm;
        font-size:10.0pt;
        font-family:"Aptos",sans-serif;}
a:link, span.MsoHyperlink
        {mso-style-priority:99;
        color:blue;
        text-decoration:underline;}
p.p1, li.p1, div.p1
        {mso-style-name:p1;
        margin:0cm;
        font-size:10.0pt;
        font-family:"Helvetica Neue";}
span.apple-converted-space
        {mso-style-name:apple-converted-space;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-size:10.0pt;
        mso-ligatures:none;}
@page WordSection1
        {size:612.0pt 792.0pt;
        margin:72.0pt 72.0pt 72.0pt 72.0pt;}
div.WordSection1
        {page:WordSection1;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->
</head>
<body lang="en-CN" link="blue" vlink="purple" style="word-wrap:break-word;line-break:after-white-space">
<div class="WordSection1">
<p class="MsoNormal"><span style="font-size:11.0pt">Hi Barry,<br>
<br>
Thanks, I am interested in this PCMPI solution provided by PETSc!<br>
<br>
I tried the src/ksp/ksp/tutorials/ex1.c which is </span><b><span style="font-family:"Helvetica Neue"">configured in CMakelists as follows:</span></b></p>
<p class="p1"><span style="mso-fareast-language:ZH-TW"><br>
</span>./configure PETSC_ARCH=config-debug --with-scalar-type=complex --with-fortran-kernels=1 --with-debugging=0 --with-logging=0 --with-cxx=g++ --download-mpich --download-superlu --download-opencascade --with-openblas-include=${OPENBLAS_INC} --with-openblas-lib=${OPENBLAS_LIB}</p>
<p class="MsoNormal"><span style="font-family:"Helvetica Neue";mso-fareast-language:ZH-TW"><br>
</span><span style="font-size:11.0pt;mso-fareast-language:ZH-TW">In the linux terminal, my bash script is as follows,<br>
<br>
mpiexec -n 4 ./ex1 -mpi_linear_solver_server -mpi_linear_solver_server_view<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:11.0pt;mso-fareast-language:ZH-TW"><o:p> </o:p></span></p>
<p class="MsoNormal"><span style="font-size:11.0pt">However, I found the output a bit strange<br>
<br>
<i>Norm of error 1.23629e-15, Iterations 1<o:p></o:p></i></span></p>
<p class="MsoNormal"><i><span style="font-size:11.0pt">MPI linear solver server statistics:<o:p></o:p></span></i></p>
<p class="MsoNormal"><i><span style="font-size:11.0pt">    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its<o:p></o:p></span></i></p>
<p class="MsoNormal"><i><span style="font-size:11.0pt">  Sequential         3                         2            10           1<o:p></o:p></span></i></p>
<p class="MsoNormal"><i><span style="font-size:11.0pt">Norm of error 1.23629e-15, Iterations 1<o:p></o:p></span></i></p>
<p class="MsoNormal"><i><span style="font-size:11.0pt">MPI linear solver server statistics:<o:p></o:p></span></i></p>
<p class="MsoNormal"><i><span style="font-size:11.0pt">    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its<o:p></o:p></span></i></p>
<p class="MsoNormal"><i><span style="font-size:11.0pt">  Sequential         3                         2            10           1<o:p></o:p></span></i></p>
<p class="MsoNormal"><i><span style="font-size:11.0pt">Norm of error 1.23629e-15, Iterations 1<o:p></o:p></span></i></p>
<p class="MsoNormal"><i><span style="font-size:11.0pt">MPI linear solver server statistics:<o:p></o:p></span></i></p>
<p class="MsoNormal"><i><span style="font-size:11.0pt">    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its<o:p></o:p></span></i></p>
<p class="MsoNormal"><i><span style="font-size:11.0pt">  Sequential         3                         2            10           1<o:p></o:p></span></i></p>
<p class="MsoNormal"><i><span style="font-size:11.0pt">Norm of error 1.23629e-15, Iterations 1<o:p></o:p></span></i></p>
<p class="MsoNormal"><i><span style="font-size:11.0pt">MPI linear solver server statistics:<o:p></o:p></span></i></p>
<p class="MsoNormal"><i><span style="font-size:11.0pt">    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its<o:p></o:p></span></i></p>
<p class="MsoNormal"><i><span style="font-size:11.0pt">  Sequential         3                         2            10           1</span></i><span style="font-size:11.0pt"><br>
<br>
It seems that mpi started four processes, but they all did the same things, and I am confused why the ranks showed sequential. Are these supposed to be the desired output when the
</span><span style="font-size:11.0pt;mso-fareast-language:ZH-TW">mpi_linear_solver_server is turned on?<br>
<br>
And if I run ex1 without any hyphen options, I got <br>
<br>
<i>Norm of error 2.47258e-15, Iterations 5</i><br>
<br>
It looks like the KSPSolver uses 5 iterations to reach convergence, but why when mpi_linear_solver_server is enabled, it uses 1?<br>
<br>
I hope to get some help on these issues, thank you!<br>
<br>
Sincerely,<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:11.0pt;mso-fareast-language:ZH-TW">Yongzhong<br>
<br>
</span><span style="font-size:11.0pt"><br>
<br>
<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:11.0pt"><o:p> </o:p></span></p>
<div id="mail-editor-reference-message-container">
<div>
<div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0cm 0cm 0cm">
<p class="MsoNormal" style="margin-bottom:12.0pt"><b><span style="font-size:12.0pt;color:black">From:
</span></b><span style="font-size:12.0pt;color:black">Barry Smith <bsmith@petsc.dev><br>
<b>Date: </b>Tuesday, April 23, 2024 at 5:15</span><span style="font-size:12.0pt;font-family:"Arial",sans-serif;color:black"> </span><span style="font-size:12.0pt;color:black">PM<br>
<b>To: </b>Yongzhong Li <yongzhong.li@mail.utoronto.ca><br>
<b>Cc: </b>petsc-users@mcs.anl.gov <petsc-users@mcs.anl.gov>, petsc-maint@mcs.anl.gov <petsc-maint@mcs.anl.gov>, Piero Triverio <piero.triverio@utoronto.ca><br>
<b>Subject: </b>Re: [petsc-maint] Inquiry about Multithreading Capabilities in PETSc's KSPSolver<o:p></o:p></span></p>
</div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
</div>
<p class="MsoNormal"><span style="font-size:12.0pt">  Yes, only the routines that can explicitly use BLAS have multi-threading.<o:p></o:p></span></p>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
</div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt">   PETSc does support using any MPI linear solvers from a sequential (or OpenMP) main program using the </span><a href="https://urldefense.us/v3/__https://petsc.org/release/manualpages/PC/PCMPI/*pcmpi__;Iw!!G_uCfscf7eWS!coHE0Q8qDqtiQ2cOB3iEz4D8dXOcJd56gBkPuaz5Y7pMygwOLBhFHyXhSi3WWUkAulg3kXxAYomf9fk-dT-fzj0mGFIgnkEZ54I$"><span style="font-size:12.0pt">https://petsc.org/release/manualpages/PC/PCMPI/#pcmpi</span></a><span style="font-size:12.0pt"> construct.
  I am finishing up better support in the branch </span><span style="font-size:12.0pt;font-family:Menlo">barry/2023-09-15/fix-log-pcmpi</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
</div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
</div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt;font-family:Menlo">  Barry</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
</div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
</div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
</div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
</div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt"><br>
<br>
<o:p></o:p></span></p>
<blockquote style="margin-top:5.0pt;margin-bottom:5.0pt">
<div>
<p class="MsoNormal"><span style="font-size:12.0pt">On Apr 23, 2024, at 3:59</span><span style="font-size:12.0pt;font-family:"Arial",sans-serif"> </span><span style="font-size:12.0pt">PM, Yongzhong Li <yongzhong.li@mail.utoronto.ca> wrote:<o:p></o:p></span></p>
</div>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<div>
<div>
<div>
<p class="MsoNormal"><span lang="EN-US" style="font-size:11.0pt">Thanks Barry! Does this mean that the sparse matrix-vector products, which actually constitute the majority of the computations in my GMRES routine in PETSc, don’t utilize multithreading? Only
 basic vector operations such as VecAXPY and VecDot or dense matrix operations in PETSc will benefit from multithreading, is it correct?<br>
<br>
Best,</span></p>
</div>
<div>
<p class="MsoNormal"><span lang="EN-US" style="font-size:11.0pt">Yongzhong</span></p>
</div>
<div>
<p class="MsoNormal"><span style="font-size:11.0pt"> </span></p>
</div>
<div id="mail-editor-reference-message-container">
<div>
<div style="border:none;border-top:solid windowtext 1.0pt;padding:3.0pt 0cm 0cm 0cm;border-color:currentcolor currentcolor">
<p class="MsoNormal" style="margin-bottom:12.0pt"><b><span style="font-size:12.0pt">From:<span class="apple-converted-space"> </span></span></b><span style="font-size:12.0pt">Barry Smith <</span><a href="mailto:bsmith@petsc.dev"><span style="font-size:12.0pt">bsmith@petsc.dev</span></a><span style="font-size:12.0pt">><br>
<b>Date:<span class="apple-converted-space"> </span></b>Tuesday, April 23, 2024 at 3:35</span><span style="font-size:12.0pt;font-family:"Arial",sans-serif"> </span><span style="font-size:12.0pt">PM<br>
<b>To:<span class="apple-converted-space"> </span></b>Yongzhong Li <</span><a href="mailto:yongzhong.li@mail.utoronto.ca"><span style="font-size:12.0pt">yongzhong.li@mail.utoronto.ca</span></a><span style="font-size:12.0pt">><br>
<b>Cc:<span class="apple-converted-space"> </span></b></span><a href="mailto:petsc-users@mcs.anl.gov"><span style="font-size:12.0pt">petsc-users@mcs.anl.gov</span></a><span class="apple-converted-space"><span style="font-size:12.0pt"> </span></span><span style="font-size:12.0pt"><</span><a href="mailto:petsc-users@mcs.anl.gov"><span style="font-size:12.0pt">petsc-users@mcs.anl.gov</span></a><span style="font-size:12.0pt">>,<span class="apple-converted-space"> </span></span><a href="mailto:petsc-maint@mcs.anl.gov"><span style="font-size:12.0pt">petsc-maint@mcs.anl.gov</span></a><span class="apple-converted-space"><span style="font-size:12.0pt"> </span></span><span style="font-size:12.0pt"><</span><a href="mailto:petsc-maint@mcs.anl.gov"><span style="font-size:12.0pt">petsc-maint@mcs.anl.gov</span></a><span style="font-size:12.0pt">>,
 Piero Triverio <</span><a href="mailto:piero.triverio@utoronto.ca"><span style="font-size:12.0pt">piero.triverio@utoronto.ca</span></a><span style="font-size:12.0pt">><br>
<b>Subject:<span class="apple-converted-space"> </span></b>Re: [petsc-maint] Inquiry about Multithreading Capabilities in PETSc's KSPSolver</span><o:p></o:p></p>
</div>
<table class="MsoNormalTable" border="0" cellspacing="0" cellpadding="0" align="left" width="100%" style="width:100.0%;display:table;float:none">
<tbody>
<tr>
<td style="background:#A6A6A6;padding:5.25pt 1.5pt 5.25pt 1.5pt"></td>
<td width="100%" style="width:100.0%;background:#EAEAEA;padding:5.25pt 3.75pt 5.25pt 11.25pt">
<div>
<div>
<p class="MsoNormal" style="mso-element:frame;mso-element-frame-hspace:2.25pt;mso-element-wrap:around;mso-element-anchor-vertical:paragraph;mso-element-anchor-horizontal:column;mso-height-rule:exactly">
<span lang="ZH-CN" style="font-size:9.0pt;font-family:"PingFang SC",sans-serif;color:#212121">你通常不会收到来自</span><span class="apple-converted-space"><span style="font-size:9.0pt;font-family:"Segoe UI",sans-serif;color:#212121"> </span></span><span style="color:black"><a href="mailto:bsmith@petsc.dev"><span style="font-size:9.0pt;font-family:"Segoe UI",sans-serif">bsmith@petsc.dev</span></a></span><span class="apple-converted-space"><span style="font-size:9.0pt;font-family:"Segoe UI",sans-serif;color:#212121"> </span></span><span lang="ZH-CN" style="font-size:9.0pt;font-family:"PingFang SC",sans-serif;color:#212121">的电子邮件。</span><span style="color:black"><a href="https://urldefense.us/v3/__https://aka.ms/LearnAboutSenderIdentification__;!!G_uCfscf7eWS!coHE0Q8qDqtiQ2cOB3iEz4D8dXOcJd56gBkPuaz5Y7pMygwOLBhFHyXhSi3WWUkAulg3kXxAYomf9fk-dT-fzj0mGFIg97PG9-E$"><span lang="ZH-CN" style="font-size:9.0pt;font-family:"PingFang SC",sans-serif">了解这一点为什么很重要</span></a></span></p>
</div>
</div>
</td>
<td width="75" style="width:56.25pt;background:#EAEAEA;padding:5.25pt 3.75pt 5.25pt 3.75pt">
</td>
</tr>
</tbody>
</table>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt"> </span></p>
</div>
</div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt">   Intel MKL or OpenBLAS are the best bet, but for vector operations they will not be significant since the vector operations do not dominate the computations.</span></p>
</div>
<div>
<div>
<p class="MsoNormal" style="margin-bottom:12.0pt"><o:p> </o:p></p>
</div>
<blockquote style="margin-top:5.0pt;margin-bottom:5.0pt">
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt">On Apr 23, 2024, at 3:23</span><span style="font-size:12.0pt;font-family:"Arial",sans-serif"> </span><span style="font-size:12.0pt">PM, Yongzhong Li <</span><a href="mailto:yongzhong.li@mail.utoronto.ca"><span style="font-size:12.0pt">yongzhong.li@mail.utoronto.ca</span></a><span style="font-size:12.0pt">>
 wrote:</span></p>
</div>
</div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt"> </span></p>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:11.0pt">Hi Barry,<br>
<br>
Thank you for the information provided!<br>
<br>
Do you think different BLAS implementations will affect the multithreading performance of some vector operations in GMRES in PETSc?</span></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:11.0pt"> </span></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:11.0pt">I am now using OpenBLAS but didn’t see much improvement when the multithreading is enabled, do you think other implementations<span class="apple-converted-space"> </span></span><span lang="EN-US" style="font-size:11.0pt">such
 as netlib and intel-mkl will help?<br>
<br>
Best,</span></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal"><span lang="EN-US" style="font-size:11.0pt">Yongzhong</span></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:11.0pt"> </span></p>
</div>
</div>
<div id="mail-editor-reference-message-container">
<div>
<div style="border:none;border-top:solid windowtext 1.0pt;padding:3.0pt 0cm 0cm 0cm;border-color:currentcolor">
<p class="MsoNormal" style="margin-bottom:12.0pt"><b><span style="font-size:12.0pt">From:<span class="apple-converted-space"> </span></span></b><span style="font-size:12.0pt">Barry Smith <</span><a href="mailto:bsmith@petsc.dev"><span style="font-size:12.0pt">bsmith@petsc.dev</span></a><span style="font-size:12.0pt">><br>
<b>Date:<span class="apple-converted-space"> </span></b>Monday, April 22, 2024 at 4:20</span><span style="font-size:12.0pt;font-family:"Arial",sans-serif"> </span><span style="font-size:12.0pt">PM<br>
<b>To:<span class="apple-converted-space"> </span></b>Yongzhong Li <</span><a href="mailto:yongzhong.li@mail.utoronto.ca"><span style="font-size:12.0pt">yongzhong.li@mail.utoronto.ca</span></a><span style="font-size:12.0pt">><br>
<b>Cc:<span class="apple-converted-space"> </span></b></span><a href="mailto:petsc-users@mcs.anl.gov"><span style="font-size:12.0pt">petsc-users@mcs.anl.gov</span></a><span class="apple-converted-space"><span style="font-size:12.0pt"> </span></span><span style="font-size:12.0pt"><</span><a href="mailto:petsc-users@mcs.anl.gov"><span style="font-size:12.0pt">petsc-users@mcs.anl.gov</span></a><span style="font-size:12.0pt">>,<span class="apple-converted-space"> </span></span><a href="mailto:petsc-maint@mcs.anl.gov"><span style="font-size:12.0pt">petsc-maint@mcs.anl.gov</span></a><span class="apple-converted-space"><span style="font-size:12.0pt"> </span></span><span style="font-size:12.0pt"><</span><a href="mailto:petsc-maint@mcs.anl.gov"><span style="font-size:12.0pt">petsc-maint@mcs.anl.gov</span></a><span style="font-size:12.0pt">>,
 Piero Triverio <</span><a href="mailto:piero.triverio@utoronto.ca"><span style="font-size:12.0pt">piero.triverio@utoronto.ca</span></a><span style="font-size:12.0pt">><br>
<b>Subject:<span class="apple-converted-space"> </span></b>Re: [petsc-maint] Inquiry about Multithreading Capabilities in PETSc's KSPSolver</span><o:p></o:p></p>
</div>
<table class="MsoNormalTable" border="0" cellspacing="0" cellpadding="0" align="left" width="100%" style="width:100.0%;display:table;float:none">
<tbody>
<tr>
<td style="background:#A6A6A6;padding:5.25pt 1.5pt 5.25pt 1.5pt"></td>
<td width="100%" style="width:100.0%;background:#EAEAEA;padding:5.25pt 3.75pt 5.25pt 11.25pt">
<div>
<div>
<div>
<p class="MsoNormal" style="mso-element:frame;mso-element-frame-hspace:2.25pt;mso-element-wrap:around;mso-element-anchor-vertical:paragraph;mso-element-anchor-horizontal:column;mso-height-rule:exactly">
<span lang="ZH-CN" style="font-size:9.0pt;font-family:"PingFang SC",sans-serif;color:#212121">你通常不会收到来自</span><span class="apple-converted-space"><span style="font-size:9.0pt;font-family:"Segoe UI",sans-serif;color:#212121"> </span></span><span style="color:black"><a href="mailto:bsmith@petsc.dev"><span style="font-size:9.0pt;font-family:"Segoe UI",sans-serif">bsmith@petsc.dev</span></a></span><span class="apple-converted-space"><span style="font-size:9.0pt;font-family:"Segoe UI",sans-serif;color:#212121"> </span></span><span lang="ZH-CN" style="font-size:9.0pt;font-family:"PingFang SC",sans-serif;color:#212121">的电子邮件。</span><span style="color:black"><a href="https://urldefense.us/v3/__https://aka.ms/LearnAboutSenderIdentification__;!!G_uCfscf7eWS!coHE0Q8qDqtiQ2cOB3iEz4D8dXOcJd56gBkPuaz5Y7pMygwOLBhFHyXhSi3WWUkAulg3kXxAYomf9fk-dT-fzj0mGFIg97PG9-E$"><span lang="ZH-CN" style="font-size:9.0pt;font-family:"PingFang SC",sans-serif">了解这一点为什么很重要</span></a></span></p>
</div>
</div>
</div>
</td>
<td width="75" style="width:56.25pt;background:#EAEAEA;padding:5.25pt 3.75pt 5.25pt 3.75pt">
</td>
</tr>
</tbody>
</table>
<div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt"> </span></p>
</div>
</div>
</div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt">   PETSc provided solvers do not directly use threads. </span></p>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt"> </span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt">   The BLAS used by LAPACK and PETSc may use threads depending on what BLAS is being used and how it was configured. </span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt"> </span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt">   Some of the vector operations in GMRES in PETSc use BLAS that can use threads, including axpy, dot, etc. For sufficiently large problems, the use of threaded BLAS can help with these routines, but not significantly
 for the solver. </span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt"> </span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt">   Dense matrix-vector products MatMult() and dense matrix direct solvers PCLU use BLAS and thus can benefit from threading. The benefit can be significant for large enough problems with good hardware, especially
 with PCLU. </span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt"> </span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt">   If you run with -blas_view  PETSc tries to indicate information about the threading of BLAS. You can also use -blas_num_threads <n> to set the number of threads, equivalent to setting the environmental
 variable.  For dense solvers you can vary the number of threads and run with -log_view to see what it helps to improve and what it does not affect.</span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt"> </span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt"> </span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="margin-bottom:12.0pt"> <o:p></o:p></p>
</div>
<blockquote style="margin-top:5.0pt;margin-bottom:5.0pt">
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt">On Apr 22, 2024, at 4:06</span><span style="font-size:12.0pt;font-family:"Arial",sans-serif"> </span><span style="font-size:12.0pt">PM, Yongzhong Li <</span><a href="mailto:yongzhong.li@mail.utoronto.ca"><span style="font-size:12.0pt">yongzhong.li@mail.utoronto.ca</span></a><span style="font-size:12.0pt">>
 wrote:</span></p>
</div>
</div>
</div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt"> </span></p>
</div>
</div>
<div>
<div id="pfptBannerbeaibqa">
<div id="pfptBannerbeaibqa">
<div id="pfptBannerbeaibqa">
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt;font-family:"Arial",sans-serif">This Message Is From an External Sender</span></p>
</div>
</div>
</div>
<div id="pfptBannerbeaibqa">
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt;font-family:"Arial",sans-serif">This message came from outside your organization.</span></p>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-family:"Helvetica Neue"">Hello all,</span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-family:"Helvetica Neue""> </span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-family:"Helvetica Neue"">I am writing to ask if PETSc’s KSPSolver makes use of OpenMP/multithreading, specifically when performing iterative solutions with the GMRES algorithm.</span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-family:"Helvetica Neue""> </span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-family:"Helvetica Neue"">The questions appeared when I was running a large numerical program based on boundary element method. I used the PETSc's GMRES algorithm in KSPSolve to solve a shell matrix system iteratively.
 I observed that threads were being utilized, controlled by the<span class="apple-converted-space"> </span><b>OPENBLAS_NUM_THREADS</b><span class="apple-converted-space"> </span>environment variable. However, I noticed no significant performance difference
 between running the solver with multiple threads versus a single thread.<br>
<br>
Could you please<span class="apple-converted-space"> </span><b>confirm if GMRES in KSPSolve leverages multithreading, and also whether it is influenced by the multithreadings of the low-level math libraries such as BLAS and LAPACK?</b><span class="apple-converted-space"> </span><b>If
 so</b>, how can I enable multithreading effectively to see noticeable improvements in solution times when using GMRES?<span class="apple-converted-space"> </span><b>If not</b>, why do I observe that threads are being used during the GMRES solutions?</span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-family:"Helvetica Neue""> </span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><b><span style="font-family:"Helvetica Neue"">For reference, I am using PETSc version 3.16.0, configured in CMakelists as follows:</span></b></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-family:"Helvetica Neue""><br>
./configure PETSC_ARCH=config-release --with-scalar-type=complex --with-fortran-kernels=1 --with-debugging=0 COPTFLAGS=-O3 -march=native CXXOPTFLAGS=-O3 -march=native FOPTFLAGS=-O3 -march=native --with-cxx=g++ --download-openmpi --download-superlu --download-opencascade
 --with-openblas-include=${OPENBLAS_INC} --with-openblas-lib=${OPENBLAS_LIB} --with-threadsafety --with-log=0 --with-openmp<br>
<br>
To simplify the diagnosis of potential issues, I have also written a small example program using GMRES to solve a sparse matrix system derived from a 2D Poisson problem using the finite difference method. I found similar issues on this piece of codes. The code
 is as follows:<br>
<br>
#include <petscksp.h><br>
<br>
/* Monitor function to print iteration number and residual norm */<br>
PetscErrorCode MyKSPMonitor(KSP ksp, PetscInt n, PetscReal rnorm, void *ctx) {<br>
<span class="apple-converted-space">    </span>PetscErrorCode ierr;<br>
<span class="apple-converted-space">    </span>ierr = PetscPrintf(PETSC_COMM_WORLD, "Iteration %D, Residual norm %g\n", n, (double)rnorm);<br>
<span class="apple-converted-space">    </span>CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>return 0;<br>
}<br>
<br>
int main(int argc, char **args) {<br>
<span class="apple-converted-space">    </span>Vec x, b, x_true, e;<br>
<span class="apple-converted-space">    </span>Mat A;<br>
<span class="apple-converted-space">    </span>KSP ksp;<br>
<span class="apple-converted-space">    </span>PetscErrorCode ierr;<br>
<span class="apple-converted-space">    </span>PetscInt i, j, Ii, J, n = 500; // Size of the grid n x n<br>
<span class="apple-converted-space">    </span>PetscInt Istart, Iend, ncols;<br>
<span class="apple-converted-space">    </span>PetscScalar v;<br>
<span class="apple-converted-space">    </span>PetscMPIInt rank;<br>
<span class="apple-converted-space">    </span>PetscInitialize(&argc, &args, NULL, NULL);<br>
<span class="apple-converted-space">    </span>PetscLogDouble t1, t2;<span class="apple-converted-space">     </span>// Variables for timing<br>
<span class="apple-converted-space">    </span>MPI_Comm_rank(PETSC_COMM_WORLD, &rank);<br>
<br>
<span class="apple-converted-space">    </span>// Create vectors and matrix<br>
<span class="apple-converted-space">    </span>ierr = VecCreateMPI(PETSC_COMM_WORLD, PETSC_DECIDE, n*n, &x); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>ierr = VecDuplicate(x, &b); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>ierr = VecDuplicate(x, &x_true); CHKERRQ(ierr);<br>
<br>
<span class="apple-converted-space">    </span>// Set true solution as all ones<br>
<span class="apple-converted-space">    </span>ierr = VecSet(x_true, 1.0); CHKERRQ(ierr);<br>
<br>
<span class="apple-converted-space">    </span>// Create and assemble matrix A for the 2D Laplacian using 5-point stencil<br>
<span class="apple-converted-space">    </span>ierr = MatCreate(PETSC_COMM_WORLD, &A); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>ierr = MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, n*n, n*n); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>ierr = MatSetFromOptions(A); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>ierr = MatSetUp(A); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>ierr = MatGetOwnershipRange(A, &Istart, &Iend); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>for (Ii = Istart; Ii < Iend; Ii++) {<br>
<span class="apple-converted-space">        </span>i = Ii / n; // Row index<br>
<span class="apple-converted-space">        </span>j = Ii % n; // Column index<br>
<span class="apple-converted-space">        </span>v = -4.0;<br>
<span class="apple-converted-space">        </span>ierr = MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES); CHKERRQ(ierr);<br>
<span class="apple-converted-space">        </span>if (i > 0) { // South<br>
<span class="apple-converted-space">            </span>J = Ii - n;<br>
<span class="apple-converted-space">            </span>v = 1.0;<br>
<span class="apple-converted-space">            </span>ierr = MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES); CHKERRQ(ierr);<br>
<span class="apple-converted-space">        </span>}<br>
<span class="apple-converted-space">        </span>if (i < n - 1) { // North<br>
<span class="apple-converted-space">            </span>J = Ii + n;<br>
<span class="apple-converted-space">            </span>v = 1.0;<br>
<span class="apple-converted-space">            </span>ierr = MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES); CHKERRQ(ierr);<br>
<span class="apple-converted-space">        </span>}<br>
<span class="apple-converted-space">        </span>if (j > 0) { // West<br>
<span class="apple-converted-space">            </span>J = Ii - 1;<br>
<span class="apple-converted-space">            </span>v = 1.0;<br>
<span class="apple-converted-space">            </span>ierr = MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES); CHKERRQ(ierr);<br>
<span class="apple-converted-space">        </span>}<br>
<span class="apple-converted-space">        </span>if (j < n - 1) { // East<br>
<span class="apple-converted-space">            </span>J = Ii + 1;<br>
<span class="apple-converted-space">            </span>v = 1.0;<br>
<span class="apple-converted-space">            </span>ierr = MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES); CHKERRQ(ierr);<br>
<span class="apple-converted-space">        </span>}<br>
<span class="apple-converted-space">    </span>}<br>
<span class="apple-converted-space">    </span>ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);<br>
<br>
<span class="apple-converted-space">    </span>// Compute the RHS corresponding to the true solution<br>
<span class="apple-converted-space">    </span>ierr = MatMult(A, x_true, b); CHKERRQ(ierr);<br>
<br>
<span class="apple-converted-space">    </span>// Set up and solve the linear system<br>
<span class="apple-converted-space">    </span>ierr = KSPCreate(PETSC_COMM_WORLD, &ksp); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>ierr = KSPSetOperators(ksp, A, A); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>ierr = KSPSetType(ksp, KSPGMRES); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>ierr = KSPSetTolerances(ksp, 1e-5, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT); CHKERRQ(ierr);<br>
<br>
<span class="apple-converted-space">    </span>/* Set up the monitor */<br>
<span class="apple-converted-space">    </span>ierr = KSPMonitorSet(ksp, MyKSPMonitor, NULL, NULL); CHKERRQ(ierr);<br>
<br>
<span class="apple-converted-space">    </span>// Start timing<br>
<span class="apple-converted-space">    </span>PetscTime(&t1);<br>
<br>
<span class="apple-converted-space">    </span>ierr = KSPSolve(ksp, b, x); CHKERRQ(ierr);<br>
<br>
<span class="apple-converted-space">    </span>// Stop timing<br>
<span class="apple-converted-space">    </span>PetscTime(&t2);<br>
<br>
<span class="apple-converted-space">    </span>// Compute error<br>
<span class="apple-converted-space">    </span>ierr = VecDuplicate(x, &e); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>ierr = VecWAXPY(e, -1.0, x_true, x); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>PetscReal norm_error, norm_true;<br>
<span class="apple-converted-space">    </span>ierr = VecNorm(e, NORM_2, &norm_error); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>ierr = VecNorm(x_true, NORM_2, &norm_true); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>PetscReal relative_error = norm_error / norm_true;<br>
<span class="apple-converted-space">    </span>if (rank == 0) { // Print only from the first MPI process<br>
<span class="apple-converted-space">        </span>PetscPrintf(PETSC_COMM_WORLD, "Relative error ||x - x_true||_2 / ||x_true||_2: %g\n", (double)relative_error);<br>
<span class="apple-converted-space">    </span>}<br>
<br>
<span class="apple-converted-space">    </span>// Output the wall time taken for MatMult<br>
<span class="apple-converted-space">    </span>PetscPrintf(PETSC_COMM_WORLD, "Time taken for KSPSolve: %f seconds\n", t2 - t1);<br>
<br>
<span class="apple-converted-space">    </span>// Cleanup<br>
<span class="apple-converted-space">    </span>ierr = VecDestroy(&x); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>ierr = VecDestroy(&b); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>ierr = VecDestroy(&x_true); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>ierr = VecDestroy(&e); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>ierr = MatDestroy(&A); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>ierr = KSPDestroy(&ksp); CHKERRQ(ierr);<br>
<span class="apple-converted-space">    </span>PetscFinalize();<br>
<span class="apple-converted-space">    </span>return 0;<br>
}<br>
<br>
Here are some profiling results for the GMRES solution.<br>
<br>
OPENBLAS_NUM_THREADS = 1, iteration steps<span class="apple-converted-space">  </span>= 859, solution time = 16.1<br>
OPENBLAS_NUM_THREADS = 2, iteration steps<span class="apple-converted-space">  </span>= 859, solution time = 16.3</span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-family:"Helvetica Neue"">OPENBLAS_NUM_THREADS = 4, iteration steps<span class="apple-converted-space">  </span>= 859, solution time = 16.7</span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-family:"Helvetica Neue"">OPENBLAS_NUM_THREADS = 8, iteration steps<span class="apple-converted-space">  </span>= 859, solution time = 16.8</span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-family:"Helvetica Neue"">OPENBLAS_NUM_THREADS = 16, iteration steps<span class="apple-converted-space">  </span>= 859, solution time = 17.8</span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-family:"Helvetica Neue""><br>
<b>I am using one workstation with Intel® Core™ i9-11900K Processor, 8 cores, 16 threads. Note that I am not using multiple MPI processes, such as mpirun/mpiexec, so the default number of MPI processes should be 1; correct me if I am wrong.</b></span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-family:"Helvetica Neue""> </span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-family:"Helvetica Neue"">Thank you in advance!<br>
<br>
Sincerely,</span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-family:"Helvetica Neue"">Yongzhong</span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:11.0pt"> </span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span lang="EN-US" style="font-size:11.0pt;font-family:"Calibri",sans-serif">-----------------------------------------------------------</span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><b><span lang="EN-US" style="font-size:11.0pt;font-family:"Calibri",sans-serif">Yongzhong Li</span></b></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span lang="EN-US" style="font-size:11.0pt;font-family:"Calibri",sans-serif">PhD student | Electromagnetics Group</span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span lang="EN-US" style="font-size:11.0pt;font-family:"Calibri",sans-serif">Department of Electrical & Computer Engineering</span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span lang="EN-US" style="font-size:11.0pt;font-family:"Calibri",sans-serif">University of Toronto</span></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><a href="https://urldefense.us/v3/__http://www.modelics.org__;!!G_uCfscf7eWS!efVv_hPkRBEhyquXer2c8sFeGrjOtTjEGicYg2niCyfT9swzjLFyf6k4XrhKElaF-cX_Q02y9KSTRNFHPlKhXMtuzaTekCWcXgw$"><span lang="EN-US" style="font-size:11.0pt;font-family:"Calibri",sans-serif;color:#0563C1">http://www.modelics.org</span></a></p>
</div>
</div>
</div>
</div>
</div>
</blockquote>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</blockquote>
</div>
</div>
</div>
</div>
</div>
</div>
</blockquote>
</div>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
</div>
</div>
</div>
</div>
</div>
</body>
</html>