<!-- BaNnErBlUrFlE-BoDy-start -->
<!-- Preheader Text : BEGIN -->
<div style="display:none !important;display:none;visibility:hidden;mso-hide:all;font-size:1px;color:#ffffff;line-height:1px;height:0px;max-height:0px;opacity:0;overflow:hidden;">
Thank you Junchao for your kind help! I ran this test case and observed the same things as you did, but I am not sure if it is using GMRES or not. Thanks, Yongzhong From: Junchao Zhang &lt;junchao. zhang@ gmail. com&gt; Date: Wednesday, June 26,
</div>
<!-- Preheader Text : END -->
<!-- Email Banner : BEGIN -->
<div style="display:none !important;display:none;visibility:hidden;mso-hide:all;font-size:1px;color:#ffffff;line-height:1px;height:0px;max-height:0px;opacity:0;overflow:hidden;">ZjQcmQRYFpfptBannerStart</div>
<!--[if ((ie)|(mso))]>
<table border="0" cellspacing="0" cellpadding="0" width="100%" style="padding: 16px 0px 16px 0px; direction: ltr" ><tr><td>
<table border="0" cellspacing="0" cellpadding="0" style="padding: 0px 10px 5px 6px; width: 100%; border-radius:4px; border-top:4px solid #90a4ae;background-color:#D0D8DC;"><tr><td valign="top">
<table align="left" border="0" cellspacing="0" cellpadding="0" style="padding: 4px 8px 4px 8px">
<tr><td style="color:#000000; font-family: 'Arial', sans-serif; font-weight:bold; font-size:14px; direction: ltr">
This Message Is From an External Sender
</td></tr>
<tr><td style="color:#000000; font-weight:normal; font-family: 'Arial', sans-serif; font-size:12px; direction: ltr">
This message came from outside your organization.
</td></tr>
</table>
</td></tr></table>
</td></tr></table>
<![endif]-->
<![if !((ie)|(mso))]>
<div dir="ltr" id="pfptBannerp9z0x9c" style="all: revert !important; display:block !important; text-align: left !important; margin:16px 0px 16px 0px !important; padding:8px 16px 8px 16px !important; border-radius: 4px !important; min-width: 200px !important; background-color: #D0D8DC !important; background-color: #D0D8DC; border-top: 4px solid #90a4ae !important; border-top: 4px solid #90a4ae;">
<div id="pfptBannerp9z0x9c" style="all: unset !important; float:left !important; display:block !important; margin: 0px 0px 1px 0px !important; max-width: 600px !important;">
<div id="pfptBannerp9z0x9c" style="all: unset !important; display:block !important; visibility: visible !important; background-color: #D0D8DC !important; color:#000000 !important; color:#000000; font-family: 'Arial', sans-serif !important; font-family: 'Arial', sans-serif; font-weight:bold !important; font-weight:bold; font-size:14px !important; line-height:18px !important; line-height:18px">
This Message Is From an External Sender
</div>
<div id="pfptBannerp9z0x9c" style="all: unset !important; display:block !important; visibility: visible !important; background-color: #D0D8DC !important; color:#000000 !important; color:#000000; font-weight:normal; font-family: 'Arial', sans-serif !important; font-family: 'Arial', sans-serif; font-size:12px !important; line-height:18px !important; line-height:18px; margin-top:2px !important;">
This message came from outside your organization.
</div>
</div>
<div style="clear: both !important; display: block !important; visibility: hidden !important; line-height: 0 !important; font-size: 0.01px !important; height: 0px"> </div>
</div>
<![endif]>
<div style="display:none !important;display:none;visibility:hidden;mso-hide:all;font-size:1px;color:#ffffff;line-height:1px;height:0px;max-height:0px;opacity:0;overflow:hidden;">ZjQcmQRYFpfptBannerEnd</div>
<!-- Email Banner : END -->
<!-- BaNnErBlUrFlE-BoDy-end -->
<html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head><!-- BaNnErBlUrFlE-HeAdEr-start -->
<style>
#pfptBannerp9z0x9c { all: revert !important; display: block !important;
visibility: visible !important; opacity: 1 !important;
background-color: #D0D8DC !important;
max-width: none !important; max-height: none !important }
.pfptPrimaryButtonp9z0x9c:hover, .pfptPrimaryButtonp9z0x9c:focus {
background-color: #b4c1c7 !important; }
.pfptPrimaryButtonp9z0x9c:active {
background-color: #90a4ae !important; }
</style>
<!-- BaNnErBlUrFlE-HeAdEr-end -->
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta name="Generator" content="Microsoft Word 15 (filtered medium)">
<style><!--
/* Font Definitions */
@font-face
{font-family:Helvetica;
panose-1:0 0 0 0 0 0 0 0 0 0;}
@font-face
{font-family:"Cambria Math";
panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
{font-family:DengXian;
panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
{font-family:Calibri;
panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
{font-family:Aptos;
panose-1:2 11 0 4 2 2 2 2 2 4;}
@font-face
{font-family:"Segoe UI";
panose-1:2 11 5 2 4 2 4 2 2 3;}
@font-face
{font-family:"\@DengXian";
panose-1:2 1 6 0 3 1 1 1 1 1;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{margin:0cm;
font-size:12.0pt;
font-family:"Aptos",sans-serif;}
a:link, span.MsoHyperlink
{mso-style-priority:99;
color:blue;
text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
{mso-style-priority:99;
color:purple;
text-decoration:underline;}
p.msonormal0, li.msonormal0, div.msonormal0
{mso-style-name:msonormal;
mso-margin-top-alt:auto;
margin-right:0cm;
mso-margin-bottom-alt:auto;
margin-left:0cm;
font-size:12.0pt;
font-family:"Aptos",sans-serif;}
span.m-5979530742528590611gmailsignatureprefix
{mso-style-name:m_-5979530742528590611gmailsignatureprefix;}
span.EmailStyle19
{mso-style-type:personal-reply;
font-family:"Aptos",sans-serif;
color:windowtext;}
.MsoChpDefault
{mso-style-type:export-only;
font-size:10.0pt;
mso-ligatures:none;}
@page WordSection1
{size:612.0pt 792.0pt;
margin:72.0pt 72.0pt 72.0pt 72.0pt;}
div.WordSection1
{page:WordSection1;}
/* List Definitions */
@list l0
{mso-list-id:1761874575;
mso-list-template-ids:986752038;}
@list l0:level1
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:36.0pt;
mso-level-number-position:left;
text-indent:-18.0pt;
mso-ansi-font-size:10.0pt;
font-family:Symbol;}
@list l0:level2
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:72.0pt;
mso-level-number-position:left;
text-indent:-18.0pt;
mso-ansi-font-size:10.0pt;
font-family:Symbol;}
@list l0:level3
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:108.0pt;
mso-level-number-position:left;
text-indent:-18.0pt;
mso-ansi-font-size:10.0pt;
font-family:Symbol;}
@list l0:level4
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:144.0pt;
mso-level-number-position:left;
text-indent:-18.0pt;
mso-ansi-font-size:10.0pt;
font-family:Symbol;}
@list l0:level5
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:180.0pt;
mso-level-number-position:left;
text-indent:-18.0pt;
mso-ansi-font-size:10.0pt;
font-family:Symbol;}
@list l0:level6
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:216.0pt;
mso-level-number-position:left;
text-indent:-18.0pt;
mso-ansi-font-size:10.0pt;
font-family:Symbol;}
@list l0:level7
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:252.0pt;
mso-level-number-position:left;
text-indent:-18.0pt;
mso-ansi-font-size:10.0pt;
font-family:Symbol;}
@list l0:level8
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:288.0pt;
mso-level-number-position:left;
text-indent:-18.0pt;
mso-ansi-font-size:10.0pt;
font-family:Symbol;}
@list l0:level9
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:324.0pt;
mso-level-number-position:left;
text-indent:-18.0pt;
mso-ansi-font-size:10.0pt;
font-family:Symbol;}
ol
{margin-bottom:0cm;}
ul
{margin-bottom:0cm;}
--></style>
</head>
<body lang="en-CN" link="blue" vlink="purple" style="word-wrap:break-word">
<div class="WordSection1">
<p class="MsoNormal"><span lang="EN-US" style="font-size:11.0pt">Thank you Junchao for your kind help! I ran this test case and observed the same things as you did, but I am not sure if it is using GMRES or not.<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-size:11.0pt"><br>
Thanks,<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-size:11.0pt">Yongzhong <o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-size:11.0pt"><o:p> </o:p></span></p>
<div id="mail-editor-reference-message-container">
<div>
<div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0cm 0cm 0cm">
<p class="MsoNormal" style="margin-bottom:12.0pt"><b><span style="color:black">From:
</span></b><span style="color:black">Junchao Zhang &lt;junchao.zhang@gmail.com&gt;<br>
<b>Date: </b>Wednesday, June 26, 2024 at 11:13</span><span style="font-family:'Arial',sans-serif;color:black"> </span><span style="color:black">AM<br>
<b>To: </b>Yongzhong Li &lt;yongzhong.li@mail.utoronto.ca&gt;<br>
<b>Cc: </b>Matthew Knepley &lt;knepley@gmail.com&gt;, Pierre Jolivet &lt;pierre@joliv.et&gt;, petsc-users@mcs.anl.gov &lt;petsc-users@mcs.anl.gov&gt;<br>
<b>Subject: </b>Re: [petsc-users] [petsc-maint] Assistance Needed with PETSc KSPSolve Performance Issue<o:p></o:p></span></p>
</div>
<div>
<div>
<p class="MsoNormal">Yongzhong,<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"> Try Barry's approach first. BTW, I ran another petsc test. You can see GEMV was used in KSPSolve. You could also try this one. <o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
<div>
<p class="MsoNormal"><span style="font-family:'Courier New'">$ cd src/ksp/ksp/tutorials</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><span style="font-family:'Courier New'">$ make bench_kspsolve</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><span style="font-family:'Courier New'">$ MKL_VERBOSE=1 OMP_PROC_BIND=spread MKL_NUM_THREADS=8 ./bench_kspsolve -split_ksp -mat_type aijmkl</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><span style="font-family:'Courier New'">===========================================<br>
Test: KSP performance - Poisson<br>
Input matrix: 27-pt finite difference stencil<br>
-n 100<br>
DoFs = 1000000<br>
Number of nonzeros = 26463592<br>
<br>
Step1 - creating Vecs and Mat...<br>
Step2a - running PCSetUp()...<br>
Step2b - running KSPSolve()...<br>
MKL_VERBOSE oneMKL 2022.0 Product build 20211112 for Intel(R) 64 architecture Intel(R) Architecture processors, Lnx 3.18GHz lp64 gnu_thread<br>
MKL_VERBOSE ZSCAL(1000000,0x7ffccef20c58,0x7fa9432b5e60,1) 474.25us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:8<br>
MKL_VERBOSE ZSCAL(1000000,0x7ffccef20c58,0x7fa9441f8260,1) 1.93ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:8<br>
MKL_VERBOSE <b>ZGEMV</b>(C,1000000,2,0x7ffccef20c20,0x7fa9432b5e60,1000000,0x7fa94513a660,1,0x7ffccef20c30,0x1c4b610,1) 1.86ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:8<br>
MKL_VERBOSE ZSCAL(1000000,0x7ffccef20c58,0x7fa94513a660,1) 2.55ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:8<br>
MKL_VERBOSE <b>ZGEMV</b>(C,1000000,3,0x7ffccef20c20,0x7fa9432b5e60,1000000,0x7fa8cb7a6660,1,0x7ffccef20c30,0x1c4b610,1) 2.95ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:8</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
<p class="MsoNormal"><br clear="all">
<o:p></o:p></p>
<div>
<div>
<div>
<p class="MsoNormal">--Junchao Zhang<o:p></o:p></p>
</div>
</div>
</div>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
<p class="MsoNormal"><o:p> </o:p></p>
<div>
<div>
<p class="MsoNormal">On Tue, Jun 25, 2024 at 10:19<span style="font-family:'Arial',sans-serif"> </span>PM Yongzhong Li &lt;<a href="mailto:yongzhong.li@mail.utoronto.ca">yongzhong.li@mail.utoronto.ca</a>&gt; wrote:<o:p></o:p></p>
</div>
<blockquote style="border:none;border-left:solid #CCCCCC 1.0pt;padding:0cm 0cm 0cm 6.0pt;margin-left:4.8pt;margin-right:0cm">
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;margin-bottom:12.0pt"><span lang="EN-US" style="font-size:11.0pt">Hi Junchao, thank you for your help for these benchmarking test!
<br>
<br>
I check out to petsc/main and did a few things to verify from my side,<br>
<br>
1. I ran the microbenchmark (vec/vec/tests/ex2k.c) test on my compute node. The results are as follow,</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">$ MKL_NUM_THREADS=64 ./ex2k -n 15 -m 4<br>
Vector(N) VecMDot-1 VecMDot-3 VecMDot-8 VecMDot-30 (us)</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">--------------------------------------------------------------------------</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 128 14.5 1.2 1.8 5.2
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 256 1.5 0.9 1.6 4.7
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 512 2.7 2.8 6.1 13.2
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 1024 4.0 4.0 9.3 16.4
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 2048 7.4 7.3 11.3 39.3
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 4096 14.2 13.9 19.1 93.4
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 8192 28.8 26.3 25.4 31.3
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 16384 54.1 25.8 26.7 33.8
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 32768 109.8 25.7 24.2 56.0
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 65536 220.2 24.4 26.5 89.0
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 131072 424.1 31.5 36.1 149.6
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 262144 898.1 37.1 53.9 286.1
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 524288 1754.6 48.7 100.3 1122.2
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 1048576 3645.8 86.5 347.9 2950.4
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 2097152 7371.4 308.7 1440.6 6874.9</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> </span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">$ MKL_NUM_THREADS=1 ./ex2k -n 15 -m 4</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">Vector(N) VecMDot-1 VecMDot-3 VecMDot-8 VecMDot-30 (us)</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">--------------------------------------------------------------------------</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 128 14.9 1.2 1.9 5.2
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 256 1.5 1.0 1.7 4.7
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 512 2.7 2.8 6.1 12.0
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 1024 3.9 4.0 9.3 16.8
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 2048 7.4 7.3 10.4 41.3
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 4096 14.0 13.8 18.6 84.2
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 8192 27.0 21.3 43.8 177.5
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 16384 54.1 34.1 89.1 330.4
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 32768 110.4 82.1 203.5 781.1
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 65536 213.0 191.8 423.9 1696.4
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 131072 428.7 360.2 934.0 4080.0
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 262144 883.4 723.2 1745.6 10120.7
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 524288 1817.5 1466.1 4751.4 23217.2
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 1048576 3611.0 3796.5 11814.9 48687.7
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> 2097152 7401.9 10592.0 27543.2 106565.4</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"><br>
I can see the speed up brought by more MKL threads, and if I set MKL_VERBOSE to 1, I can see something like
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> </span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><i><span lang="EN-US" style="font-size:11.0pt">MKL_VERBOSE ZGEMV(C,262144,8,0x7ffd375d6470,0x2ac76e7fb010,262144,0x16d0f40,1,0x7ffd375d6480,0x16435d0,1) 32.70us CNR:OFF Dyn:1 FastMM:1
TID:0 NThr:6 ca<br>
<br>
</span></i><span lang="EN-US" style="font-size:11.0pt">From my understanding, the VecMDot()/VecMAXPY() can benefit from more MKL threads in my compute node and is using ZGEMV MKL BLAS.<br>
<br>
However, when I ran my own program and set MKL_VERBOSE to 1, it is very strange that I still can’t find any MKL outputs, though I can see from the PETSc log that VecMDot and VecMAXPY() are called.
</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"><br>
I am wondering are VecMDot and VecMAXPY in KSPGMRESOrthog optimized in a way that is similar to ex2k test? Should I expect to see MKL outputs for whatever linear system I solve with KSPGMRES? Does it relate to if it is dense matrix or sparse matrix, although
I am not really understand why VecMDot/MAXPY() have something to do with dense matrix-vector multiplication.
<br>
<br>
Thank you,</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;margin-bottom:12.0pt"><span lang="EN-US" style="font-size:11.0pt">Yongzhong</span><o:p></o:p></p>
<div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0cm 0cm 0cm">
<p class="MsoNormal" style="mso-margin-top-alt:auto;margin-bottom:12.0pt"><b><span style="color:black">From:
</span></b><span style="color:black">Junchao Zhang &lt;<a href="mailto:junchao.zhang@gmail.com" target="_blank">junchao.zhang@gmail.com</a>&gt;<br>
<b>Date: </b>Tuesday, June 25, 2024 at 6:34</span><span style="font-family:'Arial',sans-serif;color:black"> </span><span style="color:black">PM<br>
<b>To: </b>Matthew Knepley &lt;<a href="mailto:knepley@gmail.com" target="_blank">knepley@gmail.com</a>&gt;<br>
<b>Cc: </b>Yongzhong Li &lt;<a href="mailto:yongzhong.li@mail.utoronto.ca" target="_blank">yongzhong.li@mail.utoronto.ca</a>&gt;, Pierre Jolivet &lt;<a href="mailto:pierre@joliv.et" target="_blank">pierre@joliv.et</a>&gt;,
<a href="mailto:petsc-users@mcs.anl.gov" target="_blank">petsc-users@mcs.anl.gov</a> &lt;<a href="mailto:petsc-users@mcs.anl.gov" target="_blank">petsc-users@mcs.anl.gov</a>&gt;<br>
<b>Subject: </b>Re: [petsc-users] [petsc-maint] Assistance Needed with PETSc KSPSolve Performance Issue</span><o:p></o:p></p>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">Hi, Yongzhong,<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> Since the two kernels of KSPGMRESOrthog are VecMDot and VecMAXPY, if we can speed up the two with OpenMP threads, then we can speed up KSPGMRESOrthog. We recently added an optimization
to do VecMDot/MAXPY() in dense matrix-vector multiplication (i.e., BLAS2 GEMV, with tall-and-skinny matrices ). So with MKL_VERBOSE=1, you should see something like "MKL_VERBOSE ZGEMV ..." in output. If not, could you try again with petsc/main?<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> petsc has a microbenchmark (vec/vec/tests/ex2k.c) to test them. I ran VecMDot with multithreaded oneMKL (via setting MKL_NUM_THREADS), it was strange to see no speedup. I then
configured petsc with openblas, I did see better performance with more threads<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-family:'Courier New'">$ OMP_PROC_BIND=spread OMP_NUM_THREADS=1 ./ex2k -n 15 -m 4<br>
Vector(N) VecMDot-3 VecMDot-8 VecMDot-30 (us)<br>
--------------------------------------------------------------------------<br>
128 2.0 2.5 6.1 <br>
256 1.8 2.7 7.0 <br>
512 2.1 3.1 8.6 <br>
1024 2.7 4.0 12.3 <br>
2048 3.8 6.3 28.0 <br>
4096 6.1 10.6 42.4 <br>
8192 10.9 21.8 79.5 <br>
16384 21.2 39.4 149.6 <br>
32768 45.9 75.7 224.6 <br>
65536 142.2 215.8 732.1 <br>
131072 169.1 233.2 1729.4 <br>
262144 367.5 830.0 4159.2 <br>
524288 999.2 1718.1 8538.5 <br>
1048576 2113.5 4082.1 18274.8 <br>
2097152 5392.6 10273.4 43273.4 </span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-family:'Courier New'">$ OMP_PROC_BIND=spread OMP_NUM_THREADS=8 ./ex2k -n 15 -m 4<br>
Vector(N) VecMDot-3 VecMDot-8 VecMDot-30 (us)<br>
--------------------------------------------------------------------------<br>
128 2.0 2.5 6.0 <br>
256 1.8 2.7 15.0 <br>
512 2.1 9.0 16.6 <br>
1024 2.6 8.7 16.1 <br>
2048 7.7 10.3 20.5 <br>
4096 9.9 11.4 25.9 <br>
8192 14.5 22.1 39.6 <br>
16384 25.1 27.8 67.8 <br>
32768 44.7 95.7 91.5 <br>
65536 82.1 156.8 165.1 <br>
131072 194.0 335.1 341.5 <br>
262144 388.5 380.8 612.9 <br>
524288 1046.7 967.1 1653.3 <br>
1048576 1997.4 2169.0 4034.4 <br>
2097152 5502.9 5787.3 12608.1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">The tall-and-skinny matrices in KSPGMRESOrthog vary in width. The average speedup depends on components. So I suggest you run ex2k to see in your environment whether oneMKL can
speedup the kernels. <o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">--Junchao Zhang<o:p></o:p></p>
</div>
</div>
</div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">On Mon, Jun 24, 2024 at 11:35<span style="font-family:'Arial',sans-serif"> </span>AM Junchao Zhang &lt;<a href="mailto:junchao.zhang@gmail.com" target="_blank">junchao.zhang@gmail.com</a>&gt;
wrote:<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">Let me run some examples on our end to see whether the code calls expected functions. <o:p></o:p></p>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><br clear="all">
<o:p></o:p></p>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">--Junchao Zhang<o:p></o:p></p>
</div>
</div>
</div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
</div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">On Mon, Jun 24, 2024 at 10:46<span style="font-family:'Arial',sans-serif"> </span>AM Matthew Knepley &lt;<a href="mailto:knepley@gmail.com" target="_blank">knepley@gmail.com</a>&gt; wrote:<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:1.0pt;color:white">On Mon, Jun 24, 2024 at 11:</span><span style="font-size:1.0pt;font-family:'Arial',sans-serif;color:white"> </span><span style="font-size:1.0pt;color:white">21
AM Yongzhong Li &lt;yongzhong.</span><span style="font-size:1.0pt;font-family:'Arial',sans-serif;color:white"> </span><span style="font-size:1.0pt;color:white">li@</span><span style="font-size:1.0pt;font-family:'Arial',sans-serif;color:white"> </span><span style="font-size:1.0pt;color:white">mail.</span><span style="font-size:1.0pt;font-family:'Arial',sans-serif;color:white"> </span><span style="font-size:1.0pt;color:white">utoronto.</span><span style="font-size:1.0pt;font-family:'Arial',sans-serif;color:white"> </span><span style="font-size:1.0pt;color:white">ca&gt;
wrote: Thank you Pierre for your information. Do we have a conclusion for my original question about the parallelization efficiency for different stages of
</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:1.0pt;color:white">ZjQcmQRYFpfptBannerStart</span><o:p></o:p></p>
</div>
<div style="border:none;border-top:solid #90A4AE 3.0pt;padding:6.0pt 0cm 0cm 0cm;margin-top:12.0pt;margin-bottom:12.0pt;border-radius:4px;min-width:200px" id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203pfptBanner0gnzkk2">
<div style="margin-bottom:.75pt;float:left" id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203pfptBanner0gnzkk2">
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203pfptBanner0gnzkk2">
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;line-height:13.5pt;background:#D0D8DC">
<b><span style="font-size:10.5pt;font-family:'Arial',sans-serif;color:black">This Message Is From an External Sender
</span></b><o:p></o:p></p>
</div>
<div style="margin-top:1.5pt" id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203pfptBanner0gnzkk2">
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;line-height:13.5pt;background:#D0D8DC">
<span style="font-size:9.0pt;font-family:'Arial',sans-serif;color:black">This message came from outside your organization.
</span><o:p></o:p></p>
</div>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;line-height:0%;background:#D0D8DC">
<span style="font-size:1.0pt;color:black"> </span><o:p></o:p></p>
</div>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:1.0pt;color:white">ZjQcmQRYFpfptBannerEnd</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">On Mon, Jun 24, 2024 at 11:21<span style="font-family:'Arial',sans-serif"> </span>AM Yongzhong Li &lt;<a href="mailto:yongzhong.li@mail.utoronto.ca" target="_blank">yongzhong.li@mail.utoronto.ca</a>&gt;
wrote:<o:p></o:p></p>
</div>
<div>
<blockquote style="border:none;border-left:solid #CCCCCC 1.0pt;padding:0cm 0cm 0cm 6.0pt;margin-left:4.8pt;margin-top:5.0pt;margin-right:0cm;margin-bottom:5.0pt">
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:1.0pt;color:white">Thank you Pierre for your information. Do we have a conclusion for my original question about the parallelization efficiency for different
stages of KSP Solve? Do we need to do more testing to figure out the issues? Thank you, Yongzhong From:</span><span style="font-size:1.0pt;font-family:'Arial',sans-serif;color:white"> </span><span style="font-size:1.0pt;color:white">
</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:1.0pt;color:white">ZjQcmQRYFpfptBannerStart</span><o:p></o:p></p>
</div>
<div style="border:none;border-top:solid #90A4AE 3.0pt;padding:6.0pt 0cm 0cm 0cm;margin-top:12.0pt;margin-bottom:12.0pt;border-radius:4px;min-width:200px" id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261pfptBanner7d6k4n9">
<div style="margin-bottom:.75pt;float:left" id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261pfptBanner7d6k4n9">
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261pfptBanner7d6k4n9">
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;line-height:13.5pt;background:#D0D8DC">
<b><span style="font-size:10.5pt;font-family:'Arial',sans-serif;color:black">This Message Is From an External Sender
</span></b><o:p></o:p></p>
</div>
<div style="margin-top:1.5pt" id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261pfptBanner7d6k4n9">
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;line-height:13.5pt;background:#D0D8DC">
<span style="font-size:9.0pt;font-family:'Arial',sans-serif;color:black">This message came from outside your organization.
</span><o:p></o:p></p>
</div>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;line-height:0%;background:#D0D8DC">
<span style="font-size:1.0pt;color:black"> </span><o:p></o:p></p>
</div>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:1.0pt;color:white">ZjQcmQRYFpfptBannerEnd</span><o:p></o:p></p>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">Thank you Pierre for your information. Do we have a conclusion for my original question about the parallelization efficiency for different
stages of KSP Solve? Do we need to do more testing to figure out the issues?</span><o:p></o:p></p>
</div>
</div>
</div>
</blockquote>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">We have an extended discussion of this here: <a href="https://urldefense.us/v3/__https://petsc.org/release/faq/*what-kind-of-parallel-computers-or-clusters-are-needed-to-use-petsc-or-why-do-i-get-little-speedup__;Iw!!G_uCfscf7eWS!aQJpmm5W6l6FUiumnIPmkouzwzNUfx-Dyq04i1O2KS_InQGk6qjI7wUir0Hx6QEUQE2AMiJDsez3x4zRO7V_$" target="_blank">https://petsc.org/release/faq/#what-kind-of-parallel-computers-or-clusters-are-needed-to-use-petsc-or-why-do-i-get-little-speedup</a><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">The kinds of operations you are talking about (SpMV, VecDot, VecAXPY, etc) are memory bandwidth limited. If there is no more bandwidth to be marshalled on your board, then adding
more processes does nothing at all. This is why people were asking about how many "nodes" you are running on, because that is the unit of memory bandwidth, not "cores" which make little difference.<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> Thanks,<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> Matt<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
<blockquote style="border:none;border-left:solid #CCCCCC 1.0pt;padding:0cm 0cm 0cm 6.0pt;margin-left:4.8pt;margin-top:5.0pt;margin-right:0cm;margin-bottom:5.0pt">
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">Thank you,</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">Yongzhong</span><o:p></o:p></p>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt"> </span><o:p></o:p></p>
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261mail-editor-reference-message-container">
<div>
<div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0cm 0cm 0cm">
<p class="MsoNormal" style="mso-margin-top-alt:auto;margin-bottom:12.0pt"><b><span style="color:black">From:
</span></b><span style="color:black">Pierre Jolivet &lt;</span><a href="mailto:pierre@joliv.et" target="_blank">pierre@joliv.et</a><span style="color:black">&gt;<br>
<b>Date: </b>Sunday, June 23, 2024 at 12:41</span><span style="font-family:"Arial",sans-serif;color:black"> </span><span style="color:black">AM<br>
<b>To: </b>Yongzhong Li &lt;</span><a href="mailto:yongzhong.li@mail.utoronto.ca" target="_blank">yongzhong.li@mail.utoronto.ca</a><span style="color:black">&gt;<br>
<b>Cc: </b></span><a href="mailto:petsc-users@mcs.anl.gov" target="_blank">petsc-users@mcs.anl.gov</a><span style="color:black"> &lt;</span><a href="mailto:petsc-users@mcs.anl.gov" target="_blank">petsc-users@mcs.anl.gov</a><span style="color:black">&gt;<br>
<b>Subject: </b>Re: [petsc-users] [petsc-maint] Assistance Needed with PETSc KSPSolve Performance Issue</span><o:p></o:p></p>
</div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;margin-bottom:12.0pt"> <o:p></o:p></p>
<blockquote style="margin-top:5.0pt;margin-bottom:5.0pt">
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">On 23 Jun 2024, at 4:07<span style="font-family:"Arial",sans-serif"> </span>AM, Yongzhong Li <<a href="mailto:yongzhong.li@mail.utoronto.ca" target="_blank">yongzhong.li@mail.utoronto.ca</a>>
wrote:<o:p></o:p></p>
</div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
<div>
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261pfptBannerfa2qtuo">
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261pfptBannerfa2qtuo">
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261pfptBannerfa2qtuo">
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-family:'Arial',sans-serif">This Message Is From an External Sender</span><o:p></o:p></p>
</div>
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261pfptBannerfa2qtuo">
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-family:'Arial',sans-serif">This message came from outside your organization.</span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">Yeah, I ran my program again using -mat_view ::ascii_info with MKL_VERBOSE set to 1, and I noticed that the output reports the matrix
to be of seqaijmkl type (I’ve attached part of the output below)<br>
<br>
--> Setting up matrix-vector products...</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> </span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">Mat Object: 1 MPI process</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> type: seqaijmkl</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> rows=16490, cols=35937</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> total: nonzeros=128496, allocated nonzeros=128496</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> total number of mallocs used during MatSetValues calls=0</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> not using I-node routines</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">Mat Object: 1 MPI process</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> type: seqaijmkl</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> rows=16490, cols=35937</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> total: nonzeros=128496, allocated nonzeros=128496</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> total number of mallocs used during MatSetValues calls=0</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> not using I-node routines</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> </span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">--> Solving the system...</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> </span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">Excitation 1 of 1...</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> </span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">================================================</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">Iterative solve completed in 7435 ms.</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">CONVERGED: rtol.</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">Iterations: 72</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">Final relative residual norm: 9.22287e-07</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">================================================</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">[CPU TIME] System solution: 2.27160000e+02 s.</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">[WALL TIME] System solution: 7.44387218e+00 s.<br>
<br>
However, it seems to me that there was still no MKL output even though I set MKL_VERBOSE to 1, although I think there should be many SpMV operations when doing KSPSolve(). Do you see any possible reasons?</span><o:p></o:p></p>
</div>
</div>
</div>
</blockquote>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">SPMV are not reported with MKL_VERBOSE (last I checked), only dense BLAS is.<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">Thanks,<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">Pierre<o:p></o:p></p>
</div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;margin-bottom:12.0pt"> <o:p></o:p></p>
<blockquote style="margin-top:5.0pt;margin-bottom:5.0pt">
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">Thanks,</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">Yongzhong</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> </span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt"> </span><o:p></o:p></p>
</div>
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261mail-editor-reference-message-container">
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;margin-bottom:12.0pt"><b>From: </b>Matthew Knepley <<a href="mailto:knepley@gmail.com" target="_blank">knepley@gmail.com</a>><br>
<b>Date: </b>Saturday, June 22, 2024 at 5:56<span style="font-family:"Arial",sans-serif"> </span>PM<br>
<b>To: </b>Yongzhong Li <<a href="mailto:yongzhong.li@mail.utoronto.ca" target="_blank">yongzhong.li@mail.utoronto.ca</a>><br>
<b>Cc: </b>Junchao Zhang <<a href="mailto:junchao.zhang@gmail.com" target="_blank">junchao.zhang@gmail.com</a>>, Pierre Jolivet <<a href="mailto:pierre@joliv.et" target="_blank">pierre@joliv.et</a>>, <a href="mailto:petsc-users@mcs.anl.gov" target="_blank">petsc-users@mcs.anl.gov</a> <<a href="mailto:petsc-users@mcs.anl.gov" target="_blank">petsc-users@mcs.anl.gov</a>><br>
<b>Subject: </b>Re: [petsc-users] [petsc-maint] Assistance Needed with PETSc KSPSolve Performance Issue<o:p></o:p></p>
</div>
<table class="MsoNormalTable" border="0" cellspacing="0" cellpadding="0" align="left" width="100%" style="width:100.0%;display:table;float:none">
<tbody>
<tr>
<td style="background:#A6A6A6;padding:5.25pt 1.5pt 5.25pt 1.5pt"></td>
<td width="100%" style="width:100.0%;background:#EAEAEA;padding:5.25pt 3.75pt 5.25pt 11.25pt">
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;mso-element:frame;mso-element-frame-hspace:2.25pt;mso-element-wrap:around;mso-element-anchor-vertical:paragraph;mso-element-anchor-horizontal:column;mso-height-rule:exactly">
<span lang="ZH-CN" style="font-size:9.0pt;font-family:DengXian;color:#212121">你通常不会收到来自</span><span style="font-size:9.0pt;font-family:"Segoe UI",sans-serif;color:#212121"> </span><span style="color:black"><a href="mailto:knepley@gmail.com" target="_blank"><span style="font-size:9.0pt;font-family:"Segoe UI",sans-serif">knepley@gmail.com</span></a></span><span style="font-size:9.0pt;font-family:"Segoe UI",sans-serif;color:#212121"> </span><span lang="ZH-CN" style="font-size:9.0pt;font-family:DengXian;color:#212121">的电子邮件。</span><span style="color:black"><a href="https://urldefense.us/v3/__https://aka.ms/LearnAboutSenderIdentification__;!!G_uCfscf7eWS!fVvbGldqcUV5ju4jpu5oGmt-VjITi5JpCJzhHxpbgsERLVYZzglpxKOOyrBRGxjRxp7vWHwt3SnINFOQErR1Z8kcDcf3qwbYRxM$" target="_blank"><span lang="ZH-CN" style="font-size:9.0pt;font-family:DengXian">了解这一点为什么很重要</span></a></span><o:p></o:p></p>
</div>
</div>
</td>
<td width="75" style="width:56.25pt;background:#EAEAEA;padding:5.25pt 3.75pt 5.25pt 3.75pt">
</td>
</tr>
</tbody>
</table>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">On Sat, Jun 22, 2024 at 5:03<span style="font-family:"Arial",sans-serif"> </span>PM Yongzhong Li <<a href="mailto:yongzhong.li@mail.utoronto.ca" target="_blank">yongzhong.li@mail.utoronto.ca</a>>
wrote:<o:p></o:p></p>
</div>
</div>
<div>
<blockquote style="border:none;border-left:solid #CCCCCC 1.0pt;padding:0cm 0cm 0cm 6.0pt;margin-left:4.8pt;margin-top:5.0pt;margin-right:0cm;margin-bottom:5.0pt;border-top:currentcolor;border-right:currentcolor;border-bottom:currentcolor">
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:1.0pt;color:white">MKL_VERBOSE=1 ./ex1 matrix nonzeros = 100, allocated nonzeros = 100 MKL_VERBOSE Intel(R) MKL 2019.</span><span style="font-size:1.0pt;font-family:"Arial",sans-serif;color:white"> </span><span style="font-size:1.0pt;color:white">0
Update 4 Product build 20190411 for Intel(R) 64 architecture Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX-512) with support of Vector</span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:1.0pt;color:white">ZjQcmQRYFpfptBannerStart</span><o:p></o:p></p>
</div>
</div>
<div style="margin-top:12.0pt;margin-bottom:12.0pt;border-width:initial;border-style:initial;border-color:currentcolor;border-radius:4px;min-width:200px" id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322pfptBanner3e1hrxr">
<div style="margin-bottom:.75pt;float:left" id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322pfptBanner3e1hrxr">
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322pfptBanner3e1hrxr">
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;line-height:13.5pt;background:#D0D8DC">
<b><span style="font-size:10.5pt;font-family:'Arial',sans-serif;color:black">This Message Is From an External Sender</span></b><o:p></o:p></p>
</div>
</div>
<div style="margin-top:1.5pt" id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322pfptBanner3e1hrxr">
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;line-height:13.5pt;background:#D0D8DC">
<span style="font-size:9.0pt;font-family:'Arial',sans-serif;color:black">This message came from outside your organization.</span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;background:#D0D8DC">
<span style="font-size:1.0pt;color:black"> </span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:1.0pt;color:white">ZjQcmQRYFpfptBannerEnd</span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-family:'Courier New'">MKL_VERBOSE=1 ./ex1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"><br>
</span><span style="font-size:11.0pt">matrix nonzeros = 100, allocated nonzeros = 100</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE Intel(R) MKL 2019.0 Update 4 Product build 20190411 for Intel(R) 64 architecture Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX-512)
with support of Vector Neural Network Instructions enabled processors, Lnx 2.50GHz lp64 gnu_thread</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZGEMV(N,10,10,0x7ffd9d7078f0,0x187eb20,10,0x187f7c0,1,0x7ffd9d707900,0x187ff70,1) 167.34ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZSYTRF(L,10,0x1894b50,10,0x1893df0,0x7ffd9d7078c0,-1,0) 77.19ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZSYTRF(L,10,0x1894b50,10,0x1893df0,0x1894490,10,0) 83.97ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZSYTRS(L,10,1,0x1894b50,10,0x1893df0,0x1880720,10,0) 44.94ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZAXPY(10,0x7ffd9d7078f0,0x187f7c0,1,0x1880720,1) 20.72us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZSYTRS(L,10,2,0x1894b50,10,0x1893df0,0x187d2a0,10,0) 4.22us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZGEMM(N,N,10,2,10,0x7ffd9d707790,0x187eb20,10,0x187d2a0,10,0x7ffd9d7077a0,0x1896a70,10) 1.41ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZAXPY(20,0x7ffd9d7078a0,0x1896a70,1,0x187b650,1) 381ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZSYTRF(L,10,0x1894b50,10,0x1893df0,0x7ffd9d707840,-1,0) 742ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZSYTRF(L,10,0x1894b50,10,0x1893df0,0x18951a0,10,0) 4.20us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZSYTRS(L,10,1,0x1894b50,10,0x1893df0,0x1880720,10,0) 2.94us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZAXPY(10,0x7ffd9d7078f0,0x187f7c0,1,0x1880720,1) 292ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZGEMV(N,10,10,0x7ffd9d7078f0,0x187eb20,10,0x187f7c0,1,0x7ffd9d707900,0x187ff70,1) 1.17us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZGETRF(10,10,0x1894b50,10,0x1893df0,0) 202.48ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZGETRS(N,10,1,0x1894b50,10,0x1893df0,0x1880720,10,0) 20.78ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZAXPY(10,0x7ffd9d7078f0,0x187f7c0,1,0x1880720,1) 954ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZGETRS(N,10,2,0x1894b50,10,0x1893df0,0x187d2a0,10,0) 30.74ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZGEMM(N,N,10,2,10,0x7ffd9d707790,0x187eb20,10,0x187d2a0,10,0x7ffd9d7077a0,0x18969c0,10) 3.95us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZAXPY(20,0x7ffd9d7078a0,0x18969c0,1,0x187b650,1) 995ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZGETRF(10,10,0x1894b50,10,0x1893df0,0) 4.09us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZGETRS(N,10,1,0x1894b50,10,0x1893df0,0x1880720,10,0) 3.92us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZAXPY(10,0x7ffd9d7078f0,0x187f7c0,1,0x1880720,1) 274ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZGEMV(N,15,10,0x7ffd9d7078f0,0x187ec70,15,0x187fc30,1,0x7ffd9d707900,0x1880400,1) 1.59us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZGEQRF(15,10,0x1894b40,15,0x1894550,0x7ffd9d707900,-1,0) 47.07us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZGEQRF(15,10,0x1894b40,15,0x1894550,0x1895cb0,10,0) 26.62us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZUNMQR(L,C,15,1,10,0x1894b40,15,0x1894550,0x1895b00,15,0x7ffd9d7078b0,-1,0) 35.32us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZUNMQR(L,C,15,1,10,0x1894b40,15,0x1894550,0x1895b00,15,0x1895cb0,10,0) 42.33ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZTRTRS(U,N,N,10,1,0x1894b40,15,0x1895b00,15,0) 16.11us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZAXPY(10,0x7ffd9d7078f0,0x187fc30,1,0x1880c70,1) 395ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZGEMM(N,N,15,2,10,0x7ffd9d707790,0x187ec70,15,0x187d310,10,0x7ffd9d7077a0,0x187b5b0,15) 3.22us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZUNMQR(L,C,15,2,10,0x1894b40,15,0x1894550,0x1897760,15,0x7ffd9d7078c0,-1,0) 730ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZUNMQR(L,C,15,2,10,0x1894b40,15,0x1894550,0x1897760,15,0x1895cb0,10,0) 4.42us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZTRTRS(U,N,N,10,2,0x1894b40,15,0x1897760,15,0) 5.96us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZAXPY(20,0x7ffd9d7078a0,0x187d310,1,0x1897610,1) 222ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZGEQRF(15,10,0x1894b40,15,0x18954b0,0x7ffd9d707820,-1,0) 685ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZGEQRF(15,10,0x1894b40,15,0x18954b0,0x1895d60,10,0) 6.11us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZUNMQR(L,C,15,1,10,0x1894b40,15,0x18954b0,0x1895bb0,15,0x7ffd9d7078b0,-1,0) 390ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZUNMQR(L,C,15,1,10,0x1894b40,15,0x18954b0,0x1895bb0,15,0x1895d60,10,0) 3.09us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZTRTRS(U,N,N,10,1,0x1894b40,15,0x1895bb0,15,0) 1.05us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt">MKL_VERBOSE ZAXPY(10,0x7ffd9d7078f0,0x187fc30,1,0x1880c70,1) 257ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1</span><span lang="EN-US" style="font-size:11.0pt"><br>
<br>
Yes, for the PETSc example there are MKL outputs, but not for my own program. All I did was change the matrix type from MATAIJ to MATAIJMKL to get optimized SpMV performance from MKL. Should I expect to see any MKL outputs in this case?</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</blockquote>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">Are you sure that the type changed? You can MatView() the matrix with format ascii_info to see.<o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> Thanks,<o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> Matt<o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
</div>
<blockquote style="border:none;border-left:solid #CCCCCC 1.0pt;padding:0cm 0cm 0cm 6.0pt;margin-left:4.8pt;margin-top:5.0pt;margin-right:0cm;margin-bottom:5.0pt;border-top:currentcolor;border-right:currentcolor;border-bottom:currentcolor">
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">Thanks,</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">Yongzhong</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt"> </span><o:p></o:p></p>
</div>
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322mail-editor-reference-message-container">
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;margin-bottom:12.0pt"><b>From: </b>Junchao Zhang <<a href="mailto:junchao.zhang@gmail.com" target="_blank">junchao.zhang@gmail.com</a>><br>
<b>Date: </b>Saturday, June 22, 2024 at 9:40<span style="font-family:"Arial",sans-serif"> </span>AM<br>
<b>To: </b>Yongzhong Li <<a href="mailto:yongzhong.li@mail.utoronto.ca" target="_blank">yongzhong.li@mail.utoronto.ca</a>><br>
<b>Cc: </b>Pierre Jolivet <<a href="mailto:pierre@joliv.et" target="_blank">pierre@joliv.et</a>>, <a href="mailto:petsc-users@mcs.anl.gov" target="_blank">petsc-users@mcs.anl.gov</a> <<a href="mailto:petsc-users@mcs.anl.gov" target="_blank">petsc-users@mcs.anl.gov</a>><br>
<b>Subject: </b>Re: [petsc-users] [petsc-maint] Assistance Needed with PETSc KSPSolve Performance Issue<o:p></o:p></p>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">No, you don't. It is strange. Perhaps you can run a petsc example first and see if MKL is really used<o:p></o:p></p>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-family:'Courier New'">$ cd src/mat/tests</span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-family:'Courier New'">$ make ex1</span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-family:'Courier New'">$ MKL_VERBOSE=1 ./ex1</span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><br clear="all">
<o:p></o:p></p>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">--Junchao Zhang<o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">On Fri, Jun 21, 2024 at 4:03<span style="font-family:"Arial",sans-serif"> </span>PM Yongzhong Li <<a href="mailto:yongzhong.li@mail.utoronto.ca" target="_blank">yongzhong.li@mail.utoronto.ca</a>>
wrote:<o:p></o:p></p>
</div>
</div>
<blockquote style="border:none;border-left:solid #CCCCCC 1.0pt;padding:0cm 0cm 0cm 6.0pt;margin-left:4.8pt;margin-top:5.0pt;margin-right:0cm;margin-bottom:5.0pt;border-top:currentcolor;border-right:currentcolor;border-bottom:currentcolor">
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">I am using<br>
<br>
export MKL_VERBOSE=1</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">./xx<br>
<br>
in the bash file, do I have to use -</span><span lang="EN-US"> </span>ksp_converged_reason<span lang="EN-US">?<br>
<br>
Thanks,</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US">Yongzhong</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt"> </span><o:p></o:p></p>
</div>
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580mail-editor-reference-message-container">
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;margin-bottom:12.0pt"><b>From: </b>Pierre Jolivet <<a href="mailto:pierre@joliv.et" target="_blank">pierre@joliv.et</a>><br>
<b>Date: </b>Friday, June 21, 2024 at 1:47<span style="font-family:"Arial",sans-serif"> </span>PM<br>
<b>To: </b>Yongzhong Li <<a href="mailto:yongzhong.li@mail.utoronto.ca" target="_blank">yongzhong.li@mail.utoronto.ca</a>><br>
<b>Cc: </b>Junchao Zhang <<a href="mailto:junchao.zhang@gmail.com" target="_blank">junchao.zhang@gmail.com</a>>, <a href="mailto:petsc-users@mcs.anl.gov" target="_blank">petsc-users@mcs.anl.gov</a> <<a href="mailto:petsc-users@mcs.anl.gov" target="_blank">petsc-users@mcs.anl.gov</a>><br>
<b>Subject: </b>Re: [petsc-users] [petsc-maint] Assistance Needed with PETSc KSPSolve Performance Issue<o:p></o:p></p>
</div>
<table class="MsoNormalTable" border="0" cellspacing="0" cellpadding="0" align="left" width="100%" style="width:100.0%;display:table;float:none">
<tbody>
<tr>
<td style="background:#A6A6A6;padding:5.25pt 1.5pt 5.25pt 1.5pt"></td>
<td width="100%" style="width:100.0%;background:#EAEAEA;padding:5.25pt 3.75pt 5.25pt 11.25pt">
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;mso-element:frame;mso-element-frame-hspace:2.25pt;mso-element-wrap:around;mso-element-anchor-vertical:paragraph;mso-element-anchor-horizontal:column;mso-height-rule:exactly">
<span lang="ZH-CN" style="font-size:9.0pt;font-family:DengXian;color:#212121">你通常不会收到来自</span><span style="font-size:9.0pt;font-family:"Segoe UI",sans-serif;color:#212121"> </span><span style="color:black"><a href="mailto:pierre@joliv.et" target="_blank"><span style="font-size:9.0pt;font-family:"Segoe UI",sans-serif">pierre@joliv.et</span></a></span><span style="font-size:9.0pt;font-family:"Segoe UI",sans-serif;color:#212121"> </span><span lang="ZH-CN" style="font-size:9.0pt;font-family:DengXian;color:#212121">的电子邮件。</span><span style="color:black"><a href="https://urldefense.us/v3/__https://aka.ms/LearnAboutSenderIdentification__;!!G_uCfscf7eWS!flsZMI97ne0yyxHhLda3hROB9qsgstuZS-jPinxGIzFCCSdn1ujdoMR8dyz-5_kVqqMM-12Lt0dTdjKrx3wXhHZmBhNydvFQeSY$" target="_blank"><span lang="ZH-CN" style="font-size:9.0pt;font-family:DengXian">了解这一点为什么很重要</span></a></span><o:p></o:p></p>
</div>
</div>
</td>
<td width="75" style="width:56.25pt;background:#EAEAEA;padding:5.25pt 3.75pt 5.25pt 3.75pt">
</td>
</tr>
</tbody>
</table>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">How do you set the variable?<o:p></o:p></p>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">$ MKL_VERBOSE=1 ./ex1 -ksp_converged_reason<o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">MKL_VERBOSE oneMKL 2024.0 Update 1 Product build 20240215 for Intel(R) 64 architecture Intel(R) Advanced Vector Extensions 2 (Intel(R) AVX2) enabled processors, Lnx 2.80GHz lp64
intel_thread<o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">MKL_VERBOSE DDOT(10,0x22127c0,1,0x22127c0,1) 2.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1<o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">MKL_VERBOSE DSCAL(10,0x7ffc9fb4ff08,0x22127c0,1) 12.67us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1<o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">MKL_VERBOSE DDOT(10,0x22127c0,1,0x2212840,1) 1.52us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1<o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">MKL_VERBOSE DDOT(10,0x2212840,1,0x2212840,1) 167ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:1<o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">[...]<o:p></o:p></p>
</div>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;margin-bottom:12.0pt"> <o:p></o:p></p>
<blockquote style="margin-top:5.0pt;margin-bottom:5.0pt">
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">On 21 Jun 2024, at 7:37<span style="font-family:"Arial",sans-serif"> </span>PM, Yongzhong Li <<a href="mailto:yongzhong.li@mail.utoronto.ca" target="_blank">yongzhong.li@mail.utoronto.ca</a>>
wrote:<o:p></o:p></p>
</div>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
<div>
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580pfptBannermu5fqka">
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580pfptBannermu5fqka">
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580pfptBannermu5fqka">
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-family:"Arial",sans-serif">This Message Is From an External Sender </span><o:p></o:p></p>
</div>
</div>
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580pfptBannermu5fqka">
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-family:'Arial',sans-serif">This message came from outside your organization.</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt">Hello all,<br>
<br>
I set MKL_VERBOSE = 1, but observed no print output specific to the use of MKL. Does PETSc enable this verbose output?<br>
<br>
Best,</span><o:p></o:p></p>
</div>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;margin-bottom:12.0pt"><span lang="EN-US" style="font-size:11.0pt">Yongzhong</span><o:p></o:p></p>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt"> </span><o:p></o:p></p>
</div>
</div>
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580mail-editor-reference-message-container">
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;margin-bottom:12.0pt"><b>From: </b>Pierre Jolivet <<a href="mailto:pierre@joliv.et" target="_blank">pierre@joliv.et</a>><br>
<b>Date: </b>Friday, June 21, 2024 at 1:36<span style="font-family:"Arial",sans-serif"> </span>AM<br>
<b>To: </b>Junchao Zhang <<a href="mailto:junchao.zhang@gmail.com" target="_blank">junchao.zhang@gmail.com</a>><br>
<b>Cc: </b>Yongzhong Li <<a href="mailto:yongzhong.li@mail.utoronto.ca" target="_blank">yongzhong.li@mail.utoronto.ca</a>>, <a href="mailto:petsc-users@mcs.anl.gov" target="_blank">petsc-users@mcs.anl.gov</a> <<a href="mailto:petsc-users@mcs.anl.gov" target="_blank">petsc-users@mcs.anl.gov</a>><br>
<b>Subject: </b>Re: [petsc-users] [petsc-maint] Assistance Needed with PETSc KSPSolve Performance Issue<o:p></o:p></p>
</div>
<table class="MsoNormalTable" border="0" cellspacing="0" cellpadding="0" align="left" width="100%" style="width:100.0%;display:table;float:none">
<tbody>
<tr>
<td style="background:#A6A6A6;padding:5.25pt 1.5pt 5.25pt 1.5pt"></td>
<td width="100%" style="width:100.0%;background:#EAEAEA;padding:5.25pt 3.75pt 5.25pt 11.25pt">
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;mso-element:frame;mso-element-frame-hspace:2.25pt;mso-element-wrap:around;mso-element-anchor-vertical:paragraph;mso-element-anchor-horizontal:column;mso-height-rule:exactly">
<span lang="ZH-CN" style="font-size:9.0pt;font-family:DengXian;color:#212121">你通常不会收到来自</span><span style="font-size:9.0pt;font-family:"Segoe UI",sans-serif;color:#212121"> </span><span style="color:black"><a href="mailto:pierre@joliv.et" target="_blank"><span style="font-size:9.0pt;font-family:"Segoe UI",sans-serif">pierre@joliv.et</span></a></span><span style="font-size:9.0pt;font-family:"Segoe UI",sans-serif;color:#212121"> </span><span lang="ZH-CN" style="font-size:9.0pt;font-family:DengXian;color:#212121">的电子邮件。</span><span style="color:black"><a href="https://urldefense.us/v3/__https://aka.ms/LearnAboutSenderIdentification__;!!G_uCfscf7eWS!eXBeeIXo9Yqgp2nypqwKYimLnGBZXnF4dXxgLM1UoOIO6n8nt3XlfgjVWLPWJh4UOa5NNpx-nrJb_H828XRQKUREfR2m69oCbxI$" target="_blank"><span lang="ZH-CN" style="font-size:9.0pt;font-family:DengXian">了解这一点为什么很重要</span></a></span><o:p></o:p></p>
</div>
</div>
</div>
</td>
<td width="75" style="width:56.25pt;background:#EAEAEA;padding:5.25pt 3.75pt 5.25pt 3.75pt">
</td>
</tr>
</tbody>
</table>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;margin-bottom:12.0pt"> <o:p></o:p></p>
</div>
<blockquote style="margin-top:5.0pt;margin-bottom:5.0pt">
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">On 21 Jun 2024, at 6:42<span style="font-family:"Arial",sans-serif"> </span>AM, Junchao Zhang <<a href="mailto:junchao.zhang@gmail.com" target="_blank">junchao.zhang@gmail.com</a>>
wrote:<o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
</div>
<div>
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580pfptBanners71iuax">
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580pfptBanners71iuax">
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580pfptBanners71iuax">
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-family:'Arial',sans-serif">This Message Is From an External Sender</span><o:p></o:p></p>
</div>
</div>
</div>
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580pfptBanners71iuax">
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-family:'Arial',sans-serif">This message came from outside your organization.</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">I remember there are some MKL env vars to print MKL routines called. </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</blockquote>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">The environment variable is MKL_VERBOSE<o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">Thanks,<o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">Pierre<o:p></o:p></p>
</div>
</div>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;margin-bottom:12.0pt"> <o:p></o:p></p>
</div>
<blockquote style="margin-top:5.0pt;margin-bottom:5.0pt">
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">Maybe we can try it to see what MKL routines are really used and then we can understand why some petsc functions did not speed
up </span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"><br clear="all">
</span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">--Junchao Zhang</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">On Thu, Jun 20, 2024 at 10:39 PM Yongzhong Li <</span><a href="mailto:yongzhong.li@mail.utoronto.ca" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">yongzhong.li@mail.utoronto.ca</span></a><span style="font-size:9.0pt;font-family:Helvetica">>
wrote:</span><o:p></o:p></p>
</div>
</div>
</div>
<blockquote style="border:none;border-left:solid #CCCCCC 1.0pt;padding:0cm 0cm 0cm 6.0pt;margin-left:4.8pt;margin-top:5.0pt;margin-right:0cm;margin-bottom:5.0pt;border-top:currentcolor;border-right:currentcolor;border-bottom:currentcolor">
<div>
<div style="margin-top:12.0pt;margin-bottom:12.0pt;border-width:initial;border-style:initial;border-color:currentcolor;border-radius:4px;min-width:200px" id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580m_-4861803116733450987pfptBannerqdehbik">
<div style="margin-bottom:.75pt;float:left" id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580m_-4861803116733450987pfptBannerqdehbik">
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580m_-4861803116733450987pfptBannerqdehbik">
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;line-height:13.5pt;background:#D0D8DC">
<b><span style="font-size:10.5pt;font-family:'Arial',sans-serif;color:black">This Message Is From an External Sender</span></b><o:p></o:p></p>
</div>
</div>
</div>
<div style="margin-top:1.5pt" id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580m_-4861803116733450987pfptBannerqdehbik">
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;line-height:13.5pt;background:#D0D8DC">
<span style="font-size:9.0pt;font-family:'Arial',sans-serif;color:black">This message came from outside your organization.</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;background:#D0D8DC">
<span style="font-size:1.0pt;font-family:Helvetica;color:black"> </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica">Hi Barry, sorry for my last results. I didn’t fully understand the stage profiling and logging in PETSc, now I
only record the KSPSolve() stage of my program. Some sample code is as follows:<br>
<br>
// Static variable to keep track of the stage counter</span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica"> static int stageCounter = 1;</span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica"> // Generate a unique stage name</span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica"> std::ostringstream oss;</span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica"> oss << "Stage " << stageCounter << " of Code";</span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica"> std::string stageName = oss.str();</span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica"> // Register the stage</span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica"> PetscLogStage stagenum;</span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica"> PetscLogStageRegister(stageName.c_str(), &stagenum);</span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica"> PetscLogStagePush(stagenum);</span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica"> <b>KSPSolve(*ksp_ptr, b, x);</b></span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica"> PetscLogStagePop();</span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica"> stageCounter++;<br>
<br>
I have attached my new logging results; there is 1 main stage and 4 other stages, each of which is a KSPSolve() call.<br>
<br>
To provide some additional backgrounds, if you recall, I have been trying to get efficient iterative solution using multithreading. I found out by compiling PETSc with Intel MKL library instead of OpenBLAS, I am able to perform sparse matrix-vector multiplication
faster, I am using MATSEQAIJMKL. This makes the shell matrix vector product in each iteration scale well with the # of threads. However, I found out the total GMRES solve time (~KSPSolve() time) is not scaling well with the # of threads.<br>
<br>
From the logging results I learned that when performing KSPSolve(), there are some CPU overheads in PCApply() and KSPGMRESOrthog(). I ran my programs using different numbers of threads and plotted the time consumption for PCApply() and KSPGMRESOrthog() against
the # of threads. I found out these two operations are not scaling with the threads at all! My results are attached as a PDF to give you a clear view.<br>
<br>
My question is,<br>
<br>
From my understanding, in PCApply, MatSolve() is involved, and KSPGMRESOrthog() will have many vector operations, so why can’t these two parts scale well with the # of threads when the Intel MKL library is linked?<br>
<br>
Thank you,<br>
Yongzhong</span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580m_-4861803116733450987mail-editor-reference-message-container">
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;margin-bottom:12.0pt"><b><span style="font-size:9.0pt;font-family:Helvetica">From: </span></b><span style="font-size:9.0pt;font-family:Helvetica">Barry Smith <</span><a href="mailto:bsmith@petsc.dev" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">bsmith@petsc.dev</span></a><span style="font-size:9.0pt;font-family:Helvetica">><br>
<b>Date: </b>Friday, June 14, 2024 at 11:36</span><span style="font-size:9.0pt;font-family:"Arial",sans-serif"> </span><span style="font-size:9.0pt;font-family:Helvetica">AM<br>
<b>To: </b>Yongzhong Li <</span><a href="mailto:yongzhong.li@mail.utoronto.ca" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">yongzhong.li@mail.utoronto.ca</span></a><span style="font-size:9.0pt;font-family:Helvetica">><br>
<b>Cc: </b></span><a href="mailto:petsc-users@mcs.anl.gov" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">petsc-users@mcs.anl.gov</span></a><span style="font-size:9.0pt;font-family:Helvetica"> <</span><a href="mailto:petsc-users@mcs.anl.gov" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">petsc-users@mcs.anl.gov</span></a><span style="font-size:9.0pt;font-family:Helvetica">>, </span><a href="mailto:petsc-maint@mcs.anl.gov" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">petsc-maint@mcs.anl.gov</span></a><span style="font-size:9.0pt;font-family:Helvetica"> <</span><a href="mailto:petsc-maint@mcs.anl.gov" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">petsc-maint@mcs.anl.gov</span></a><span style="font-size:9.0pt;font-family:Helvetica">>,
Piero Triverio <</span><a href="mailto:piero.triverio@utoronto.ca" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">piero.triverio@utoronto.ca</span></a><span style="font-size:9.0pt;font-family:Helvetica">><br>
<b>Subject: </b>Re: [petsc-maint] Assistance Needed with PETSc KSPSolve Performance Issue</span><o:p></o:p></p>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> I am a bit confused. Without the initial guess computation, there are still a bunch of events I don't understand </span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">MatTranspose 79 1.0 4.0598e+01 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0</span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">MatMatMultSym 110 1.0 1.7419e+02 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 1 0 0 0 0 1 0 0 0 0 0</span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">MatMatMultNum 90 1.0 1.2640e+02 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 1 0 0 0 0 1 0 0 0 0 0</span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">MatMatMatMultSym 20 1.0 1.3049e+02 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 1 0 0 0 0 1 0 0 0 0 0</span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">MatRARtSym 25 1.0 1.2492e+02 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 1 0 0 0 0 1 0 0 0 0 0</span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">MatMatTrnMultSym 25 1.0 8.8265e+01 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0</span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">MatMatTrnMultNum 25 1.0 2.4820e+02 1.0 6.83e+10 1.0 0.0e+00 0.0e+00 0.0e+00 1 0 0 0 0 1 0 0 0 0 275</span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">MatTrnMatMultSym 10 1.0 7.2984e-01 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0</span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">MatTrnMatMultNum 10 1.0 9.3128e-01 1.0 0.00e+00 0.0 0.0e+00 0.0e+00 0.0e+00 0 0 0 0 0 0 0 0 0 0 0</span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">in addition there are many more VecMAXPY than VecMDot (in GMRES they are each done the same number of times)</span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">VecMDot 5588 1.0 1.7183e+03 1.0 2.06e+13 1.0 0.0e+00 0.0e+00 0.0e+00 8 10 0 0 0 8 10 0 0 0 12016</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">VecMAXPY 22412 1.0 8.4898e+03 1.0 4.17e+13 1.0 0.0e+00 0.0e+00 0.0e+00 39 20 0 0 0 39 20 0 0 0 4913</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">Finally there are a huge number of </span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">MatMultAdd 258048 1.0 1.4178e+03 1.0 6.10e+13 1.0 0.0e+00 0.0e+00 0.0e+00 7 29 0 0 0 7 29 0 0 0 43025</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">Are you making calls to all these routines? Are you doing this inside your MatMult() or before you call KSPSolve?</span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">The reason I wanted you to make a simpler run without the initial guess code is that your events are far more complicated than
would be produced by GMRES alone so it is not possible to understand the behavior you are seeing without fully understanding all the events happening in the code.</span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> Barry</span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;margin-bottom:12.0pt"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
<blockquote style="margin-top:5.0pt;margin-bottom:5.0pt">
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">On Jun 14, 2024, at 1:19</span><span style="font-size:9.0pt;font-family:"Arial",sans-serif"> </span><span style="font-size:9.0pt;font-family:Helvetica">AM,
Yongzhong Li &lt;</span><a href="mailto:yongzhong.li@mail.utoronto.ca" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">yongzhong.li@mail.utoronto.ca</span></a><span style="font-size:9.0pt;font-family:Helvetica">&gt; wrote:</span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica">Thanks, I have attached the results without using any KSPGuess. At low frequency, the iteration steps are quite
close to the one with KSPGuess, specifically <br>
<br>
KSPGuess Object: 1 MPI process</span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica"> type: fischer</span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica"> Model 1, size 200<br>
<br>
However, I found at higher frequency, the # of iteration steps are significantly higher than the one with KSPGuess, I have attached both of the results for your reference.<br>
<br>
Moreover, could I ask why the one without the KSPGuess options can be used for a baseline comparison? What are we comparing here? How does it relate to the performance issue/bottleneck I found? “</span><b><span style="font-size:9.0pt;font-family:Helvetica">I
have noticed that the time taken by </span></b><b><span style="font-size:9.0pt;font-family:"Courier New"">KSPSolve</span></b><b><span style="font-size:9.0pt;font-family:Helvetica"> is </span></b><b><span lang="EN-US" style="font-size:9.0pt;font-family:Helvetica">almost
two times </span></b><b><span style="font-size:9.0pt;font-family:Helvetica">greater than the CPU time for matrix-vector product multiplied by the number of iteration</span></b><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica">” <br>
<br>
Thank you!<br>
Yongzhong</span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580m_-4861803116733450987mail-editor-reference-message-container">
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;margin-bottom:12.0pt"><b><span style="font-size:9.0pt;font-family:Helvetica">From: </span></b><span style="font-size:9.0pt;font-family:Helvetica">Barry Smith <</span><a href="mailto:bsmith@petsc.dev" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">bsmith@petsc.dev</span></a><span style="font-size:9.0pt;font-family:Helvetica">><br>
<b>Date: </b>Thursday, June 13, 2024 at 2:14</span><span style="font-size:9.0pt;font-family:"Arial",sans-serif"> </span><span style="font-size:9.0pt;font-family:Helvetica">PM<br>
<b>To: </b>Yongzhong Li <</span><a href="mailto:yongzhong.li@mail.utoronto.ca" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">yongzhong.li@mail.utoronto.ca</span></a><span style="font-size:9.0pt;font-family:Helvetica">><br>
<b>Cc: </b></span><a href="mailto:petsc-users@mcs.anl.gov" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">petsc-users@mcs.anl.gov</span></a><span style="font-size:9.0pt;font-family:Helvetica"> <</span><a href="mailto:petsc-users@mcs.anl.gov" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">petsc-users@mcs.anl.gov</span></a><span style="font-size:9.0pt;font-family:Helvetica">>, </span><a href="mailto:petsc-maint@mcs.anl.gov" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">petsc-maint@mcs.anl.gov</span></a><span style="font-size:9.0pt;font-family:Helvetica"> <</span><a href="mailto:petsc-maint@mcs.anl.gov" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">petsc-maint@mcs.anl.gov</span></a><span style="font-size:9.0pt;font-family:Helvetica">>,
Piero Triverio <</span><a href="mailto:piero.triverio@utoronto.ca" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">piero.triverio@utoronto.ca</span></a><span style="font-size:9.0pt;font-family:Helvetica">><br>
<b>Subject: </b>Re: [petsc-maint] Assistance Needed with PETSc KSPSolve Performance Issue</span><o:p></o:p></p>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> Can you please run the same thing without the KSPGuess option(s) for a baseline comparison?</span><o:p></o:p></p>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> Thanks</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> Barry</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;margin-bottom:12.0pt"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
<blockquote style="margin-top:5.0pt;margin-bottom:5.0pt">
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">On Jun 13, 2024, at 1:27</span><span style="font-size:9.0pt;font-family:"Arial",sans-serif"> </span><span style="font-size:9.0pt;font-family:Helvetica">PM,
Yongzhong Li &lt;</span><a href="mailto:yongzhong.li@mail.utoronto.ca" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">yongzhong.li@mail.utoronto.ca</span></a><span style="font-size:9.0pt;font-family:Helvetica">&gt; wrote:</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580m_-4861803116733450987pfptBannerxs2204y">
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580m_-4861803116733450987pfptBannerxs2204y">
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580m_-4861803116733450987pfptBannerxs2204y">
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:'Arial',sans-serif">This Message Is From an External Sender</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580m_-4861803116733450987pfptBannerxs2204y">
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:'Arial',sans-serif">This message came from outside your organization.</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:Helvetica">Hi Matt,<br>
<br>
I have rerun the program with the keys you provided. The system output when performing ksp solve and the final petsc log output were stored in a .txt file attached for your reference.<br>
<br>
Thanks!<br>
Yongzhong</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:11.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580m_-4861803116733450987mail-editor-reference-message-container">
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;margin-bottom:12.0pt"><b><span style="font-size:9.0pt;font-family:Helvetica">From: </span></b><span style="font-size:9.0pt;font-family:Helvetica">Matthew Knepley <</span><a href="mailto:knepley@gmail.com" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">knepley@gmail.com</span></a><span style="font-size:9.0pt;font-family:Helvetica">><br>
<b>Date: </b>Wednesday, June 12, 2024 at 6:46</span><span style="font-size:9.0pt;font-family:"Arial",sans-serif"> </span><span style="font-size:9.0pt;font-family:Helvetica">PM<br>
<b>To: </b>Yongzhong Li <</span><a href="mailto:yongzhong.li@mail.utoronto.ca" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">yongzhong.li@mail.utoronto.ca</span></a><span style="font-size:9.0pt;font-family:Helvetica">><br>
<b>Cc: </b></span><a href="mailto:petsc-users@mcs.anl.gov" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">petsc-users@mcs.anl.gov</span></a><span style="font-size:9.0pt;font-family:Helvetica"> <</span><a href="mailto:petsc-users@mcs.anl.gov" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">petsc-users@mcs.anl.gov</span></a><span style="font-size:9.0pt;font-family:Helvetica">>, </span><a href="mailto:petsc-maint@mcs.anl.gov" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">petsc-maint@mcs.anl.gov</span></a><span style="font-size:9.0pt;font-family:Helvetica"> <</span><a href="mailto:petsc-maint@mcs.anl.gov" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">petsc-maint@mcs.anl.gov</span></a><span style="font-size:9.0pt;font-family:Helvetica">>,
Piero Triverio <</span><a href="mailto:piero.triverio@utoronto.ca" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">piero.triverio@utoronto.ca</span></a><span style="font-size:9.0pt;font-family:Helvetica">><br>
<b>Subject: </b>Re: [petsc-maint] Assistance Needed with PETSc KSPSolve Performance Issue</span><o:p></o:p></p>
</div>
<table class="MsoNormalTable" border="0" cellspacing="0" cellpadding="0" align="left" width="100%" style="width:100.0%;display:table;float:none">
<tbody>
<tr>
<td style="background:#A6A6A6;padding:5.25pt 1.5pt 5.25pt 1.5pt"></td>
<td width="100%" style="width:100.0%;background:#EAEAEA;padding:5.25pt 3.75pt 5.25pt 11.25pt">
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;mso-element:frame;mso-element-frame-hspace:2.25pt;mso-element-wrap:around;mso-element-anchor-vertical:paragraph;mso-element-anchor-horizontal:column;mso-height-rule:exactly">
<span lang="ZH-CN" style="font-size:9.0pt;font-family:DengXian;color:#212121">你通常不会收到来自</span><span style="font-size:9.0pt;font-family:"Segoe UI",sans-serif;color:#212121"> </span><span style="color:black"><a href="mailto:knepley@gmail.com" target="_blank"><span style="font-size:9.0pt;font-family:"Segoe UI",sans-serif">knepley@gmail.com</span></a></span><span style="font-size:9.0pt;font-family:"Segoe UI",sans-serif;color:#212121"> </span><span lang="ZH-CN" style="font-size:9.0pt;font-family:DengXian;color:#212121">的电子邮件。</span><span style="color:black"><a href="https://urldefense.us/v3/__https://aka.ms/LearnAboutSenderIdentification__;!!G_uCfscf7eWS!djGfJnEhNJROfsMsBJy5u_KoRKbug55xZ64oHKUFnH2cWku_Th1hwt4TDdoMd8pWYVDzJeqJslMNZwpO3y0Et94d31qk-oCEwo4$" target="_blank"><span lang="ZH-CN" style="font-size:9.0pt;font-family:DengXian">了解这一点为什么很重要</span></a></span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
</td>
<td width="75" style="width:56.25pt;background:#EAEAEA;padding:5.25pt 3.75pt 5.25pt 3.75pt">
</td>
</tr>
</tbody>
</table>
<div>
<div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">On Wed, Jun 12, 2024 at 6:36</span><span style="font-size:9.0pt;font-family:"Arial",sans-serif"> </span><span style="font-size:9.0pt;font-family:Helvetica">PM
Yongzhong Li &lt;</span><a href="mailto:yongzhong.li@mail.utoronto.ca" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">yongzhong.li@mail.utoronto.ca</span></a><span style="font-size:9.0pt;font-family:Helvetica">&gt; wrote:</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div>
<blockquote style="border:none;border-left:solid #CCCCCC 1.0pt;padding:0cm 0cm 0cm 6.0pt;margin-left:4.8pt;margin-top:5.0pt;margin-right:0cm;margin-bottom:5.0pt;border-top:currentcolor;border-right:currentcolor;border-bottom:currentcolor">
<div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:1.0pt;font-family:Helvetica;color:white">Dear PETSc’s developers, I hope this email finds you well. I am currently working on a project using PETSc and have
encountered a performance issue with the KSPSolve function. Specifically, I have noticed that the time taken by KSPSolve is </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:1.0pt;font-family:Helvetica;color:white">ZjQcmQRYFpfptBannerStart</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div style="margin-top:12.0pt;margin-bottom:12.0pt;border-width:initial;border-style:initial;border-color:currentcolor;border-radius:4px;min-width:200px" id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580m_-4861803116733450987m_5223539340070699115pfptBannerpeukvww">
<div style="margin-bottom:.75pt;float:left" id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580m_-4861803116733450987m_5223539340070699115pfptBannerpeukvww">
<div id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580m_-4861803116733450987m_5223539340070699115pfptBannerpeukvww">
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;line-height:13.5pt;background:#D0D8DC">
<b><span style="font-size:10.5pt;font-family:'Arial',sans-serif;color:black">This Message Is From an External Sender</span></b><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div style="margin-top:1.5pt" id="m_-5979530742528590611m_-7305460844614688919m_2643581689528628203m_6159968039748304261m_1825240119000957322m_-6075196701903926580m_-4861803116733450987m_5223539340070699115pfptBannerpeukvww">
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;line-height:13.5pt;background:#D0D8DC">
<span style="font-size:9.0pt;font-family:'Arial',sans-serif;color:black">This message came from outside your organization.</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;background:#D0D8DC">
<span style="font-size:1.0pt;font-family:Helvetica;color:black"> </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:1.0pt;font-family:Helvetica;color:white">ZjQcmQRYFpfptBannerEnd</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">Dear </span><span lang="EN-US" style="font-size:9.0pt;font-family:Helvetica">PETSc’s developers</span><span style="font-size:9.0pt;font-family:Helvetica">,</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">I hope this email finds you well.</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">I am currently working on a project using PETSc and have encountered a performance issue with the </span><span style="font-size:9.0pt;font-family:"Courier New"">KSPSolve</span><span style="font-size:9.0pt;font-family:Helvetica"> function.
Specifically, <b>I have noticed that the time taken by </b></span><b><span style="font-size:9.0pt;font-family:"Courier New"">KSPSolve</span></b><b><span style="font-size:9.0pt;font-family:Helvetica"> is </span></b><b><span lang="EN-US" style="font-size:9.0pt;font-family:Helvetica">almost
two times </span></b><b><span style="font-size:9.0pt;font-family:Helvetica">greater than the CPU time for matrix-vector product multiplied by the number of iteration steps</span></b><span style="font-size:9.0pt;font-family:Helvetica">. </span><span lang="EN-US" style="font-size:9.0pt;font-family:Helvetica">I
use C++ chrono to record CPU time.</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">For context, I am using a shell system matrix</span><span lang="EN-US" style="font-size:9.0pt;font-family:Helvetica"> A</span><span style="font-size:9.0pt;font-family:Helvetica">.
Despite my efforts to </span><span lang="EN-US" style="font-size:9.0pt;font-family:Helvetica">parallelize</span><span style="font-size:9.0pt;font-family:Helvetica"> the matrix-vector product</span><span lang="EN-US" style="font-size:9.0pt;font-family:Helvetica"> (Ax)</span><span style="font-size:9.0pt;font-family:Helvetica">,
the overall solve time remains higher</span><span lang="EN-US" style="font-size:9.0pt;font-family:Helvetica"> than the matrix vector product per iteration indicates when multiple threads were used</span><span style="font-size:9.0pt;font-family:Helvetica">.
Here are a few details of my setup:</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<ul type="disc">
<li class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;mso-list:l0 level1 lfo1">
<b><span style="font-size:9.0pt;font-family:Helvetica">Matrix Type</span></b><span style="font-size:9.0pt;font-family:Helvetica">: Shell system matrix</span><o:p></o:p></li><li class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;mso-list:l0 level1 lfo1">
<b><span style="font-size:9.0pt;font-family:Helvetica">Preconditioner</span></b><span style="font-size:9.0pt;font-family:Helvetica">: </span><span lang="EN-US" style="font-size:9.0pt;font-family:Helvetica">Shell PC</span><o:p></o:p></li><li class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;mso-list:l0 level1 lfo1">
<b><span style="font-size:9.0pt;font-family:Helvetica">Parallel Environment</span></b><span style="font-size:9.0pt;font-family:Helvetica">: </span><span lang="EN-US" style="font-size:9.0pt;font-family:Helvetica">Using Intel MKL as PETSc’s BLAS/LAPACK library,
multithreading is enabled</span><o:p></o:p></li></ul>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:9.0pt;font-family:Helvetica">I have considered several potential reasons, such as preconditioner setup, additional solver operations, and the
inherent overhead of using a shell system matrix. <b>However, since KSPSolve is a high-level API, I have been unable to pinpoint the exact cause of the increased solve time.</b></span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:9.0pt;font-family:Helvetica">Have you observed the same issue? </span><span style="font-size:9.0pt;font-family:Helvetica">Could you</span><span lang="EN-US" style="font-size:9.0pt;font-family:Helvetica"> please </span><span style="font-size:9.0pt;font-family:Helvetica">provide
some </span><span lang="EN-US" style="font-size:9.0pt;font-family:Helvetica">experience </span><span style="font-size:9.0pt;font-family:Helvetica">on how to diagnose and address this performance discrepancy? Any insights or recommendations you could offer
would be greatly appreciated.</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</blockquote>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">For any performance question like this, we need to see the output of your code run with</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> -ksp_view -ksp_monitor_true_residual -ksp_converged_reason -log_view</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> Thanks,</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> Matt</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<blockquote style="border:none;border-left:solid #CCCCCC 1.0pt;padding:0cm 0cm 0cm 6.0pt;margin-left:4.8pt;margin-top:5.0pt;margin-right:0cm;margin-bottom:5.0pt;border-top:currentcolor;border-right:currentcolor;border-bottom:currentcolor">
<div>
<div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">Thank you for your time and assistance.</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">Best</span><span lang="EN-US" style="font-size:9.0pt;font-family:Helvetica"> regards,</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:9.0pt;font-family:Helvetica">Yongzhong</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:'Calibri',sans-serif">-----------------------------------------------------------</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><b><span lang="EN-US" style="font-size:11.0pt;font-family:'Calibri',sans-serif">Yongzhong Li</span></b><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:'Calibri',sans-serif">PhD student | Electromagnetics Group</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:'Calibri',sans-serif">Department of Electrical &amp; Computer Engineering</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span lang="EN-US" style="font-size:11.0pt;font-family:'Calibri',sans-serif">University of Toronto</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><a href="https://urldefense.us/v3/__http://www.modelics.org__;!!G_uCfscf7eWS!cuLttMJEcegaqu461Bt4QLsO4fASfLM5vjRbtyNhWJQiInbjgNwkGNdkFE1ebSbFjOUatYB0-jd2yQWMWzqkDFFjwMvNl3ZKAr8$" target="_blank"><span lang="EN-US" style="font-size:11.0pt;font-family:'Calibri',sans-serif;color:#0563C1">http://www.modelics.org</span></a><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</blockquote>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"><br clear="all">
</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">-- </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">What most experimenters take for granted before they begin their experiments is infinitely more interesting than any results
to which their experiments lead.<br>
-- Norbert Wiener</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><a href="https://urldefense.us/v3/__http://www.cse.buffalo.edu/*knepley/__;fg!!G_uCfscf7eWS!djGfJnEhNJROfsMsBJy5u_KoRKbug55xZ64oHKUFnH2cWku_Th1hwt4TDdoMd8pWYVDzJeqJslMNZwpO3y0Et94d31qkNOuenGA$" target="_blank"><span style="font-size:9.0pt;font-family:Helvetica">https://www.cse.buffalo.edu/~knepley/</span></a><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">&lt;ksp_petsc_log.txt&gt;</span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</blockquote>
</div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica"> </span><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span style="font-size:9.0pt;font-family:Helvetica">&lt;ksp_petsc_log.txt&gt;&lt;ksp_petsc_log_noguess.txt&gt;</span><o:p></o:p></p>
</div>
</div>
</div>
</blockquote>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</blockquote>
</div>
</div>
</blockquote>
</div>
</div>
</div>
</div>
</div>
</div>
</blockquote>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</blockquote>
</div>
</div>
</div>
</div>
</div>
</div>
</blockquote>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><br clear="all">
<o:p></o:p></p>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">-- <o:p></o:p></p>
</div>
<div>
<div>
<div>
<div>
<div>
<div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">What most experimenters take for granted before they begin their experiments is infinitely more interesting than any results to which their experiments lead.<br>
-- Norbert Wiener<o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><a href="https://urldefense.us/v3/__http://www.cse.buffalo.edu/*knepley/__;fg!!G_uCfscf7eWS!fVvbGldqcUV5ju4jpu5oGmt-VjITi5JpCJzhHxpbgsERLVYZzglpxKOOyrBRGxjRxp7vWHwt3SnINFOQErR1Z8kcDcf3cNeD9Gw$" target="_blank">https://www.cse.buffalo.edu/~knepley/</a><o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</blockquote>
</div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
</blockquote>
</div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><br clear="all">
<o:p></o:p></p>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><span class="m-5979530742528590611gmailsignatureprefix">--
</span><o:p></o:p></p>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">What most experimenters take for granted before they begin their experiments is infinitely more interesting than any results to which their experiments lead.<br>
-- Norbert Wiener<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"> <o:p></o:p></p>
</div>
<p class="MsoNormal" style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><a href="https://urldefense.us/v3/__http://www.cse.buffalo.edu/*knepley/__;fg!!G_uCfscf7eWS!aQJpmm5W6l6FUiumnIPmkouzwzNUfx-Dyq04i1O2KS_InQGk6qjI7wUir0Hx6QEUQE2AMiJDsez3x2Os2C2d$" target="_blank">https://www.cse.buffalo.edu/~knepley/</a><o:p></o:p></p>
</div>
</div>
</div>
</blockquote>
</div>
</div>
</div>
</div>
</body>
</html>