[petsc-dev] AVX kernels, old gcc, still broken

Zhang, Hong hongzhang at anl.gov
Thu Oct 24 14:00:09 CDT 2019


Hi Lisandro,

Can you please check if the following patch fixes the problem? I will create an MR.

diff --git a/src/mat/impls/aij/seq/aijperm/aijperm.c b/src/mat/impls/aij/seq/aijperm/aijperm.c
index 577dfc6713..568535117a 100644
--- a/src/mat/impls/aij/seq/aijperm/aijperm.c
+++ b/src/mat/impls/aij/seq/aijperm/aijperm.c
@@ -12,7 +12,7 @@

 #include <../src/mat/impls/aij/seq/aij.h>

-#if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
+#if defined(PETSC_USE_AVX512_KERNELS) && defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) && !defined(PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND)
 #include <immintrin.h>

 #if !defined(_MM_SCALE_8)
@@ -301,7 +301,7 @@ PetscErrorCode MatMult_SeqAIJPERM(Mat A,Vec xx,Vec yy)
 #if !(defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJPERM) && defined(notworking))
   PetscInt          i,j;
 #endif
-#if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
+#if defined(PETSC_USE_AVX512_KERNELS) && defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) && !defined(PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND)
   __m512d           vec_x,vec_y,vec_vals;
   __m256i           vec_idx,vec_ipos,vec_j;
   __mmask8           mask;
@@ -401,7 +401,7 @@ PetscErrorCode MatMult_SeqAIJPERM(Mat A,Vec xx,Vec yy)
 #pragma _CRI prefervector
 #endif

-#if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
+#if defined(PETSC_USE_AVX512_KERNELS) && defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) && !defined(PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND)
             vec_y = _mm512_setzero_pd();
             ipos = ip[i];
             for (j=0; j<(nz>>3); j++) {
@@ -436,7 +436,7 @@ PetscErrorCode MatMult_SeqAIJPERM(Mat A,Vec xx,Vec yy)
            * worthwhile to vectorize across the rows, that is, to do the
            * matvec by operating with "columns" of the chunk. */
           for (j=0; j<nz; j++) {
-#if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
+#if defined(PETSC_USE_AVX512_KERNELS) && defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) && !defined(PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND)
             vec_j = _mm256_set1_epi32(j);
             for (i=0; i<((isize>>3)<<3); i+=8) {
               vec_y    = _mm512_loadu_pd(&yp[i]);


Thanks,
Hong

On Oct 24, 2019, at 2:47 PM, Lisandro Dalcin via petsc-dev <petsc-dev at mcs.anl.gov> wrote:

This is with master, but I bet the issue is also in maint.

* Running on Ubuntu 16

$ uname -a
Linux flamingo 4.4.0-104-generic #127-Ubuntu SMP Mon Dec 11 12:16:42 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux

* With system gcc 5.4

$ mpicc -show
/usr/bin/gcc-5 -I/sw/workstations/apps/linux-ubuntu16.04-x86_64/mpich/3.3.1/gcc-5.4.0/nvejoe25snmak6a7fnjghabxjukjkuiu/include -L/sw/workstations/apps/linux-ubuntu16.04-x86_64/mpich/3.3.1/gcc-5.4.0/nvejoe25snmak6a7fnjghabxjukjkuiu/lib -Wl,-rpath -Wl,/sw/workstations/apps/linux-ubuntu16.04-x86_64/mpich/3.3.1/gcc-5.4.0/nvejoe25snmak6a7fnjghabxjukjkuiu/lib -lmpi

$ mpicc --version
gcc-5 (Ubuntu 5.4.0-6ubuntu1~16.04.11) 5.4.0 20160609
Copyright (C) 2015 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

* PETSc configured to NOT USE AVX512 kernels

$ grep avx arch-gnu-opt/lib/petsc/conf/reconfigure-arch-gnu-opt.py
    '--with-avx512-kernels=0',

* Bang!

$ touch src/mat/impls/aij/seq/aijperm/aijperm.c
$ make -f gmakefile
Use "/usr/bin/make V=1" to see verbose compile lines, "/usr/bin/make V=0" to suppress.
          CC arch-gnu-opt/obj/mat/impls/aij/seq/aijperm/aijperm.o
/home/dalcin/Devel/petsc/src/mat/impls/aij/seq/aijperm/aijperm.c: In function ‘MatMult_SeqAIJPERM’:
/home/dalcin/Devel/petsc/src/mat/impls/aij/seq/aijperm/aijperm.c:426:22: warning: implicit declaration of function ‘_mm512_reduce_add_pd’ [-Wimplicit-function-declaration]
             yp[i] += _mm512_reduce_add_pd(vec_y);


--
Lisandro Dalcin
============
Research Scientist
Extreme Computing Research Center (ECRC)
King Abdullah University of Science and Technology (KAUST)
http://ecrc.kaust.edu.sa/

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/petsc-dev/attachments/20191024/822f920b/attachment.html>


More information about the petsc-dev mailing list