[petsc-dev] AVX kernels, old gcc, still broken
Zhang, Hong
hongzhang at anl.gov
Thu Oct 24 14:00:09 CDT 2019
Hi Lisandro,
Can you please check if the following patch fixes the problem? I will create an MR.
diff --git a/src/mat/impls/aij/seq/aijperm/aijperm.c b/src/mat/impls/aij/seq/aijperm/aijperm.c
index 577dfc6713..568535117a 100644
--- a/src/mat/impls/aij/seq/aijperm/aijperm.c
+++ b/src/mat/impls/aij/seq/aijperm/aijperm.c
@@ -12,7 +12,7 @@
#include <../src/mat/impls/aij/seq/aij.h>
-#if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
+#if defined(PETSC_USE_AVX512_KERNELS) && defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) && !defined(PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND)
#include <immintrin.h>
#if !defined(_MM_SCALE_8)
@@ -301,7 +301,7 @@ PetscErrorCode MatMult_SeqAIJPERM(Mat A,Vec xx,Vec yy)
#if !(defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJPERM) && defined(notworking))
PetscInt i,j;
#endif
-#if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
+#if defined(PETSC_USE_AVX512_KERNELS) && defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) && !defined(PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND)
__m512d vec_x,vec_y,vec_vals;
__m256i vec_idx,vec_ipos,vec_j;
__mmask8 mask;
@@ -401,7 +401,7 @@ PetscErrorCode MatMult_SeqAIJPERM(Mat A,Vec xx,Vec yy)
#pragma _CRI prefervector
#endif
-#if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
+#if defined(PETSC_USE_AVX512_KERNELS) && defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) && !defined(PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND)
vec_y = _mm512_setzero_pd();
ipos = ip[i];
for (j=0; j<(nz>>3); j++) {
@@ -436,7 +436,7 @@ PetscErrorCode MatMult_SeqAIJPERM(Mat A,Vec xx,Vec yy)
* worthwhile to vectorize across the rows, that is, to do the
* matvec by operating with "columns" of the chunk. */
for (j=0; j<nz; j++) {
-#if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
+#if defined(PETSC_USE_AVX512_KERNELS) && defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) && !defined(PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND)
vec_j = _mm256_set1_epi32(j);
for (i=0; i<((isize>>3)<<3); i+=8) {
vec_y = _mm512_loadu_pd(&yp[i]);
Thanks,
Hong
On Oct 24, 2019, at 2:47 PM, Lisandro Dalcin via petsc-dev <petsc-dev at mcs.anl.gov> wrote:
This is with master, but I bet the issue is also in maint.
* Running on Ubuntu 16
$ uname -a
Linux flamingo 4.4.0-104-generic #127-Ubuntu SMP Mon Dec 11 12:16:42 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux
* With system gcc 5.4
$ mpicc -show
/usr/bin/gcc-5 -I/sw/workstations/apps/linux-ubuntu16.04-x86_64/mpich/3.3.1/gcc-5.4.0/nvejoe25snmak6a7fnjghabxjukjkuiu/include -L/sw/workstations/apps/linux-ubuntu16.04-x86_64/mpich/3.3.1/gcc-5.4.0/nvejoe25snmak6a7fnjghabxjukjkuiu/lib -Wl,-rpath -Wl,/sw/workstations/apps/linux-ubuntu16.04-x86_64/mpich/3.3.1/gcc-5.4.0/nvejoe25snmak6a7fnjghabxjukjkuiu/lib -lmpi
$ mpicc --version
gcc-5 (Ubuntu 5.4.0-6ubuntu1~16.04.11) 5.4.0 20160609
Copyright (C) 2015 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* PETSc configured to NOT USE AVX512 kernels
$ grep avx arch-gnu-opt/lib/petsc/conf/reconfigure-arch-gnu-opt.py
'--with-avx512-kernels=0',
* Bang!
$ touch src/mat/impls/aij/seq/aijperm/aijperm.c
$ make -f gmakefile
Use "/usr/bin/make V=1" to see verbose compile lines, "/usr/bin/make V=0" to suppress.
CC arch-gnu-opt/obj/mat/impls/aij/seq/aijperm/aijperm.o
/home/dalcin/Devel/petsc/src/mat/impls/aij/seq/aijperm/aijperm.c: In function ‘MatMult_SeqAIJPERM’:
/home/dalcin/Devel/petsc/src/mat/impls/aij/seq/aijperm/aijperm.c:426:22: warning: implicit declaration of function ‘_mm512_reduce_add_pd’ [-Wimplicit-function-declaration]
yp[i] += _mm512_reduce_add_pd(vec_y);
--
Lisandro Dalcin
============
Research Scientist
Extreme Computing Research Center (ECRC)
King Abdullah University of Science and Technology (KAUST)
http://ecrc.kaust.edu.sa/
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/petsc-dev/attachments/20191024/822f920b/attachment.html>
More information about the petsc-dev
mailing list