On Wed, Apr 17, 2019 at 8:13 PM Jakub Jelinek <ja...@redhat.com> wrote: > > Hi! > > The following patch fixes a bunch of copy-and-paste errors in the -O0 macros in the > PR89784 implementation plus testcase coverage that FAILs without the header > change and succeeds with it (the tests were previously run at -O2 only > where they test the inline functions and not the macros). > Because at -O0 the C expression x * y + z isn't contracted into FMA, there is a small > precision difference in two of the tests with the chosen constants, so I've > changed them to ones where a precision difference isn't really possible. > I think the constants weren't chosen very well, because either we just want > some basic testing, for which even the adjusted ones are ok, or we want > to specifically check for FMA, in which case we should check some FMA > corner cases where without FMA the result is completely different from one > with FMA. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > And sorry for screwing it up. > > 2019-04-17 Hongtao Liu <hongtao....@intel.com> > > PR target/90125 > * config/i386/avx512fintrin.h (_mm_maskz_fmadd_round_sd, > _mm_maskz_fmadd_round_ss, _mm_maskz_fmsub_round_sd, > _mm_maskz_fmsub_round_ss, _mm_maskz_fnmadd_round_sd, > _mm_maskz_fnmadd_round_ss, _mm_maskz_fnmsub_round_sd, > _mm_maskz_fnmsub_round_ss): Use _maskz builtin instead of _mask3. > > 2019-04-17 Jakub Jelinek <ja...@redhat.com> > > PR target/90125 > * gcc.target/i386/avx512f-vfmsubXXXss-2.c (avx512f_test): Adjust > constants to ensure precise result even when not using fma. > * gcc.target/i386/avx512f-vfnmaddXXXss-2.c (avx512f_test): Likewise. > * gcc.target/i386/avx512f-vfmaddXXXsd-3.c: New test. > * gcc.target/i386/avx512f-vfmaddXXXss-3.c: New test. > * gcc.target/i386/avx512f-vfmsubXXXsd-3.c: New test. > * gcc.target/i386/avx512f-vfmsubXXXss-3.c: New test. > * gcc.target/i386/avx512f-vfnmaddXXXsd-3.c: New test. > * gcc.target/i386/avx512f-vfnmaddXXXss-3.c: New test. 
> * gcc.target/i386/avx512f-vfnmsubXXXsd-3.c: New test. > * gcc.target/i386/avx512f-vfnmsubXXXss-3.c: New test.
The patch can be committed under the obvious rule. Thanks, Uros. > --- gcc/config/i386/avx512fintrin.h.jj 2019-03-22 11:07:00.699948784 +0100 > +++ gcc/config/i386/avx512fintrin.h 2019-04-17 11:24:53.683695473 +0200 > @@ -12104,10 +12104,10 @@ _mm_maskz_fnmsub_round_ss (__mmask8 __U, > (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R) > > #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \ > - (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, C, U, R) > + (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, C, U, R) > > #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \ > - (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R) > + (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, C, U, R) > > #define _mm_mask_fmsub_round_sd(A, U, B, C, R) \ > (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, -(C), U, R) > @@ -12122,10 +12122,10 @@ _mm_maskz_fnmsub_round_ss (__mmask8 __U, > (__m128) __builtin_ia32_vfmsubss3_mask3 (A, B, C, U, R) > > #define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \ > - (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, -(C), U, R) > + (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, -(C), U, R) > > #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \ > - (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, -(C), U, R) > + (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, -(C), U, R) > > #define _mm_mask_fnmadd_round_sd(A, U, B, C, R) \ > (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), C, U, R) > @@ -12140,10 +12140,10 @@ _mm_maskz_fnmsub_round_ss (__mmask8 __U, > (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R) > > #define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \ > - (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), C, U, R) > + (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), C, U, R) > > #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \ > - (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R) > + (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), C, U, R) > > #define _mm_mask_fnmsub_round_sd(A, U, B, C, R) \ > (__m128d) __builtin_ia32_vfmaddsd3_mask 
(A, -(B), -(C), U, R) > @@ -12158,10 +12158,10 @@ _mm_maskz_fnmsub_round_ss (__mmask8 __U, > (__m128) __builtin_ia32_vfmsubss3_mask3 (A, -(B), C, U, R) > > #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \ > - (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), -(C), U, R) > + (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), -(C), U, R) > > #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \ > - (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), -(C), U, R) > + (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), -(C), U, R) > #endif > > #ifdef __OPTIMIZE__ > --- gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-2.c.jj 2019-03-22 > 11:07:00.701948752 +0100 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-2.c 2019-04-17 > 11:35:57.314481901 +0200 > @@ -41,8 +41,8 @@ avx512f_test (void) > for (i = 0; i < SIZE; i++) > { > src1.a[i] = DEFAULT_VALUE; > - src2.a[i] = 56.78 * (i + 1) * sign; > - src3.a[i] = 90.12 * (i + 2) * sign; > + src2.a[i] = 56.75 * (i + 1) * sign; > + src3.a[i] = 90.25 * (i + 2) * sign; > sign = sign * -1; > } > for (i = 0; i < SIZE; i++) > --- gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-2.c.jj 2019-03-22 > 11:07:00.701948752 +0100 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-2.c 2019-04-17 > 11:36:40.372755625 +0200 > @@ -41,8 +41,8 @@ avx512f_test (void) > for (i = 0; i < SIZE; i++) > { > src1.a[i] = DEFAULT_VALUE; > - src2.a[i] = 56.78 * (i + 1) * sign; > - src3.a[i] = 90.12 * (i + 2) * sign; > + src2.a[i] = 56.75 * (i + 1) * sign; > + src3.a[i] = 90.25 * (i + 2) * sign; > sign = sign * -1; > } > for (i = 0; i < SIZE; i++) > --- gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-3.c.jj 2019-04-17 > 11:11:13.483563310 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-3.c 2019-04-17 > 11:11:30.553279159 +0200 > @@ -0,0 +1,5 @@ > +/* { dg-do run } */ > +/* { dg-options "-O0 -mavx512f" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#include "avx512f-vfmaddXXXsd-2.c" > --- 
gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-3.c.jj 2019-04-17 > 11:11:41.806091847 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-3.c 2019-04-17 > 11:11:55.740859882 +0200 > @@ -0,0 +1,5 @@ > +/* { dg-do run } */ > +/* { dg-options "-O0 -mavx512f" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#include "avx512f-vfmaddXXXss-2.c" > --- gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-3.c.jj 2019-04-17 > 11:13:02.975730705 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-3.c 2019-04-17 > 11:13:24.260370262 +0200 > @@ -0,0 +1,5 @@ > +/* { dg-do run } */ > +/* { dg-options "-O0 -mavx512f" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#include "avx512f-vfmsubXXXsd-2.c" > --- gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-3.c.jj 2019-04-17 > 11:13:02.977730671 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-3.c 2019-04-17 > 11:13:42.046069079 +0200 > @@ -0,0 +1,5 @@ > +/* { dg-do run } */ > +/* { dg-options "-O0 -mavx512f" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#include "avx512f-vfmsubXXXss-2.c" > --- gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-3.c.jj 2019-04-17 > 11:13:02.979730637 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-3.c 2019-04-17 > 11:13:54.807852972 +0200 > @@ -0,0 +1,5 @@ > +/* { dg-do run } */ > +/* { dg-options "-O0 -mavx512f" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#include "avx512f-vfnmaddXXXsd-2.c" > --- gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-3.c.jj 2019-04-17 > 11:13:02.981730603 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-3.c 2019-04-17 > 11:14:05.945664352 +0200 > @@ -0,0 +1,5 @@ > +/* { dg-do run } */ > +/* { dg-options "-O0 -mavx512f" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#include "avx512f-vfnmaddXXXss-2.c" > --- gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-3.c.jj 2019-04-17 > 11:13:02.983730569 +0200 > +++ 
gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-3.c 2019-04-17 > 11:14:19.444435772 +0200 > @@ -0,0 +1,5 @@ > +/* { dg-do run } */ > +/* { dg-options "-O0 -mavx512f" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#include "avx512f-vfnmsubXXXsd-2.c" > --- gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-3.c.jj 2019-04-17 > 11:13:02.985730535 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-3.c 2019-04-17 > 11:15:08.390606912 +0200 > @@ -0,0 +1,5 @@ > +/* { dg-do run } */ > +/* { dg-options "-O0 -mavx512f" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#include "avx512f-vfnmsubXXXss-2.c" > > Jakub