On Wed, Apr 17, 2019 at 8:13 PM Jakub Jelinek <ja...@redhat.com> wrote:
>
> Hi!
>
> The following patch fixes a bunch of pastos in the -O0 macros in the
> PR89784 implementation plus testcase coverage that FAILs without the header
> change and succeeds with that (the tests were previously run at -O2 only
> where they test the inline functions and not the macros).
> Because at -O0 the C x * y + z isn't contracted into FMA, there is a small
> precision difference in two of the tests with the chosen constants, so I've
> changed them to ones where a precision difference isn't really possible.
> I think the constants weren't chosen very well, because either we just want
> some basic testing, for which even the adjusted ones are ok, or we want
> to specifically check for FMA, in that case we should check some FMA
> corner cases where without FMA the result is completely different from one
> with FMA.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> And sorry for screwing it up.
>
> 2019-04-17  Hongtao Liu  <hongtao....@intel.com>
>
>         PR target/90125
>         * config/i386/avx512fintrin.h (_mm_maskz_fmadd_round_sd,
>         _mm_maskz_fmadd_round_ss, _mm_maskz_fmsub_round_sd,
>         _mm_maskz_fmsub_round_ss, _mm_maskz_fnmadd_round_sd,
>         _mm_maskz_fnmadd_round_ss, _mm_maskz_fnmsub_round_sd,
>         _mm_maskz_fnmsub_round_ss): Use _maskz builtin instead of _mask3.
>
> 2019-04-17  Jakub Jelinek  <ja...@redhat.com>
>
>         PR target/90125
>         * gcc.target/i386/avx512f-vfmsubXXXss-2.c (avx512f_test): Adjust
>         constants to ensure precise result even when not using fma.
>         * gcc.target/i386/avx512f-vfnmaddXXXss-2.c (avx512f_test): Likewise.
>         * gcc.target/i386/avx512f-vfmaddXXXsd-3.c: New test.
>         * gcc.target/i386/avx512f-vfmaddXXXss-3.c: New test.
>         * gcc.target/i386/avx512f-vfmsubXXXsd-3.c: New test.
>         * gcc.target/i386/avx512f-vfmsubXXXss-3.c: New test.
>         * gcc.target/i386/avx512f-vfnmaddXXXsd-3.c: New test.
>         * gcc.target/i386/avx512f-vfnmaddXXXss-3.c: New test.
>         * gcc.target/i386/avx512f-vfnmsubXXXsd-3.c: New test.
>         * gcc.target/i386/avx512f-vfnmsubXXXss-3.c: New test.

The patch can be committed under the obvious rule.

Thanks,
Uros.

> --- gcc/config/i386/avx512fintrin.h.jj  2019-03-22 11:07:00.699948784 +0100
> +++ gcc/config/i386/avx512fintrin.h     2019-04-17 11:24:53.683695473 +0200
> @@ -12104,10 +12104,10 @@ _mm_maskz_fnmsub_round_ss (__mmask8 __U,
>      (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R)
>
>  #define _mm_maskz_fmadd_round_sd(U, A, B, C, R)            \
> -    (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, C, U, R)
> +    (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, C, U, R)
>
>  #define _mm_maskz_fmadd_round_ss(U, A, B, C, R)            \
> -    (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R)
> +    (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, C, U, R)
>
>  #define _mm_mask_fmsub_round_sd(A, U, B, C, R)            \
>      (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, -(C), U, R)
> @@ -12122,10 +12122,10 @@ _mm_maskz_fnmsub_round_ss (__mmask8 __U,
>      (__m128) __builtin_ia32_vfmsubss3_mask3 (A, B, C, U, R)
>
>  #define _mm_maskz_fmsub_round_sd(U, A, B, C, R)            \
> -    (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, -(C), U, R)
> +    (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, -(C), U, R)
>
>  #define _mm_maskz_fmsub_round_ss(U, A, B, C, R)            \
> -    (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, -(C), U, R)
> +    (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, -(C), U, R)
>
>  #define _mm_mask_fnmadd_round_sd(A, U, B, C, R)            \
>      (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), C, U, R)
> @@ -12140,10 +12140,10 @@ _mm_maskz_fnmsub_round_ss (__mmask8 __U,
>      (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R)
>
>  #define _mm_maskz_fnmadd_round_sd(U, A, B, C, R)            \
> -    (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), C, U, R)
> +    (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), C, U, R)
>
>  #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R)            \
> -    (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R)
> +    (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), C, U, R)
>
>  #define _mm_mask_fnmsub_round_sd(A, U, B, C, R)            \
>      (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), -(C), U, R)
> @@ -12158,10 +12158,10 @@ _mm_maskz_fnmsub_round_ss (__mmask8 __U,
>      (__m128) __builtin_ia32_vfmsubss3_mask3 (A, -(B), C, U, R)
>
>  #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R)            \
> -    (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), -(C), U, R)
> +    (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), -(C), U, R)
>
>  #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R)            \
> -    (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), -(C), U, R)
> +    (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), -(C), U, R)
>  #endif
>
>  #ifdef __OPTIMIZE__
> --- gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-2.c.jj    2019-03-22 11:07:00.701948752 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-2.c       2019-04-17 11:35:57.314481901 +0200
> @@ -41,8 +41,8 @@ avx512f_test (void)
>    for (i = 0; i < SIZE; i++)
>      {
>        src1.a[i] = DEFAULT_VALUE;
> -      src2.a[i] = 56.78 * (i + 1) * sign;
> -      src3.a[i] = 90.12 * (i + 2) * sign;
> +      src2.a[i] = 56.75 * (i + 1) * sign;
> +      src3.a[i] = 90.25 * (i + 2) * sign;
>        sign = sign * -1;
>      }
>    for (i = 0; i < SIZE; i++)
> --- gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-2.c.jj   2019-03-22 11:07:00.701948752 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-2.c      2019-04-17 11:36:40.372755625 +0200
> @@ -41,8 +41,8 @@ avx512f_test (void)
>    for (i = 0; i < SIZE; i++)
>      {
>        src1.a[i] = DEFAULT_VALUE;
> -      src2.a[i] = 56.78 * (i + 1) * sign;
> -      src3.a[i] = 90.12 * (i + 2) * sign;
> +      src2.a[i] = 56.75 * (i + 1) * sign;
> +      src3.a[i] = 90.25 * (i + 2) * sign;
>        sign = sign * -1;
>      }
>    for (i = 0; i < SIZE; i++)
> --- gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-3.c.jj    2019-04-17 11:11:13.483563310 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-3.c       2019-04-17 11:11:30.553279159 +0200
> @@ -0,0 +1,5 @@
> +/* { dg-do run } */
> +/* { dg-options "-O0 -mavx512f" } */
> +/* { dg-require-effective-target avx512f } */
> +
> +#include "avx512f-vfmaddXXXsd-2.c"
> --- gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-3.c.jj    2019-04-17 11:11:41.806091847 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-3.c       2019-04-17 11:11:55.740859882 +0200
> @@ -0,0 +1,5 @@
> +/* { dg-do run } */
> +/* { dg-options "-O0 -mavx512f" } */
> +/* { dg-require-effective-target avx512f } */
> +
> +#include "avx512f-vfmaddXXXss-2.c"
> --- gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-3.c.jj    2019-04-17 11:13:02.975730705 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-3.c       2019-04-17 11:13:24.260370262 +0200
> @@ -0,0 +1,5 @@
> +/* { dg-do run } */
> +/* { dg-options "-O0 -mavx512f" } */
> +/* { dg-require-effective-target avx512f } */
> +
> +#include "avx512f-vfmsubXXXsd-2.c"
> --- gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-3.c.jj    2019-04-17 11:13:02.977730671 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-3.c       2019-04-17 11:13:42.046069079 +0200
> @@ -0,0 +1,5 @@
> +/* { dg-do run } */
> +/* { dg-options "-O0 -mavx512f" } */
> +/* { dg-require-effective-target avx512f } */
> +
> +#include "avx512f-vfmsubXXXss-2.c"
> --- gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-3.c.jj   2019-04-17 11:13:02.979730637 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-3.c      2019-04-17 11:13:54.807852972 +0200
> @@ -0,0 +1,5 @@
> +/* { dg-do run } */
> +/* { dg-options "-O0 -mavx512f" } */
> +/* { dg-require-effective-target avx512f } */
> +
> +#include "avx512f-vfnmaddXXXsd-2.c"
> --- gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-3.c.jj   2019-04-17 11:13:02.981730603 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-3.c      2019-04-17 11:14:05.945664352 +0200
> @@ -0,0 +1,5 @@
> +/* { dg-do run } */
> +/* { dg-options "-O0 -mavx512f" } */
> +/* { dg-require-effective-target avx512f } */
> +
> +#include "avx512f-vfnmaddXXXss-2.c"
> --- gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-3.c.jj   2019-04-17 11:13:02.983730569 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-3.c      2019-04-17 11:14:19.444435772 +0200
> @@ -0,0 +1,5 @@
> +/* { dg-do run } */
> +/* { dg-options "-O0 -mavx512f" } */
> +/* { dg-require-effective-target avx512f } */
> +
> +#include "avx512f-vfnmsubXXXsd-2.c"
> --- gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-3.c.jj   2019-04-17 11:13:02.985730535 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-3.c      2019-04-17 11:15:08.390606912 +0200
> @@ -0,0 +1,5 @@
> +/* { dg-do run } */
> +/* { dg-options "-O0 -mavx512f" } */
> +/* { dg-require-effective-target avx512f } */
> +
> +#include "avx512f-vfnmsubXXXss-2.c"
>
>         Jakub

Reply via email to