On Sun, Sep 5, 2021 at 5:56 AM H.J. Lu via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Enable FMA in scalar/vector unsigned SI to SF expanders. > > gcc/ > > PR target/85819 > * config/i386/i386-expand.c (ix86_expand_convert_uns_sisf_sse): > Enable FMA. > (ix86_expand_vector_convert_uns_vsivsf): Likewise. > > gcc/testsuite/ > > PR target/85819 > * gcc.target/i386/pr85819-1.c: New test. > * gcc.target/i386/pr85819-2a.c: Likewise. > * gcc.target/i386/pr85819-2b.c: Likewise. > * gcc.target/i386/pr85819-2c.c: Likewise. > * gcc.target/i386/pr85819-3.c: Likewise. > --- > gcc/config/i386/i386-expand.c | 44 ++++++++++++++++------ > gcc/testsuite/gcc.target/i386/pr85819-1.c | 11 ++++++ > gcc/testsuite/gcc.target/i386/pr85819-2a.c | 17 +++++++++ > gcc/testsuite/gcc.target/i386/pr85819-2b.c | 6 +++ > gcc/testsuite/gcc.target/i386/pr85819-2c.c | 7 ++++ > gcc/testsuite/gcc.target/i386/pr85819-3.c | 18 +++++++++ > 6 files changed, 91 insertions(+), 12 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2a.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2b.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2c.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-3.c > > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c > index 2500dbfa7fb..26263bbe1af 100644 > --- a/gcc/config/i386/i386-expand.c > +++ b/gcc/config/i386/i386-expand.c > @@ -1851,12 +1851,21 @@ ix86_expand_convert_uns_sisf_sse (rtx target, rtx > input) > fp_lo = gen_reg_rtx (SFmode); > emit_insn (gen_floatsisf2 (fp_hi, int_hi)); > emit_insn (gen_floatsisf2 (fp_lo, int_lo)); > - fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi, > - 0, OPTAB_DIRECT); > - fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target, > - 0, OPTAB_DIRECT); > - if (!rtx_equal_p (target, fp_hi)) > - emit_move_insn (target, fp_hi); > + if (TARGET_FMA || TARGET_AVX512F) Looking at the 
expander floatunssi<mode>2, the || TARGET_AVX512F alternative in the condition should never be hit, since we have a direct vcvtsi2s[sd] instruction under TARGET_AVX512F. > + { > + x = validize_mem (force_const_mem (SFmode, x)); > + fp_hi = gen_rtx_FMA (SFmode, fp_hi, x, fp_lo); > + emit_move_insn (target, fp_hi); > + } > + else > + { > + fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi, > + 0, OPTAB_DIRECT); > + fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target, > + 0, OPTAB_DIRECT); > + if (!rtx_equal_p (target, fp_hi)) > + emit_move_insn (target, fp_hi); > + } > } > > /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert > @@ -1888,12 +1897,23 @@ ix86_expand_vector_convert_uns_vsivsf (rtx target, > rtx val) > real_ldexp (&TWO16r, &dconst1, 16); > tmp[5] = const_double_from_real_value (TWO16r, SFmode); > tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5])); > - tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1, > - OPTAB_DIRECT); > - tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1, > - OPTAB_DIRECT); > - if (tmp[7] != target) > - emit_move_insn (target, tmp[7]); > + unsigned vector_size = GET_MODE_SIZE (fltmode); > + if (TARGET_FMA > + || (TARGET_AVX512F && vector_size == 64) > + || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))) Similarly here: the last two || alternatives in the condition (the TARGET_AVX512F and TARGET_AVX512VL cases) should never be hit. > + { > + tmp[6] = gen_rtx_FMA (fltmode, tmp[4], tmp[5], tmp[3]); > + emit_move_insn (target, tmp[6]); > + } > + else > + { > + tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], > + NULL_RTX, 1, OPTAB_DIRECT); > + tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], > + target, 1, OPTAB_DIRECT); > + if (tmp[7] != target) > + emit_move_insn (target, tmp[7]); > + } > } > > /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. 
fix_trunc* > diff --git a/gcc/testsuite/gcc.target/i386/pr85819-1.c > b/gcc/testsuite/gcc.target/i386/pr85819-1.c > new file mode 100644 > index 00000000000..db02282d100 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr85819-1.c > @@ -0,0 +1,11 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mno-avx512f -mfma -mfpmath=sse" } */ > + > +float > +foo (unsigned int x) > +{ > + return x; > +} > + > +/* { dg-final { scan-assembler "vfmadd132ss" { target ia32 } } } */ > +/* { dg-final { scan-assembler "vcvtsi2ssq" { target { ! ia32 } } } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr85819-2a.c > b/gcc/testsuite/gcc.target/i386/pr85819-2a.c > new file mode 100644 > index 00000000000..cea599fe416 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr85819-2a.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mno-avx512f -mavx2 -mfma -mfpmath=sse" } */ > + > +typedef float To __attribute__ ((__vector_size__ (32))); > +typedef unsigned int From __attribute__ ((__vector_size__ (32))); > + > +#define A2(I) (float)a[I], (float)a[1+I] > +#define A4(I) A2(I), A2(2+I) > +#define A8(I) A4(I), A4(4+I) > + > +To > +f(From a) > +{ > + return __extension__ (To) {A8(0)}; > +} > + > +/* { dg-final { scan-assembler "vfmadd132ps" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr85819-2b.c > b/gcc/testsuite/gcc.target/i386/pr85819-2b.c > new file mode 100644 > index 00000000000..0750e56f29e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr85819-2b.c > @@ -0,0 +1,6 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mavx512vl -mfpmath=sse" } */ > + > +#include "pr85819-2a.c" > + > +/* { dg-final { scan-assembler "vcvtudq2ps" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr85819-2c.c > b/gcc/testsuite/gcc.target/i386/pr85819-2c.c > new file mode 100644 > index 00000000000..821166908da > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr85819-2c.c > @@ -0,0 +1,7 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mno-fma 
-mno-avx2 -mno-avx512vl -mavx512f > -mfpmath=sse" } */ > + > +#include "pr85819-2a.c" > + > +/* { dg-final { scan-assembler-not "vcvtudq2ps" } } */ > +/* { dg-final { scan-assembler-not "vfmadd132ps" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr85819-3.c > b/gcc/testsuite/gcc.target/i386/pr85819-3.c > new file mode 100644 > index 00000000000..cd3bf9b8d35 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr85819-3.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mavx512f -mfpmath=sse" } */ > + > +typedef float To __attribute__ ((__vector_size__ (64))); > +typedef unsigned int From __attribute__ ((__vector_size__ (64))); > + > +#define A2(I) (float)a[I], (float)a[1+I] > +#define A4(I) A2(I), A2(2+I) > +#define A8(I) A4(I), A4(4+I) > +#define A16(I) A8(I), A8(8+I) > + > +To > +f(From a) > +{ > + return __extension__ (To) {A16(0)}; > +} > + > +/* { dg-final { scan-assembler "vcvtudq2ps" } } */ > -- > 2.31.1 >
-- BR, Hongtao