On Mon, Sep 6, 2021 at 8:35 PM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> On Sun, Sep 5, 2021 at 10:34 PM Hongtao Liu <crazy...@gmail.com> wrote:
> >
> > On Sun, Sep 5, 2021 at 5:56 AM H.J. Lu via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > Enable FMA in scalar/vector unsigned SI to SF expanders.
> > >
> > > gcc/
> > >
> > > PR target/85819
> > > * config/i386/i386-expand.c (ix86_expand_convert_uns_sisf_sse):
> > > Enable FMA.
> > > (ix86_expand_vector_convert_uns_vsivsf): Likewise.
> > >
> > > gcc/testsuite/
> > >
> > > PR target/85819
> > > * gcc.target/i386/pr85819-1.c: New test.
> > > * gcc.target/i386/pr85819-2a.c: Likewise.
> > > * gcc.target/i386/pr85819-2b.c: Likewise.
> > > * gcc.target/i386/pr85819-2c.c: Likewise.
> > > * gcc.target/i386/pr85819-3.c: Likewise.
> > > ---
> > > gcc/config/i386/i386-expand.c | 44 ++++++++++++++++------
> > > gcc/testsuite/gcc.target/i386/pr85819-1.c | 11 ++++++
> > > gcc/testsuite/gcc.target/i386/pr85819-2a.c | 17 +++++++++
> > > gcc/testsuite/gcc.target/i386/pr85819-2b.c | 6 +++
> > > gcc/testsuite/gcc.target/i386/pr85819-2c.c | 7 ++++
> > > gcc/testsuite/gcc.target/i386/pr85819-3.c | 18 +++++++++
> > > 6 files changed, 91 insertions(+), 12 deletions(-)
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-1.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2a.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2b.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2c.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-3.c
> > >
> > > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
> > > index 2500dbfa7fb..26263bbe1af 100644
> > > --- a/gcc/config/i386/i386-expand.c
> > > +++ b/gcc/config/i386/i386-expand.c
> > > @@ -1851,12 +1851,21 @@ ix86_expand_convert_uns_sisf_sse (rtx target, rtx
> > > input)
> > > fp_lo = gen_reg_rtx (SFmode);
> > > emit_insn (gen_floatsisf2 (fp_hi, int_hi));
> > > emit_insn (gen_floatsisf2
(fp_lo, int_lo));
> > > - fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
> > > - 0, OPTAB_DIRECT);
> > > - fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
> > > - 0, OPTAB_DIRECT);
> > > - if (!rtx_equal_p (target, fp_hi))
> > > - emit_move_insn (target, fp_hi);
> > > + if (TARGET_FMA || TARGET_AVX512F)
> > Looking at the expander floatunssi<mode>2, the || in the condition
> > should never be hit since we have direct vcvtsi2s[sd] instruction
> > under TARGET_AVX512F.
>
> Fixed.
>
> > > + {
> > > + x = validize_mem (force_const_mem (SFmode, x));
> > > + fp_hi = gen_rtx_FMA (SFmode, fp_hi, x, fp_lo);
> > > + emit_move_insn (target, fp_hi);
> > > + }
> > > + else
> > > + {
> > > + fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
> > > + 0, OPTAB_DIRECT);
> > > + fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
> > > + 0, OPTAB_DIRECT);
> > > + if (!rtx_equal_p (target, fp_hi))
> > > + emit_move_insn (target, fp_hi);
> > > + }
> > > }
> > >
> > > /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
> > > @@ -1888,12 +1897,23 @@ ix86_expand_vector_convert_uns_vsivsf (rtx
> > > target, rtx val)
> > > real_ldexp (&TWO16r, &dconst1, 16);
> > > tmp[5] = const_double_from_real_value (TWO16r, SFmode);
> > > tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1,
> > > tmp[5]));
> > > - tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX,
> > > 1,
> > > - OPTAB_DIRECT);
> > > - tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
> > > - OPTAB_DIRECT);
> > > - if (tmp[7] != target)
> > > - emit_move_insn (target, tmp[7]);
> > > + unsigned vector_size = GET_MODE_SIZE (fltmode);
> > > + if (TARGET_FMA
> > > + || (TARGET_AVX512F && vector_size == 64)
> > > + || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
> > Similar here for the last two || in the condition.
>
> Fixed.
>
> > > + {
> > > + tmp[6] = gen_rtx_FMA (fltmode, tmp[4], tmp[5], tmp[3]);
> > > + emit_move_insn (target, tmp[6]);
> > > + }
> > > + else
> > > + {
> > > + tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5],
> > > + NULL_RTX, 1, OPTAB_DIRECT);
> > > + tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6],
> > > + target, 1, OPTAB_DIRECT);
> > > + if (tmp[7] != target)
> > > + emit_move_insn (target, tmp[7]);
> > > + }
> > > }
> > >
> > > /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp.
> > > fix_trunc*
>
> Here is the v2 patch.
LGTM.
>
> --
> H.J.
--
BR,
Hongtao