On Thu, Jul 1, 2021 at 2:18 PM liuhongt <hongtao....@intel.com> wrote: > > gcc/ChangeLog: > > * config/i386/i386-features.c (i386-features.c): Handle > E_HFmode. > * config/i386/i386.md (sqrthf2): New expander. > (*sqrt<mode>2_sse): Extend to MODEFH. > * config/i386/sse.md > (*<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>): > Extend to VFH_128. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/avx512fp16-builtin-sqrt-1.c: New test. > * gcc.target/i386/avx512fp16vl-builtin-sqrt-1.c: New test. > --- > gcc/config/i386/i386-features.c | 15 +++++++++++---- > gcc/config/i386/i386.md | 12 +++++++++--- > gcc/config/i386/sse.md | 8 ++++---- > .../i386/avx512fp16-builtin-sqrt-1.c | 18 ++++++++++++++++++ > .../i386/avx512fp16vl-builtin-sqrt-1.c | 19 +++++++++++++++++++ > 5 files changed, 61 insertions(+), 11 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-builtin-sqrt-1.c > create mode 100644 > gcc/testsuite/gcc.target/i386/avx512fp16vl-builtin-sqrt-1.c > > diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c > index a25769ae478..0b5a1a3af53 100644 > --- a/gcc/config/i386/i386-features.c > +++ b/gcc/config/i386/i386-features.c > @@ -2238,15 +2238,22 @@ remove_partial_avx_dependency (void) > > rtx zero; > machine_mode dest_vecmode; > - if (dest_mode == E_SFmode) > + switch (dest_mode) > { > + case E_HFmode: > + dest_vecmode = V8HFmode; > + zero = gen_rtx_SUBREG (V8HFmode, v4sf_const0, 0); > + break; > + case E_SFmode: > dest_vecmode = V4SFmode; > zero = v4sf_const0; > - } > - else > - { > + break; > + case E_DFmode: > dest_vecmode = V2DFmode; > zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0); > + break; > + default: > + gcc_unreachable (); > } > > /* Change source to vector mode. 
*/ > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index a85c23d74f1..81c893c60de 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -16561,9 +16561,9 @@ (define_expand "rsqrtsf2" > }) > > (define_insn "*sqrt<mode>2_sse" > - [(set (match_operand:MODEF 0 "register_operand" "=v,v,v") > - (sqrt:MODEF > - (match_operand:MODEF 1 "nonimmediate_operand" "0,v,m")))] > + [(set (match_operand:MODEFH 0 "register_operand" "=v,v,v") > + (sqrt:MODEFH > + (match_operand:MODEFH 1 "nonimmediate_operand" "0,v,m")))] > "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH" > "@ > %vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1} > @@ -16583,6 +16583,12 @@ (define_insn "*sqrt<mode>2_sse" > ] > (symbol_ref "true")))]) > As mentioned by Uros, I think it would be better to have a separate pattern for HF here as well. > +(define_expand "sqrthf2" > + [(set (match_operand:HF 0 "register_operand") > + (sqrt:HF > + (match_operand:HF 1 "nonimmediate_operand")))] > + "TARGET_AVX512FP16") > + > (define_expand "sqrt<mode>2" > [(set (match_operand:MODEF 0 "register_operand") > (sqrt:MODEF > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 2c3dba5bdb0..b47e7f0b82a 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -2389,12 +2389,12 @@ (define_insn > "<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>" > (set_attr "mode" "<ssescalarmode>")]) > > (define_insn "*<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>" > - [(set (match_operand:VF_128 0 "register_operand" "=x,v") > - (vec_merge:VF_128 > - (vec_duplicate:VF_128 > + [(set (match_operand:VFH_128 0 "register_operand" "=x,v") > + (vec_merge:VFH_128 > + (vec_duplicate:VFH_128 > (sqrt:<ssescalarmode> > (match_operand:<ssescalarmode> 1 "nonimmediate_operand" > "xm,<round_scalar_constraint>"))) > - (match_operand:VF_128 2 "register_operand" "0,v") > + (match_operand:VFH_128 2 "register_operand" "0,v") > (const_int 1)))] > "TARGET_SSE" > "@ > diff --git 
a/gcc/testsuite/gcc.target/i386/avx512fp16-builtin-sqrt-1.c > b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin-sqrt-1.c > new file mode 100644 > index 00000000000..38cdf23fef7 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin-sqrt-1.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Ofast -mavx512fp16" } */ > + > +_Float16 > +f1 (_Float16 x) > +{ > + return __builtin_sqrtf16 (x); > +} > + > +void > +f2 (_Float16* __restrict psrc, _Float16* __restrict pdst) > +{ > + for (int i = 0; i != 32; i++) > + pdst[i] = __builtin_sqrtf16 (psrc[i]); > +} > + > +/* { dg-final { scan-assembler-times "vsqrtsh\[^\n\r\]*xmm\[0-9\]" 1 } } */ > +/* { dg-final { scan-assembler-times "vsqrtph\[^\n\r\]*zmm\[0-9\]" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-builtin-sqrt-1.c > b/gcc/testsuite/gcc.target/i386/avx512fp16vl-builtin-sqrt-1.c > new file mode 100644 > index 00000000000..08deb3ea470 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-builtin-sqrt-1.c > @@ -0,0 +1,19 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Ofast -mavx512fp16 -mavx512vl" } */ > + > +void > +f1 (_Float16* __restrict psrc, _Float16* __restrict pdst) > +{ > + for (int i = 0; i != 8; i++) > + pdst[i] = __builtin_sqrtf16 (psrc[i]); > +} > + > +void > +f2 (_Float16* __restrict psrc, _Float16* __restrict pdst) > +{ > + for (int i = 0; i != 16; i++) > + pdst[i] = __builtin_sqrtf16 (psrc[i]); > +} > + > +/* { dg-final { scan-assembler-times "vsqrtph\[^\n\r\]*xmm\[0-9\]" 1 } } */ > +/* { dg-final { scan-assembler-times "vsqrtph\[^\n\r\]*ymm\[0-9\]" 1 } } */ > -- > 2.18.1 >
-- BR, Hongtao