On Sun, Apr 28, 2024 at 7:47 AM liuhongt <hongtao....@intel.com> wrote: > > So when both the source operand and the dest operand require avx512 MASK_REGS, RA > can allocate a MASK_REGS register instead of a GPR to avoid reloading it from > GPR to MASK_REGS. > It's similar to what was done for logic patterns. > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. > Ok for trunk? > > gcc/ChangeLog: > > * config/i386/i386.md (*zero_extendsidi2): Adjust > alternative *k to ?k. > (zero_extend<mode>di2): Ditto. > (*zero_extend<mode>si2): Ditto. > (*zero_extendqihi2): Ditto.
OK. Thanks, Uros. > --- > gcc/config/i386/i386.md | 16 +++---- > .../gcc.target/i386/zero_extendkmask.c | 43 +++++++++++++++++++ > 2 files changed, 51 insertions(+), 8 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/zero_extendkmask.c > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index d4ce3809e6d..f2ab7fdcd58 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -4567,10 +4567,10 @@ (define_expand "zero_extendsidi2" > > (define_insn "*zero_extendsidi2" > [(set (match_operand:DI 0 "nonimmediate_operand" > - "=r,?r,?o,r ,o,?*y,?!*y,$r,$v,$x,*x,*v,*r,*k") > + "=r,?r,?o,r ,o,?*y,?!*y,$r,$v,$x,*x,*v,?r,?k") > (zero_extend:DI > (match_operand:SI 1 "x86_64_zext_operand" > - "0 ,rm,r ,rmWz,0,r ,m ,v ,r ,m ,*x,*v,*k,*km")))] > + "0 ,rm,r ,rmWz,0,r ,m ,v ,r ,m ,*x,*v,?k,?km")))] > "" > { > switch (get_attr_type (insn)) > @@ -4703,9 +4703,9 @@ (define_mode_attr kmov_isa > [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")]) > > (define_insn "zero_extend<mode>di2" > - [(set (match_operand:DI 0 "register_operand" "=r,*r,*k") > + [(set (match_operand:DI 0 "register_operand" "=r,?r,?k") > (zero_extend:DI > - (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))] > + (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,?k,?km")))] > "TARGET_64BIT" > "@ > movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1} > @@ -4758,9 +4758,9 @@ (define_insn_and_split "zero_extend<mode>si2_and" > (set_attr "mode" "SI")]) > > (define_insn "*zero_extend<mode>si2" > - [(set (match_operand:SI 0 "register_operand" "=r,*r,*k") > + [(set (match_operand:SI 0 "register_operand" "=r,?r,?k") > (zero_extend:SI > - (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))] > + (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,?k,?km")))] > "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))" > "@ > movz{<imodesuffix>l|x}\t{%1, %0|%0, %1} > @@ -4813,8 +4813,8 @@ (define_insn_and_split "zero_extendqihi2_and" > > ; 
zero extend to SImode to avoid partial register stalls > (define_insn "*zero_extendqihi2" > - [(set (match_operand:HI 0 "register_operand" "=r,*r,*k") > - (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" > "qm,*k,*km")))] > + [(set (match_operand:HI 0 "register_operand" "=r,?r,?k") > + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" > "qm,?k,?km")))] > "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))" > "@ > movz{bl|x}\t{%1, %k0|%k0, %1} > diff --git a/gcc/testsuite/gcc.target/i386/zero_extendkmask.c > b/gcc/testsuite/gcc.target/i386/zero_extendkmask.c > new file mode 100644 > index 00000000000..6b18980bbd1 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/zero_extendkmask.c > @@ -0,0 +1,43 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-march=x86-64-v4 -O2" } */ > +/* { dg-final { scan-assembler-not {(?n)shr[bwl]} } } */ > +/* { dg-final { scan-assembler-not {(?n)movz[bw]} } } */ > + > +#include<immintrin.h> > + > +__m512 > +foo (__m512d a, __m512d b, __m512 c, __m512 d) > +{ > + return _mm512_mask_mov_ps (c, (__mmask16) (_mm512_cmpeq_pd_mask (a, b) >> > 1), d); > +} > + > + > +__m512i > +foo1 (__m512d a, __m512d b, __m512i c, __m512i d) > +{ > + return _mm512_mask_mov_epi16 (c, (__mmask32) (_mm512_cmpeq_pd_mask (a, b) > >> 1), d); > +} > + > +__m512i > +foo2 (__m512d a, __m512d b, __m512i c, __m512i d) > +{ > + return _mm512_mask_mov_epi8 (c, (__mmask64) (_mm512_cmpeq_pd_mask (a, b) > >> 1), d); > +} > + > +__m512i > +foo3 (__m512 a, __m512 b, __m512i c, __m512i d) > +{ > + return _mm512_mask_mov_epi16 (c, (__mmask32) (_mm512_cmpeq_ps_mask (a, b) > >> 1), d); > +} > + > +__m512i > +foo4 (__m512 a, __m512 b, __m512i c, __m512i d) > +{ > + return _mm512_mask_mov_epi8 (c, (__mmask64) (_mm512_cmpeq_ps_mask (a, b) > >> 1), d); > +} > + > +__m512i > +foo5 (__m512i a, __m512i b, __m512i c, __m512i d) > +{ > + return _mm512_mask_mov_epi8 (c, (__mmask64) (_mm512_cmp_epi16_mask (a, b, > 5) >> 1), d); 
> +} > -- > 2.31.1 >