> -----Original Message----- > From: Jan Beulich <jbeul...@suse.com> > Sent: Tuesday, July 11, 2023 2:04 PM > To: gcc-patches@gcc.gnu.org > Cc: Kirill Yukhin <kirill.yuk...@gmail.com>; Liu, Hongtao > <hongtao....@intel.com> > Subject: [PATCH v3] x86: make better use of VBROADCASTSS / > VPBROADCASTD > > ... in vec_dupv4sf / *vec_dupv4si. The respective broadcast insns are never > longer (yet sometimes shorter) than the corresponding VSHUFPS / VPSHUFD, > due to the immediate operand of the shuffle insns balancing the > (uniform) need for VEX3 in the broadcast ones. When EVEX encoding is > respective the broadcast insns are always shorter. > > Add new alternatives to cover the AVX2 and AVX512 cases as appropriate. > > While touching this anyway, switch to consistently using "sseshuf1" in the > "type" attributes for all shuffle forms. > > gcc/ > > * config/i386/sse.md (vec_dupv4sf): Make first alternative use > vbroadcastss for AVX2. New AVX512F alternative. > (*vec_dupv4si): New AVX2 and AVX512F alternatives using > vpbroadcastd. Replace sselog1 by sseshuf1 in "type" attribute. > > gcc/testsuite/ > > * gcc.target/i386/avx2-dupv4sf.c: New test. > * gcc.target/i386/avx2-dupv4si.c: Likewise. > * gcc.target/i386/avx512f-dupv4sf.c: Likewise. > * gcc.target/i386/avx512f-dupv4si.c: Likewise. > --- > Note that unlike originally intended, "prefix_extra" isn't dropped: > "length_vex" uses it to determine whether 2-byte VEX encoding is possible > (which it isn't for VBROADCASTSS / VPBROADCASTD). "length" > itself specifically does not use it for VEX/EVEX encoded insns. > > Especially with the added "enabled" attribute I didn't really see how to > (further) fold alternatives 0 and 1. Instead *vec_dupv4si might benefit from > using sse2_noavx2 instead of sse2 for alternative 2, except that there is no > sse2_noavx2, only sse2_noavx. > > I'm working from the assumption that the isa attributes to the original 1st > and > 2nd alternatives don't need further restricting (to sse2_noavx2 or > avx_noavx2 as applicable), as the new earlier alternatives cover all operand > forms already when at least AVX2 is enabled. Yes, the patch LGTM. > --- > v3: Testcases for new alternatives. "type" and "prefix_extra" > adjustments. > v2: Correct operand constraints. Respect -mprefer-vector-width=. Fold > two alternatives of vec_dupv4sf. > > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -25969,41 +25969,64 @@ > (const_int 1)))]) > > (define_insn "vec_dupv4sf" > - [(set (match_operand:V4SF 0 "register_operand" "=v,v,x") > + [(set (match_operand:V4SF 0 "register_operand" "=v,v,v,x") > (vec_duplicate:V4SF > - (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))] > + (match_operand:SF 1 "nonimmediate_operand" "Yv,v,m,0")))] > "TARGET_SSE" > "@ > - vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0} > + * return TARGET_AVX2 ? \"vbroadcastss\t{%1, %0|%0, %1}\" : > \"vshufps\t{$0, %d1, %0|%0, %d1, 0}\"; > + vbroadcastss\t{%1, %g0|%g0, %1} > vbroadcastss\t{%1, %0|%0, %1} > shufps\t{$0, %0, %0|%0, %0, 0}" > - [(set_attr "isa" "avx,avx,noavx") > - (set_attr "type" "sseshuf1,ssemov,sseshuf1") > - (set_attr "length_immediate" "1,0,1") > - (set_attr "prefix_extra" "0,1,*") > - (set_attr "prefix" "maybe_evex,maybe_evex,orig") > - (set_attr "mode" "V4SF")]) > + [(set_attr "isa" "avx,*,avx,noavx") > + (set (attr "type") > + (cond [(and (eq_attr "alternative" "0") > + (match_test "!TARGET_AVX2")) > + (const_string "sseshuf1") > + (eq_attr "alternative" "3") > + (const_string "sseshuf1") > + ] > + (const_string "ssemov"))) > + (set (attr "length_immediate") > + (if_then_else (eq_attr "type" "sseshuf1") > + (const_string "1") > + (const_string "0"))) > + (set_attr "prefix_extra" "0,1,1,*") > + (set_attr "prefix" "maybe_evex,evex,maybe_evex,orig") > + (set_attr "mode" "V4SF,V16SF,V4SF,V4SF") > + (set (attr "enabled") > + (if_then_else (eq_attr "alternative" "1") > + (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL > + && !TARGET_PREFER_AVX256") > + (const_string "*")))]) > > (define_insn "*vec_dupv4si" > - [(set (match_operand:V4SI 0 "register_operand" "=v,v,x") > + [(set (match_operand:V4SI 0 "register_operand" "=v,v,v,v,x") > (vec_duplicate:V4SI > - (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))] > + (match_operand:SI 1 "nonimmediate_operand" "Yvm,v,Yv,m,0")))] > "TARGET_SSE" > "@ > + vpbroadcastd\t{%1, %0|%0, %1} > + vpbroadcastd\t{%1, %g0|%g0, %1} > %vpshufd\t{$0, %1, %0|%0, %1, 0} > vbroadcastss\t{%1, %0|%0, %1} > shufps\t{$0, %0, %0|%0, %0, 0}" > - [(set_attr "isa" "sse2,avx,noavx") > - (set_attr "type" "sselog1,ssemov,sselog1") > - (set_attr "length_immediate" "1,0,1") > - (set_attr "prefix_extra" "0,1,*") > - (set_attr "prefix" "maybe_vex,maybe_evex,orig") > - (set_attr "mode" "TI,V4SF,V4SF") > + [(set_attr "isa" "avx2,*,sse2,avx,noavx") > + (set_attr "type" "ssemov,ssemov,sseshuf1,ssemov,sseshuf1") > + (set_attr "length_immediate" "0,0,1,0,1") > + (set_attr "prefix_extra" "1,1,0,1,*") > + (set_attr "prefix" "maybe_evex,evex,maybe_vex,maybe_evex,orig") > + (set_attr "mode" "TI,XI,TI,V4SF,V4SF") > (set (attr "preferred_for_speed") > - (cond [(eq_attr "alternative" "1") > + (cond [(eq_attr "alternative" "3") > (symbol_ref "!TARGET_INTER_UNIT_MOVES_TO_VEC") > ] > - (symbol_ref "true")))]) > + (symbol_ref "true"))) > + (set (attr "enabled") > + (if_then_else (eq_attr "alternative" "1") > + (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL > + && !TARGET_PREFER_AVX256") > + (const_string "*")))]) > > (define_insn "*vec_dupv2di" > [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,v,x") > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx2-dupv4sf.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx2 -O2" } */ > +/* { dg-final { scan-assembler-times "vbroadcastss" 2 } } */ > + > +typedef float __attribute__ ((vector_size (16))) v4sf; > + > +v4sf bcst_reg (float f) > +{ > + register float x asm ("xmm7") = f; > + > + asm ("" : "+v" (x)); > + return (v4sf) {x, x, x, x}; > +} > + > +v4sf bcst_mem (const float *f) > +{ > + return (v4sf) {*f, *f, *f, *f}; > +} > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx2-dupv4si.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx2 -O2" } */ > +/* { dg-final { scan-assembler-times "vpbroadcastd" 2 } } */ > + > +typedef int __attribute__ ((vector_size (16))) v4si; > + > +v4si bcst_reg (int i) > +{ > + register int x asm ("xmm7") = i; > + > + asm ("" : "+v" (x)); > + return (v4si) {x, x, x, x}; > +} > + > +v4si bcst_mem (const int *i) > +{ > + return (v4si) {*i, *i, *i, *i}; > +} > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512f-dupv4sf.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" > +} */ > +/* { dg-final { scan-assembler "vbroadcastss\[^\n\]*%xmm17, *%zmm" } } > +*/ > + > +typedef float __attribute__ ((vector_size (16))) v4sf; > + > +v4sf bcst (float f) > +{ > + register float x asm ("xmm17") = f; > + > + asm ("" : "+v" (x)); > + return (v4sf) {x, x, x, x}; > +} > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512f-dupv4si.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" > +} */ > +/* { dg-final { scan-assembler "vpbroadcastd\[^\n\]*%xmm17, *%zmm" } } > +*/ > + > +typedef int __attribute__ ((vector_size (16))) v4si; > + > +v4si bcst (int i) > +{ > + register int x asm ("xmm17") = i; > + > + asm ("" : "+v" (x)); > + return (v4si) {x, x, x, x}; > +}
RE: [PATCH v3] x86: make better use of VBROADCASTSS / VPBROADCASTD
Liu, Hongtao via Gcc-patches Mon, 10 Jul 2023 23:33:52 -0700
- [PATCH v3] x86: make better use of VBROADCAST... Jan Beulich via Gcc-patches
- RE: [PATCH v3] x86: make better use of V... Liu, Hongtao via Gcc-patches