> -----Original Message-----
> From: Jan Beulich <jbeul...@suse.com>
> Sent: Tuesday, July 11, 2023 2:04 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Kirill Yukhin <kirill.yuk...@gmail.com>; Liu, Hongtao
> <hongtao....@intel.com>
> Subject: [PATCH v3] x86: make better use of VBROADCASTSS /
> VPBROADCASTD
> 
> ... in vec_dupv4sf / *vec_dupv4si. The respective broadcast insns are never
> longer (yet sometimes shorter) than the corresponding VSHUFPS / VPSHUFD,
> due to the immediate operand of the shuffle insns balancing the
> (uniform) need for VEX3 in the broadcast ones. When EVEX encoding is
> respective the broadcast insns are always shorter.
> 
> Add new alternatives to cover the AVX2 and AVX512 cases as appropriate.
> 
> While touching this anyway, switch to consistently using "sseshuf1" in the
> "type" attributes for all shuffle forms.
> 
> gcc/
> 
>       * config/i386/sse.md (vec_dupv4sf): Make first alternative use
>       vbroadcastss for AVX2. New AVX512F alternative.
>       (*vec_dupv4si): New AVX2 and AVX512F alternatives using
>       vpbroadcastd. Replace sselog1 by sseshuf1 in "type" attribute.
> 
> gcc/testsuite/
> 
>       * gcc.target/i386/avx2-dupv4sf.c: New test.
>       * gcc.target/i386/avx2-dupv4si.c: Likewise.
>       * gcc.target/i386/avx512f-dupv4sf.c: Likewise.
>       * gcc.target/i386/avx512f-dupv4si.c: Likewise.
> ---
> Note that unlike originally intended, "prefix_extra" isn't dropped:
> "length_vex" uses it to determine whether 2-byte VEX encoding is possible
> (which it isn't for VBROADCASTSS / VPBROADCASTD). "length"
> itself specifically does not use it for VEX/EVEX encoded insns.
> 
> Especially with the added "enabled" attribute I didn't really see how to
> (further) fold alternatives 0 and 1. Instead *vec_dupv4si might benefit from
> using sse2_noavx2 instead of sse2 for alternative 2, except that there is no
> sse2_noavx2, only sse2_noavx.
> 
> I'm working from the assumption that the isa attributes to the original 1st 
> and
> 2nd alternatives don't need further restricting (to sse2_noavx2 or
> avx_noavx2 as applicable), as the new earlier alternatives cover all operand
> forms already when at least AVX2 is enabled.
Yes, the patch LGTM.
> ---
> v3: Testcases for new alternatives. "type" and "prefix_extra"
>     adjustments.
> v2: Correct operand constraints. Respect -mprefer-vector-width=. Fold
>     two alternatives of vec_dupv4sf.
> 
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -25969,41 +25969,64 @@
>       (const_int 1)))])
> 
>  (define_insn "vec_dupv4sf"
> -  [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
> +  [(set (match_operand:V4SF 0 "register_operand" "=v,v,v,x")
>       (vec_duplicate:V4SF
> -       (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
> +       (match_operand:SF 1 "nonimmediate_operand" "Yv,v,m,0")))]
>    "TARGET_SSE"
>    "@
> -   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
> +   * return TARGET_AVX2 ? \"vbroadcastss\t{%1, %0|%0, %1}\" :
> \"vshufps\t{$0, %d1, %0|%0, %d1, 0}\";
> +   vbroadcastss\t{%1, %g0|%g0, %1}
>     vbroadcastss\t{%1, %0|%0, %1}
>     shufps\t{$0, %0, %0|%0, %0, 0}"
> -  [(set_attr "isa" "avx,avx,noavx")
> -   (set_attr "type" "sseshuf1,ssemov,sseshuf1")
> -   (set_attr "length_immediate" "1,0,1")
> -   (set_attr "prefix_extra" "0,1,*")
> -   (set_attr "prefix" "maybe_evex,maybe_evex,orig")
> -   (set_attr "mode" "V4SF")])
> +  [(set_attr "isa" "avx,*,avx,noavx")
> +   (set (attr "type")
> +     (cond [(and (eq_attr "alternative" "0")
> +                 (match_test "!TARGET_AVX2"))
> +              (const_string "sseshuf1")
> +            (eq_attr "alternative" "3")
> +              (const_string "sseshuf1")
> +           ]
> +           (const_string "ssemov")))
> +   (set (attr "length_immediate")
> +     (if_then_else (eq_attr "type" "sseshuf1")
> +                   (const_string "1")
> +                   (const_string "0")))
> +   (set_attr "prefix_extra" "0,1,1,*")
> +   (set_attr "prefix" "maybe_evex,evex,maybe_evex,orig")
> +   (set_attr "mode" "V4SF,V16SF,V4SF,V4SF")
> +   (set (attr "enabled")
> +     (if_then_else (eq_attr "alternative" "1")
> +                   (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL
> +                                && !TARGET_PREFER_AVX256")
> +                   (const_string "*")))])
> 
>  (define_insn "*vec_dupv4si"
> -  [(set (match_operand:V4SI 0 "register_operand"     "=v,v,x")
> +  [(set (match_operand:V4SI 0 "register_operand"     "=v,v,v,v,x")
>       (vec_duplicate:V4SI
> -       (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
> +       (match_operand:SI 1 "nonimmediate_operand" "Yvm,v,Yv,m,0")))]
>    "TARGET_SSE"
>    "@
> +   vpbroadcastd\t{%1, %0|%0, %1}
> +   vpbroadcastd\t{%1, %g0|%g0, %1}
>     %vpshufd\t{$0, %1, %0|%0, %1, 0}
>     vbroadcastss\t{%1, %0|%0, %1}
>     shufps\t{$0, %0, %0|%0, %0, 0}"
> -  [(set_attr "isa" "sse2,avx,noavx")
> -   (set_attr "type" "sselog1,ssemov,sselog1")
> -   (set_attr "length_immediate" "1,0,1")
> -   (set_attr "prefix_extra" "0,1,*")
> -   (set_attr "prefix" "maybe_vex,maybe_evex,orig")
> -   (set_attr "mode" "TI,V4SF,V4SF")
> +  [(set_attr "isa" "avx2,*,sse2,avx,noavx")
> +   (set_attr "type" "ssemov,ssemov,sseshuf1,ssemov,sseshuf1")
> +   (set_attr "length_immediate" "0,0,1,0,1")
> +   (set_attr "prefix_extra" "1,1,0,1,*")
> +   (set_attr "prefix" "maybe_evex,evex,maybe_vex,maybe_evex,orig")
> +   (set_attr "mode" "TI,XI,TI,V4SF,V4SF")
>     (set (attr "preferred_for_speed")
> -     (cond [(eq_attr "alternative" "1")
> +     (cond [(eq_attr "alternative" "3")
>             (symbol_ref "!TARGET_INTER_UNIT_MOVES_TO_VEC")
>          ]
> -        (symbol_ref "true")))])
> +        (symbol_ref "true")))
> +   (set (attr "enabled")
> +     (if_then_else (eq_attr "alternative" "1")
> +                   (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL
> +                                && !TARGET_PREFER_AVX256")
> +                   (const_string "*")))])
> 
>  (define_insn "*vec_dupv2di"
>    [(set (match_operand:V2DI 0 "register_operand"     "=x,v,v,v,x")
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx2-dupv4sf.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx2 -O2" } */
> +/* { dg-final { scan-assembler-times "vbroadcastss" 2 } } */
> +
> +typedef float __attribute__ ((vector_size (16))) v4sf;
> +
> +v4sf bcst_reg (float f)
> +{
> +  register float x asm ("xmm7") = f;
> +
> +  asm ("" : "+v" (x));
> +  return (v4sf) {x, x, x, x};
> +}
> +
> +v4sf bcst_mem (const float *f)
> +{
> +  return (v4sf) {*f, *f, *f, *f};
> +}
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx2-dupv4si.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx2 -O2" } */
> +/* { dg-final { scan-assembler-times "vpbroadcastd" 2 } } */
> +
> +typedef int __attribute__ ((vector_size (16))) v4si;
> +
> +v4si bcst_reg (int i)
> +{
> +  register int x asm ("xmm7") = i;
> +
> +  asm ("" : "+v" (x));
> +  return (v4si) {x, x, x, x};
> +}
> +
> +v4si bcst_mem (const int *i)
> +{
> +  return (v4si) {*i, *i, *i, *i};
> +}
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512f-dupv4sf.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2"
> +} */
> +/* { dg-final { scan-assembler "vbroadcastss\[^\n\]*%xmm17, *%zmm" } }
> +*/
> +
> +typedef float __attribute__ ((vector_size (16))) v4sf;
> +
> +v4sf bcst (float f)
> +{
> +  register float x asm ("xmm17") = f;
> +
> +  asm ("" : "+v" (x));
> +  return (v4sf) {x, x, x, x};
> +}
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512f-dupv4si.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2"
> +} */
> +/* { dg-final { scan-assembler "vpbroadcastd\[^\n\]*%xmm17, *%zmm" } }
> +*/
> +
> +typedef int __attribute__ ((vector_size (16))) v4si;
> +
> +v4si bcst (int i)
> +{
> +  register int x asm ("xmm17") = i;
> +
> +  asm ("" : "+v" (x));
> +  return (v4si) {x, x, x, x};
> +}

Reply via email to