Hi,
 Gentle ping for this patch.
https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653180.html

Thanks
Gui Haochen

在 2024/5/31 11:25, HAO CHEN GUI 写道:
> Hi,
>   This patch optimizes the construction of a vector from two doublewords
> loaded from memory.  It generates a better insn sequence, as "xxlor" has
> lower latency than "mtvsrdd" on Power10.
> 
>   Compared with the previous version, the main change is to use the "isa"
> attribute to guard "lxsd" and "lxsdx".
> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653103.html
> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no
> regressions. OK for the trunk?
> 
> Thanks
> Gui Haochen
> 
> ChangeLog
> rs6000: Optimize vector construction with two vector doubleword loads
> 
> When constructing a vector from two doublewords in memory, GCC originally
> generates
>       ld 10,0(3)
>       ld 9,0(4)
>       mtvsrdd 34,9,10
> 
> An optimal sequence on Power10 should be
>       lxsd 0,0(4)
>       lxvrdx 1,0,3
>       xxlor 34,1,32
> 
> This patch implements the optimization via insn combine and split.
> 
> gcc/
>       PR target/103568
>       * config/rs6000/vsx.md (vsx_ld_lowpart_zero_<mode>): New insn
>       pattern.
>       (vsx_ld_highpart_zero_<mode>): New insn pattern.
>       (vsx_concat_mem_<mode>): New insn_and_split pattern.
> 
> gcc/testsuite/
>       PR target/103568
>       * gcc.target/powerpc/pr103568.c: New test.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index f135fa079bd..f9a2a260e89 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -1395,6 +1395,27 @@ (define_insn "vsx_ld_elemrev_v2di"
>    "lxvd2x %x0,%y1"
>    [(set_attr "type" "vecload")])
> 
> +(define_insn "vsx_ld_lowpart_zero_<mode>"
> +  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=v,wa")
> +     (vec_concat:VSX_D
> +       (match_operand:<VEC_base> 1 "memory_operand" "wY,Z")
> +       (match_operand:<VEC_base> 2 "zero_constant" "j,j")))]
> +  ""
> +  "@
> +   lxsd %0,%1
> +   lxsdx %x0,%y1"
> +  [(set_attr "type" "vecload,vecload")
> +   (set_attr "isa" "p9v,p7v")])
> +
> +(define_insn "vsx_ld_highpart_zero_<mode>"
> +  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
> +     (vec_concat:VSX_D
> +       (match_operand:<VEC_base> 1 "zero_constant" "j")
> +       (match_operand:<VEC_base> 2 "memory_operand" "Z")))]
> +  "TARGET_POWER10"
> +  "lxvrdx %x0,%y2"
> +  [(set_attr "type" "vecload")])
> +
>  (define_insn "vsx_ld_elemrev_v1ti"
>    [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
>          (vec_select:V1TI
> @@ -3063,6 +3084,26 @@ (define_insn "vsx_concat_<mode>"
>  }
>    [(set_attr "type" "vecperm,vecmove")])
> 
> +(define_insn_and_split "vsx_concat_mem_<mode>"
> +  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=v,wa")
> +     (vec_concat:VSX_D
> +       (match_operand:<VEC_base> 1 "memory_operand" "wY,Z")
> +       (match_operand:<VEC_base> 2 "memory_operand" "Z,Z")))]
> +  "TARGET_POWER10 && can_create_pseudo_p ()"
> +  "#"
> +  "&& 1"
> +  [(const_int 0)]
> +{
> +  rtx tmp1 = gen_reg_rtx (<MODE>mode);
> +  rtx tmp2 = gen_reg_rtx (<MODE>mode);
> +  emit_insn (gen_vsx_ld_highpart_zero_<mode> (tmp1, CONST0_RTX 
> (<VEC_base>mode),
> +                                           operands[1]));
> +  emit_insn (gen_vsx_ld_lowpart_zero_<mode> (tmp2, operands[2],
> +                                          CONST0_RTX (<VEC_base>mode)));
> +  emit_insn (gen_ior<mode>3 (operands[0], tmp1, tmp2));
> +  DONE;
> +})
> +
>  ;; Combiner patterns to allow creating XXPERMDI's to access either double
>  ;; word element in a vector register.
>  (define_insn "*vsx_concat_<mode>_1"
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr103568.c b/gcc/testsuite/gcc.target/powerpc/pr103568.c
> new file mode 100644
> index 00000000000..b2a06fb2162
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr103568.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
> +
> +vector double test (double *a, double *b)
> +{
> +  return (vector double) {*a, *b};
> +}
> +
> +vector long long test1 (long long *a, long long *b)
> +{
> +  return (vector long long) {*a, *b};
> +}
> +
> +/* { dg-final { scan-assembler-times {\mlxsd} 2 } } */
> +/* { dg-final { scan-assembler-times {\mlxvrdx\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mxxlor\M} 2 } } */
> +

Reply via email to