Hi, Gentle ping on this patch: https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653180.html
Thanks Gui Haochen 在 2024/5/31 11:25, HAO CHEN GUI 写道: > Hi, > This patch optimizes vector construction with two vector doubleword loads. > It generates an optimal insn sequence as "xxlor" has lower latency than > "mtvsrdd" on Power10. > > Compared with the previous version, the main change is to use the "isa" attribute > to guard "lxsd" and "lxsdx". > https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653103.html > > Bootstrapped and tested on powerpc64-linux BE and LE with no > regressions. OK for the trunk? > > Thanks > Gui Haochen > > ChangeLog > rs6000: Optimize vector construction with two vector doubleword loads > > When constructing a vector from two doublewords in memory, the compiler originally > generates > ld 10,0(3) > ld 9,0(4) > mtvsrdd 34,9,10 > > An optimal sequence on Power10 should be > lxsd 0,0(4) > lxvrdx 1,0,3 > xxlor 34,1,32 > > This patch does this optimization by insn combine and split. > > gcc/ > PR target/103568 > * config/rs6000/vsx.md (vsx_ld_lowpart_zero_<mode>): New insn > pattern. > (vsx_ld_highpart_zero_<mode>): New insn pattern. > (vsx_concat_mem_<mode>): New insn_and_split pattern. > > gcc/testsuite/ > PR target/103568 > * gcc.target/powerpc/pr103568.c: New test. 
> > patch.diff > diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md > index f135fa079bd..f9a2a260e89 100644 > --- a/gcc/config/rs6000/vsx.md > +++ b/gcc/config/rs6000/vsx.md > @@ -1395,6 +1395,27 @@ (define_insn "vsx_ld_elemrev_v2di" > "lxvd2x %x0,%y1" > [(set_attr "type" "vecload")]) > > +(define_insn "vsx_ld_lowpart_zero_<mode>" > + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=v,wa") > + (vec_concat:VSX_D > + (match_operand:<VEC_base> 1 "memory_operand" "wY,Z") > + (match_operand:<VEC_base> 2 "zero_constant" "j,j")))] > + "" > + "@ > + lxsd %0,%1 > + lxsdx %x0,%y1" > + [(set_attr "type" "vecload,vecload") > + (set_attr "isa" "p9v,p7v")]) > + > +(define_insn "vsx_ld_highpart_zero_<mode>" > + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") > + (vec_concat:VSX_D > + (match_operand:<VEC_base> 1 "zero_constant" "j") > + (match_operand:<VEC_base> 2 "memory_operand" "Z")))] > + "TARGET_POWER10" > + "lxvrdx %x0,%y2" > + [(set_attr "type" "vecload")]) > + > (define_insn "vsx_ld_elemrev_v1ti" > [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa") > (vec_select:V1TI > @@ -3063,6 +3084,26 @@ (define_insn "vsx_concat_<mode>" > } > [(set_attr "type" "vecperm,vecmove")]) > > +(define_insn_and_split "vsx_concat_mem_<mode>" > + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=v,wa") > + (vec_concat:VSX_D > + (match_operand:<VEC_base> 1 "memory_operand" "wY,Z") > + (match_operand:<VEC_base> 2 "memory_operand" "Z,Z")))] > + "TARGET_POWER10 && can_create_pseudo_p ()" > + "#" > + "&& 1" > + [(const_int 0)] > +{ > + rtx tmp1 = gen_reg_rtx (<MODE>mode); > + rtx tmp2 = gen_reg_rtx (<MODE>mode); > + emit_insn (gen_vsx_ld_highpart_zero_<mode> (tmp1, CONST0_RTX > (<VEC_base>mode), > + operands[1])); > + emit_insn (gen_vsx_ld_lowpart_zero_<mode> (tmp2, operands[2], > + CONST0_RTX (<VEC_base>mode))); > + emit_insn (gen_ior<mode>3 (operands[0], tmp1, tmp2)); > + DONE; > +}) > + > ;; Combiner patterns to allow creating XXPERMDI's to access either 
double > ;; word element in a vector register. > (define_insn "*vsx_concat_<mode>_1" > diff --git a/gcc/testsuite/gcc.target/powerpc/pr103568.c > b/gcc/testsuite/gcc.target/powerpc/pr103568.c > new file mode 100644 > index 00000000000..b2a06fb2162 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr103568.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ > + > +vector double test (double *a, double *b) > +{ > + return (vector double) {*a, *b}; > +} > + > +vector long long test1 (long long *a, long long *b) > +{ > + return (vector long long) {*a, *b}; > +} > + > +/* { dg-final { scan-assembler-times {\mlxsd} 2 } } */ > +/* { dg-final { scan-assembler-times {\mlxvrdx\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mxxlor\M} 2 } } */ > +