Hi, This patch optimizes the construction of a vector from two doublewords loaded from memory. It generates a better insn sequence, because "xxlor" has lower latency than "mtvsrdd" on Power10.
Compared with the previous version, the main change is to use the "isa" attribute to guard "lxsd" and "lxsdx". https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653103.html Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. OK for the trunk? Thanks, Gui Haochen ChangeLog rs6000: Optimize vector construction with two vector doubleword loads When constructing a vector from two doublewords in memory, originally it does ld 10,0(3) ld 9,0(4) mtvsrdd 34,9,10 An optimal sequence on Power10 should be lxsd 0,0(4) lxvrdx 1,0,3 xxlor 34,1,32 This patch does this optimization by insn combine and split. gcc/ PR target/103568 * config/rs6000/vsx.md (vsx_ld_lowpart_zero_<mode>): New insn pattern. (vsx_ld_highpart_zero_<mode>): New insn pattern. (vsx_concat_mem_<mode>): New insn_and_split pattern. gcc/testsuite/ PR target/103568 * gcc.target/powerpc/pr103568.c: New test. patch.diff diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index f135fa079bd..f9a2a260e89 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -1395,6 +1395,27 @@ (define_insn "vsx_ld_elemrev_v2di" "lxvd2x %x0,%y1" [(set_attr "type" "vecload")]) +(define_insn "vsx_ld_lowpart_zero_<mode>" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=v,wa") + (vec_concat:VSX_D + (match_operand:<VEC_base> 1 "memory_operand" "wY,Z") + (match_operand:<VEC_base> 2 "zero_constant" "j,j")))] + "" + "@ + lxsd %0,%1 + lxsdx %x0,%y1" + [(set_attr "type" "vecload,vecload") + (set_attr "isa" "p9v,p7v")]) + +(define_insn "vsx_ld_highpart_zero_<mode>" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") + (vec_concat:VSX_D + (match_operand:<VEC_base> 1 "zero_constant" "j") + (match_operand:<VEC_base> 2 "memory_operand" "Z")))] + "TARGET_POWER10" + "lxvrdx %x0,%y2" + [(set_attr "type" "vecload")]) + (define_insn "vsx_ld_elemrev_v1ti" [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa") (vec_select:V1TI @@ -3063,6 +3084,26 @@ (define_insn "vsx_concat_<mode>" } [(set_attr 
"type" "vecperm,vecmove")]) +(define_insn_and_split "vsx_concat_mem_<mode>" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=v,wa") + (vec_concat:VSX_D + (match_operand:<VEC_base> 1 "memory_operand" "wY,Z") + (match_operand:<VEC_base> 2 "memory_operand" "Z,Z")))] + "TARGET_POWER10 && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + rtx tmp1 = gen_reg_rtx (<MODE>mode); + rtx tmp2 = gen_reg_rtx (<MODE>mode); + emit_insn (gen_vsx_ld_highpart_zero_<mode> (tmp1, CONST0_RTX (<VEC_base>mode), + operands[1])); + emit_insn (gen_vsx_ld_lowpart_zero_<mode> (tmp2, operands[2], + CONST0_RTX (<VEC_base>mode))); + emit_insn (gen_ior<mode>3 (operands[0], tmp1, tmp2)); + DONE; +}) + ;; Combiner patterns to allow creating XXPERMDI's to access either double ;; word element in a vector register. (define_insn "*vsx_concat_<mode>_1" diff --git a/gcc/testsuite/gcc.target/powerpc/pr103568.c b/gcc/testsuite/gcc.target/powerpc/pr103568.c new file mode 100644 index 00000000000..b2a06fb2162 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr103568.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +vector double test (double *a, double *b) +{ + return (vector double) {*a, *b}; +} + +vector long long test1 (long long *a, long long *b) +{ + return (vector long long) {*a, *b}; +} + +/* { dg-final { scan-assembler-times {\mlxsd} 2 } } */ +/* { dg-final { scan-assembler-times {\mlxvrdx\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxlor\M} 2 } } */ +