Hi, This patch defines a new split pattern for a TI to V1TI move. The pattern concatenates the two subreg:DI parts of the TI into a V2DI. With this pattern, the subreg pass can split the TI register when there is a TI to V1TI move. The patch eliminates one unnecessary "mr" instruction on P9, as the new test case illustrates.
Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. ChangeLog 2021-12-13 Haochen Gui <guih...@linux.ibm.com> gcc/ * config/rs6000/vsx.md (split pattern for TI to V1TI move): New. gcc/testsuite/ * gcc.target/powerpc/pr103124.c: New testcase. patch.diff diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index bf033e31c1c..52968eb4609 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -6589,3 +6589,19 @@ (define_insn "xxeval" [(set_attr "type" "vecperm") (set_attr "prefixed" "yes")]) +;; Construct V1TI by vsx_concat_v2di +(define_split + [(set (match_operand:V1TI 0 "vsx_register_operand") + (subreg:V1TI + (match_operand:TI 1 "int_reg_operand") 0 ))] + "TARGET_P9_VECTOR && !reload_completed" + [(const_int 0)] +{ + rtx tmp1 = simplify_gen_subreg (DImode, operands[1], TImode, 0); + rtx tmp2 = simplify_gen_subreg (DImode, operands[1], TImode, 8); + rtx tmp3 = gen_reg_rtx (V2DImode); + emit_insn (gen_vsx_concat_v2di (tmp3, tmp1, tmp2)); + rtx tmp4 = simplify_gen_subreg (V1TImode, tmp3, V2DImode, 0); + emit_move_insn (operands[0], tmp4); + DONE; +}) diff --git a/gcc/testsuite/gcc.target/powerpc/pr103124.c b/gcc/testsuite/gcc.target/powerpc/pr103124.c new file mode 100644 index 00000000000..e9072d19b8e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr103124.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-require-effective-target int128 } */ +/* { dg-options "-O2 -mdejagnu-cpu=power9" } */ +/* { dg-final { scan-assembler-not "\mmr\M" } } */ + +vector __int128 add (long long a) +{ + vector __int128 b; + b = (vector __int128) {a}; + return b; +}