The uarch can fuse bstrpick.d rd,rs1,31,0 and alsl.d rd,rd,rs2,shamt, so for this special case we should use alsl.d instead of slli.d. I had hoped that late combine would handle the chain slli.d + and + add.d => and + slli.d + add.d => and + alsl.d, but it does not always work (even before the alsl.d special case gets in the way). So let's handle this on our own.
The fix is partial: to make the macro-fusion really work we need to implement TARGET_SCHED_MACRO_FUSION_PAIR_P for it. Thus the fused-pair check in bitwise-shift-reassoc-fuse.c is marked xfail for now. gcc/ChangeLog: * config/loongarch/loongarch.md (<optab>_shift_reverse<X:mode>): Emit alsl.d instead of slli.d if new mask is 0xffffffff and shamt is const_immalsl_operand. (<optab>_alsl_reverse<X:mode>): New define_insn_and_split. gcc/testsuite/ChangeLog: * gcc.target/loongarch/bitwise-shift-reassoc-dual.c: New test. * gcc.target/loongarch/bitwise-shift-reassoc-fuse.c: New test. * gcc.target/loongarch/bitwise-shift-reassoc.c (t4): Match alsl.d instead of slli.d. * gcc.target/loongarch/bstrpick_alsl_paired.c (scan-rtl-dump): Match and_alsl_reversedi instead of and_shift_reversedi. --- gcc/config/loongarch/loongarch.md | 55 +++++++++++++++++-- .../loongarch/bitwise-shift-reassoc-dual.c | 18 ++++++ .../loongarch/bitwise-shift-reassoc-fuse.c | 16 ++++++ .../loongarch/bitwise-shift-reassoc.c | 2 +- .../loongarch/bstrpick_alsl_paired.c | 2 +- 5 files changed, 85 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc-dual.c create mode 100644 gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc-fuse.c diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index 1392325038c..2728d5e4d1c 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -3138,8 +3138,54 @@ (define_insn_and_split "<optab>_shift_reverse<X:mode>" <MODE>mode)" "#" "&& true" + [(set (match_dup 0) (ashift:X (match_dup 0) (match_dup 2)))] + { + operands[3] = loongarch_reassoc_shift_bitwise (<is_and>, + operands[2], + operands[3], + <MODE>mode); + + if (ins_zero_bitmask_operand (operands[3], <MODE>mode)) + { + gcc_checking_assert (<is_and>); + emit_move_insn (operands[0], operands[1]); + operands[1] = operands[0]; + } + + emit_insn (gen_<optab><mode>3 (operands[0], operands[1], operands[3])); + + if (<is_and> + && 
TARGET_64BIT + && si_mask_operand (operands[3], DImode) + && const_immalsl_operand (operands[2], SImode)) + { + /* Special case for bstrpick.d + alsl.d fusion + TODO: TARGET_SCHED_MACRO_FUSION_PAIR_P */ + emit_insn (gen_alsldi3 (operands[0], operands[0], + operands[2], gen_rtx_REG (DImode, 0))); + DONE; + } + }) + +;; The late_combine2 pass can handle slli.d + add.d => alsl.d, but it seems +;; not covering all cases, and obviously it's broken by the special case +;; using alsl.d rd,rd,r0,shamt instead of slli.d rd,rd,shamt. So +;; implement slli.d + and + add.d => and + alsl.d on our own. +(define_insn_and_split "<optab>_alsl_reverse<X:mode>" + [(set (match_operand:X 0 "register_operand" "=&r") + (plus:X + (any_bitwise:X + (ashift:X (match_operand:X 1 "register_operand" "r0") + (match_operand:SI 2 "const_immalsl_operand" "i")) + (match_operand:X 3 "const_int_operand" "i")) + (match_operand:X 4 "register_operand" "r")))] + "loongarch_reassoc_shift_bitwise (<is_and>, operands[2], operands[3], + <MODE>mode)" + "#" + "&& reload_completed" [(set (match_dup 0) (any_bitwise:X (match_dup 1) (match_dup 3))) - (set (match_dup 0) (ashift:X (match_dup 0) (match_dup 2)))] + (set (match_dup 0) (plus:X (ashift:X (match_dup 0) (match_dup 2)) + (match_dup 4)))] { operands[3] = loongarch_reassoc_shift_bitwise (<is_and>, operands[2], @@ -3154,11 +3200,8 @@ (define_insn_and_split "<optab>_shift_reverse<X:mode>" } }) -;; The late_combine2 pass can handle slli.d + add.d => alsl.d, so we -;; already have slli.d + any_bitwise + add.d => any_bitwise + slli.d + -;; add.d => any_bitwise + alsl.d. But late_combine2 cannot handle slli.d + -;; add.w => alsl.w, so implement slli.d + and + add.w => and + alsl.w on -;; our own. +;; Likewise for slli.d + and + add.w => and + alsl.w, note that late +;; combine cannot help this at all. 
(define_insn_and_split "<optab>_alsl_reversesi_extended" [(set (match_operand:DI 0 "register_operand" "=&r") (sign_extend:DI diff --git a/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc-dual.c b/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc-dual.c new file mode 100644 index 00000000000..ad66daf6caa --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc-dual.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times "bstrpick\\.\[wd\]" 2 } } */ +/* { dg-final { scan-assembler-times "alsl\\.\[wd\]" 2 } } */ + +struct Pair { unsigned long a, b; }; + +struct Pair +test (struct Pair p, unsigned long x) +{ + p.a &= 0xfffffff; + p.a <<= 2; + p.a += x; + p.b &= 0xfffffff; + p.b <<= 2; + p.b += x; + return p; +} diff --git a/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc-fuse.c b/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc-fuse.c new file mode 100644 index 00000000000..a75c2f0ce12 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc-fuse.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ +/* { dg-final { scan-assembler-times "alsl.d" 2 } } */ +/* { dg-final { scan-assembler-times "bstrpick\\.d\[^\n\]*\n\talsl.d" 2 { xfail *-*-* } } } */ + +struct Pair { unsigned long a, b; }; + +struct Pair +test (struct Pair p) +{ + p.a &= 0xffffffff; + p.a <<= 2; + p.b &= 0xffffffff; + p.b <<= 2; + return p; +} diff --git a/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc.c b/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc.c index 3f197755625..9788a094f51 100644 --- a/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc.c +++ b/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc.c @@ -53,7 +53,7 @@ t3 (long x, long y) /* **t4: ** bstrpick.d (\$r[0-9]+),\$r4,31,0 -** slli.d \$r4,\1,1 +** alsl.d \$r4,\1,\$r0,1 ** jr \$r1 */ unsigned long diff --git 
a/gcc/testsuite/gcc.target/loongarch/bstrpick_alsl_paired.c b/gcc/testsuite/gcc.target/loongarch/bstrpick_alsl_paired.c index 900e8c9e19f..166565275da 100644 --- a/gcc/testsuite/gcc.target/loongarch/bstrpick_alsl_paired.c +++ b/gcc/testsuite/gcc.target/loongarch/bstrpick_alsl_paired.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mabi=lp64d -O2 -fdump-rtl-combine" } */ -/* { dg-final { scan-rtl-dump "{and_shift_reversedi}" "combine" } } */ +/* { dg-final { scan-rtl-dump "{and_alsl_reversedi}" "combine" } } */ /* { dg-final { scan-assembler-not "alsl.d\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,\\\$r0" } } */ struct SA -- 2.48.1