The uarch can fuse bstrpick.d rd,rs1,31,0 and alsl.d rd,rd,rs2,shamt,
so for this special case we should use alsl.d instead of slli.d.  And
I'd hoped late combine would handle slli.d + and + add.d => and + slli.d +
add.d => and + alsl.d, but it does not always work (even before the
alsl.d special case gets in the way).  So let's handle this on our own.

The fix is partial: to make the macro-fusion really work we need to
implement TARGET_SCHED_MACRO_FUSION_PAIR_P for it.  Thus the adjacency
check in bitwise-shift-reassoc-fuse.c is xfailed for now.

gcc/ChangeLog:

        * config/loongarch/loongarch.md (<optab>_shift_reverse<X:mode>):
        Emit alsl.d instead of slli.d if new mask is 0xffffffff and
        shamt is const_immalsl_operand.
        (<optab>_alsl_reverse<X:mode>): New define_insn_and_split.

gcc/testsuite/ChangeLog:

        * gcc.target/loongarch/bitwise-shift-reassoc-dual.c: New test.
        * gcc.target/loongarch/bitwise-shift-reassoc-fuse.c: New test.
        * gcc.target/loongarch/bitwise-shift-reassoc.c (t4): Match
        alsl.d instead of slli.d.
        * gcc.target/loongarch/bstrpick_alsl_paired.c (scan-rtl-dump):
        Match and_alsl_reversedi instead of and_shift_reversedi.
---
 gcc/config/loongarch/loongarch.md             | 55 +++++++++++++++++--
 .../loongarch/bitwise-shift-reassoc-dual.c    | 18 ++++++
 .../loongarch/bitwise-shift-reassoc-fuse.c    | 16 ++++++
 .../loongarch/bitwise-shift-reassoc.c         |  2 +-
 .../loongarch/bstrpick_alsl_paired.c          |  2 +-
 5 files changed, 85 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc-dual.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc-fuse.c

diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 1392325038c..2728d5e4d1c 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -3138,8 +3138,54 @@ (define_insn_and_split "<optab>_shift_reverse<X:mode>"
                                       <MODE>mode)"
   "#"
   "&& true"
+  [(set (match_dup 0) (ashift:X (match_dup 0) (match_dup 2)))]
+  {
+    operands[3] = loongarch_reassoc_shift_bitwise (<is_and>,
+                                                  operands[2],
+                                                  operands[3],
+                                                  <MODE>mode);
+
+    if (ins_zero_bitmask_operand (operands[3], <MODE>mode))
+      {
+       gcc_checking_assert (<is_and>);
+       emit_move_insn (operands[0], operands[1]);
+       operands[1] = operands[0];
+      }
+
+    emit_insn (gen_<optab><mode>3 (operands[0], operands[1], operands[3]));
+
+    if (<is_and>
+       && TARGET_64BIT
+       && si_mask_operand (operands[3], DImode)
+       && const_immalsl_operand (operands[2], SImode))
+      {
+       /* Special case for bstrpick.d + alsl.d fusion
+          TODO: TARGET_SCHED_MACRO_FUSION_PAIR_P */
+       emit_insn (gen_alsldi3 (operands[0], operands[0],
+                               operands[2], gen_rtx_REG (DImode, 0)));
+       DONE;
+      }
+  })
+
+;; The late_combine2 pass can handle slli.d + add.d => alsl.d, but it does
+;; not seem to cover all cases, and it is obviously broken by the special
+;; case using alsl.d rd,rd,r0,shamt instead of slli.d rd,rd,shamt.  So
+;; implement slli.d + and + add.d => and + alsl.d on our own.
+(define_insn_and_split "<optab>_alsl_reverse<X:mode>"
+  [(set (match_operand:X 0 "register_operand" "=&r")
+       (plus:X
+         (any_bitwise:X
+           (ashift:X (match_operand:X  1 "register_operand" "r0")
+                     (match_operand:SI 2 "const_immalsl_operand" "i"))
+           (match_operand:X 3 "const_int_operand" "i"))
+         (match_operand:X 4 "register_operand" "r")))]
+  "loongarch_reassoc_shift_bitwise (<is_and>, operands[2], operands[3],
+                                   <MODE>mode)"
+  "#"
+  "&& reload_completed"
   [(set (match_dup 0) (any_bitwise:X (match_dup 1) (match_dup 3)))
-   (set (match_dup 0) (ashift:X (match_dup 0) (match_dup 2)))]
+   (set (match_dup 0) (plus:X (ashift:X (match_dup 0) (match_dup 2))
+                             (match_dup 4)))]
   {
     operands[3] = loongarch_reassoc_shift_bitwise (<is_and>,
                                                   operands[2],
@@ -3154,11 +3200,8 @@ (define_insn_and_split "<optab>_shift_reverse<X:mode>"
       }
   })
 
-;; The late_combine2 pass can handle slli.d + add.d => alsl.d, so we
-;; already have slli.d + any_bitwise + add.d => any_bitwise + slli.d +
-;; add.d => any_bitwise + alsl.d.  But late_combine2 cannot handle slli.d +
-;; add.w => alsl.w, so implement slli.d + and + add.w => and + alsl.w on
-;; our own.
+;; Likewise for slli.d + and + add.w => and + alsl.w.  Note that late
+;; combine cannot help with this at all.
 (define_insn_and_split "<optab>_alsl_reversesi_extended"
   [(set (match_operand:DI 0 "register_operand" "=&r")
        (sign_extend:DI
diff --git a/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc-dual.c b/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc-dual.c
new file mode 100644
index 00000000000..ad66daf6caa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc-dual.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "bstrpick\\.\[wd\]" 2 } } */
+/* { dg-final { scan-assembler-times "alsl\\.\[wd\]" 2 } } */
+
+struct Pair { unsigned long a, b; };
+
+struct Pair
+test (struct Pair p, unsigned long x)
+{
+  p.a &= 0xfffffff;
+  p.a <<= 2;
+  p.a += x;
+  p.b &= 0xfffffff;
+  p.b <<= 2;
+  p.b += x;
+  return p;
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc-fuse.c b/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc-fuse.c
new file mode 100644
index 00000000000..a75c2f0ce12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc-fuse.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */
+/* { dg-final { scan-assembler-times "alsl.d" 2 } } */
+/* { dg-final { scan-assembler-times "bstrpick\\.d\[^\n\]*\n\talsl.d" 2 { xfail *-*-* } } } */
+
+struct Pair { unsigned long a, b; };
+
+struct Pair
+test (struct Pair p)
+{
+  p.a &= 0xffffffff;
+  p.a <<= 2;
+  p.b &= 0xffffffff;
+  p.b <<= 2;
+  return p;
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc.c b/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc.c
index 3f197755625..9788a094f51 100644
--- a/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc.c
+++ b/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc.c
@@ -53,7 +53,7 @@ t3 (long x, long y)
 /*
 **t4:
 **     bstrpick.d      (\$r[0-9]+),\$r4,31,0
-**     slli.d  \$r4,\1,1
+**     alsl.d  \$r4,\1,\$r0,1
 **     jr      \$r1
 */
 unsigned long
diff --git a/gcc/testsuite/gcc.target/loongarch/bstrpick_alsl_paired.c b/gcc/testsuite/gcc.target/loongarch/bstrpick_alsl_paired.c
index 900e8c9e19f..166565275da 100644
--- a/gcc/testsuite/gcc.target/loongarch/bstrpick_alsl_paired.c
+++ b/gcc/testsuite/gcc.target/loongarch/bstrpick_alsl_paired.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mabi=lp64d -O2 -fdump-rtl-combine" } */
-/* { dg-final { scan-rtl-dump "{and_shift_reversedi}" "combine" } } */
+/* { dg-final { scan-rtl-dump "{and_alsl_reversedi}" "combine" } } */
 /* { dg-final { scan-assembler-not "alsl.d\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,\\\$r0" } } */
 
 struct SA
-- 
2.48.1

Reply via email to