Pushed to r16-4644.

Thanks!

在 2025/10/25 上午10:43, Guo Jie 写道:
Equalize the rtx cost of mulw.d.w[u] with mul.d.

There are two main benefits:
1. Reducing the number of instructions.
     -       lu32i.d    $r12,0
     -       bstrpick.d $r4,$r4,31,0
     -       mul.d      $r4,$r4,$r12
     -       srli.d     $r4,$r4,33
     ---
     +       mulh.wu    $r4,$r4,$r12
     +       bstrpick.d $r4,$r4,31,1

2. Helping to replace the high-latency div.w.
     -       addi.w     $r12,$r0,3
     -       div.w      $r4,$r4,$r12
     ---
     +       lu12i.w    $r13,349525
     +       ori        $r13,$r13,1366
     +       mulw.d.w   $r12,$r4,$r13
     +       srai.w     $r4,$r4,31
     +       srli.d     $r12,$r12,32
     +       sub.w      $r4,$r12,$r4

gcc/ChangeLog:

        * config/loongarch/loongarch.cc (loongarch_rtx_costs):
        Ignore the cost impact of SIGN_EXTEND/ZERO_EXTEND.

gcc/testsuite/ChangeLog:

        * gcc.target/loongarch/widen-mul-rtx-cost-signed.c: New test.
        * gcc.target/loongarch/widen-mul-rtx-cost-unsigned.c: New test.
---
  gcc/config/loongarch/loongarch.cc                   | 11 +++++++++++
  .../loongarch/widen-mul-rtx-cost-signed.c           | 13 +++++++++++++
  .../loongarch/widen-mul-rtx-cost-unsigned.c         | 11 +++++++++++
  3 files changed, 35 insertions(+)
  create mode 100644 gcc/testsuite/gcc.target/loongarch/widen-mul-rtx-cost-signed.c
  create mode 100644 gcc/testsuite/gcc.target/loongarch/widen-mul-rtx-cost-unsigned.c

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 3fe8c766cc7..d73479745f2 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4025,6 +4025,17 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
        *total = loongarch_cost->int_mult_di;
        else
        *total = loongarch_cost->int_mult_si;
+
+      /* Check for mul_widen.  */
+      if ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
+          && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
+         || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
+             && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND))
+       {
+         *total += (set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed)
+                    + set_src_cost (XEXP (XEXP (x, 1), 0), mode, speed));
+         return true;
+       }
        return false;
case DIV:
diff --git a/gcc/testsuite/gcc.target/loongarch/widen-mul-rtx-cost-signed.c b/gcc/testsuite/gcc.target/loongarch/widen-mul-rtx-cost-signed.c
new file mode 100644
index 00000000000..1e1e75f2a99
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/widen-mul-rtx-cost-signed.c
@@ -0,0 +1,13 @@
+/* Verify optimization for mulw.d.w,
+   which can help with the replacement of the high-latency div.w.  */
+/* { dg-do compile { target { loongarch64*-*-* } } } */
+/* { dg-options "-O3" } */
+
+int
+test (int a)
+{
+  return a / 3;
+}
+
+/* { dg-final { scan-assembler {\tmulw.d.w\t} } } */
+/* { dg-final { scan-assembler-not {\tdiv.w\t} } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/widen-mul-rtx-cost-unsigned.c b/gcc/testsuite/gcc.target/loongarch/widen-mul-rtx-cost-unsigned.c
new file mode 100644
index 00000000000..32a428f8c62
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/widen-mul-rtx-cost-unsigned.c
@@ -0,0 +1,11 @@
+/* Verify optimization for mulh.wu, which can reduce insns.  */
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+int
+test (unsigned int a)
+{
+  return a / 3;
+}
+
+/* { dg-final { scan-assembler {\tmulh.wu\t} } } */

Reply via email to