Equalize the rtx cost of mulw.d.w[u] with mul.d.

There are two main benefits:
1. Reduce the number of instructions, e.g. for unsigned division by a
   constant (a / 3):
    -       lu32i.d     $r12,0
    -       bstrpick.d  $r4,$r4,31,0
    -       mul.d       $r4,$r4,$r12
    -       srli.d      $r4,$r4,33
    ---
    +       mulh.wu     $r4,$r4,$r12
    +       bstrpick.d  $r4,$r4,31,1

2. Allow the high-latency div.w to be replaced by a multiply-based
   sequence, e.g. for signed division by a constant (a / 3):
    -       addi.w      $r12,$r0,3
    -       div.w       $r4,$r4,$r12
    ---
    +       lu12i.w     $r13,349525
    +       ori         $r13,$r13,1366
    +       mulw.d.w    $r12,$r4,$r13
    +       srai.w      $r4,$r4,31
    +       srli.d      $r12,$r12,32
    +       sub.w       $r4,$r12,$r4

gcc/ChangeLog:

        * config/loongarch/loongarch.cc (loongarch_rtx_costs):
        Ignore the cost impact of SIGN_EXTEND/ZERO_EXTEND.

gcc/testsuite/ChangeLog:

        * gcc.target/loongarch/widen-mul-rtx-cost-signed.c: New test.
        * gcc.target/loongarch/widen-mul-rtx-cost-unsigned.c: New test.
---
 gcc/config/loongarch/loongarch.cc                   | 11 +++++++++++
 .../loongarch/widen-mul-rtx-cost-signed.c           | 13 +++++++++++++
 .../loongarch/widen-mul-rtx-cost-unsigned.c         | 11 +++++++++++
 3 files changed, 35 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/widen-mul-rtx-cost-signed.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/widen-mul-rtx-cost-unsigned.c

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 3fe8c766cc7..d73479745f2 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4025,6 +4025,17 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
        *total = loongarch_cost->int_mult_di;
       else
        *total = loongarch_cost->int_mult_si;
+
+      /* Check for mul_widen.  */
+      if ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
+          && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
+         || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
+             && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND))
+       {
+         *total += (set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed)
+                    + set_src_cost (XEXP (XEXP (x, 1), 0), mode, speed));
+         return true;
+       }
       return false;
 
     case DIV:
diff --git a/gcc/testsuite/gcc.target/loongarch/widen-mul-rtx-cost-signed.c b/gcc/testsuite/gcc.target/loongarch/widen-mul-rtx-cost-signed.c
new file mode 100644
index 00000000000..1e1e75f2a99
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/widen-mul-rtx-cost-signed.c
@@ -0,0 +1,13 @@
+/* Verify optimization for mulw.d.w,
+   which can help with the replacement of the high-latency div.w.  */
+/* { dg-do compile { target { loongarch64*-*-* } } } */
+/* { dg-options "-O3" } */
+
+int
+test (int a)
+{
+  return a / 3;
+}
+
+/* { dg-final { scan-assembler {\tmulw.d.w\t} } } */
+/* { dg-final { scan-assembler-not {\tdiv.w\t} } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/widen-mul-rtx-cost-unsigned.c b/gcc/testsuite/gcc.target/loongarch/widen-mul-rtx-cost-unsigned.c
new file mode 100644
index 00000000000..32a428f8c62
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/widen-mul-rtx-cost-unsigned.c
@@ -0,0 +1,11 @@
+/* Verify optimization for mulh.wu, which can reduce insns.  */
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+int
+test (unsigned int a)
+{
+  return a / 3;
+}
+
+/* { dg-final { scan-assembler {\tmulh.wu\t} } } */
-- 
2.50.0

Reply via email to