Replace the instruction costs in the loongarch_rtx_cost_data constructor
based on micro-benchmark results on LA464 and LA664.

This allows multiplications by suitable constants to be strength-reduced,
e.g. "x * 17" into a single alsl, and "x * 68" into an alsl followed by
an slli.

gcc/ChangeLog:

        PR target/112936
        * config/loongarch/loongarch-def.cc
        (loongarch_rtx_cost_data::loongarch_rtx_cost_data): Update
        instruction costs per micro-benchmark results.
        (loongarch_rtx_cost_optimize_size): Set all instruction costs
        to (COSTS_N_INSNS (1) + 1).
        * config/loongarch/loongarch.cc (loongarch_rtx_costs): Remove
        special case for multiplication when optimizing for size.
        Adjust division cost when TARGET_64BIT && !TARGET_DIV32.
        Account the extra cost when TARGET_CHECK_ZERO_DIV.

gcc/testsuite/ChangeLog:

        PR target/112936
        * gcc.target/loongarch/mul-const-reduction.c: New test.
---
 gcc/config/loongarch/loongarch-def.cc         | 39 ++++++++++---------
 gcc/config/loongarch/loongarch.cc             | 22 +++++------
 .../loongarch/mul-const-reduction.c           | 11 ++++++
 3 files changed, 43 insertions(+), 29 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c

diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
index 6217b19268c..4a8885e8343 100644
--- a/gcc/config/loongarch/loongarch-def.cc
+++ b/gcc/config/loongarch/loongarch-def.cc
@@ -92,15 +92,15 @@ array_tune<loongarch_align> loongarch_cpu_align =
 
 /* Default RTX cost initializer.  */
 loongarch_rtx_cost_data::loongarch_rtx_cost_data ()
-  : fp_add (COSTS_N_INSNS (1)),
-    fp_mult_sf (COSTS_N_INSNS (2)),
-    fp_mult_df (COSTS_N_INSNS (4)),
-    fp_div_sf (COSTS_N_INSNS (6)),
+  : fp_add (COSTS_N_INSNS (5)),
+    fp_mult_sf (COSTS_N_INSNS (5)),
+    fp_mult_df (COSTS_N_INSNS (5)),
+    fp_div_sf (COSTS_N_INSNS (8)),
     fp_div_df (COSTS_N_INSNS (8)),
-    int_mult_si (COSTS_N_INSNS (1)),
-    int_mult_di (COSTS_N_INSNS (1)),
-    int_div_si (COSTS_N_INSNS (4)),
-    int_div_di (COSTS_N_INSNS (6)),
+    int_mult_si (COSTS_N_INSNS (4)),
+    int_mult_di (COSTS_N_INSNS (4)),
+    int_div_si (COSTS_N_INSNS (5)),
+    int_div_di (COSTS_N_INSNS (5)),
     branch_cost (6),
     memory_latency (4) {}
 
@@ -111,18 +111,21 @@ loongarch_rtx_cost_data::loongarch_rtx_cost_data ()
 array_tune<loongarch_rtx_cost_data> loongarch_cpu_rtx_cost_data =
   array_tune<loongarch_rtx_cost_data> ();
 
-/* RTX costs to use when optimizing for size.  */
+/* RTX costs to use when optimizing for size.
+   We use a value slightly larger than COSTS_N_INSNS (1) for all of them
+   because they are slower than simple instructions.  */
+#define COST_COMPLEX_INSN (COSTS_N_INSNS (1) + 1)
 const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size =
   loongarch_rtx_cost_data ()
-    .fp_add_ (4)
-    .fp_mult_sf_ (4)
-    .fp_mult_df_ (4)
-    .fp_div_sf_ (4)
-    .fp_div_df_ (4)
-    .int_mult_si_ (4)
-    .int_mult_di_ (4)
-    .int_div_si_ (4)
-    .int_div_di_ (4);
+    .fp_add_ (COST_COMPLEX_INSN)
+    .fp_mult_sf_ (COST_COMPLEX_INSN)
+    .fp_mult_df_ (COST_COMPLEX_INSN)
+    .fp_div_sf_ (COST_COMPLEX_INSN)
+    .fp_div_df_ (COST_COMPLEX_INSN)
+    .int_mult_si_ (COST_COMPLEX_INSN)
+    .int_mult_di_ (COST_COMPLEX_INSN)
+    .int_div_si_ (COST_COMPLEX_INSN)
+    .int_div_di_ (COST_COMPLEX_INSN);
 
 array_tune<int> loongarch_cpu_issue_rate = array_tune<int> ()
   .set (CPU_NATIVE, 4)
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 754aeb8bfb7..f04b5798f39 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -3787,8 +3787,6 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
        *total = (speed
                  ? loongarch_cost->int_mult_si * 3 + 6
                  : COSTS_N_INSNS (7));
-      else if (!speed)
-       *total = COSTS_N_INSNS (1) + 1;
       else if (mode == DImode)
        *total = loongarch_cost->int_mult_di;
       else
@@ -3823,14 +3821,18 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
 
     case UDIV:
     case UMOD:
-      if (!speed)
-       {
-         *total = COSTS_N_INSNS (loongarch_idiv_insns (mode));
-       }
-      else if (mode == DImode)
+      if (mode == DImode)
        *total = loongarch_cost->int_div_di;
       else
-       *total = loongarch_cost->int_div_si;
+       {
+         *total = loongarch_cost->int_div_si;
+         if (TARGET_64BIT && !TARGET_DIV32)
+           *total += COSTS_N_INSNS (2);
+       }
+
+      if (TARGET_CHECK_ZERO_DIV)
+       *total += COSTS_N_INSNS (2);
+
       return false;
 
     case SIGN_EXTEND:
@@ -3862,9 +3864,7 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
                  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
                      == ZERO_EXTEND))))
        {
-         if (!speed)
-           *total = COSTS_N_INSNS (1) + 1;
-         else if (mode == DImode)
+         if (mode == DImode)
            *total = loongarch_cost->int_mult_di;
          else
            *total = loongarch_cost->int_mult_si;
diff --git a/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c b/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c
new file mode 100644
index 00000000000..02d9a4876d8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=la464" } */
+/* { dg-final { scan-assembler "alsl\.w" } } */
+/* { dg-final { scan-assembler "slli\.w" } } */
+/* { dg-final { scan-assembler-not "mul\.w" } } */
+
+int
+test (int a)
+{
+  return a * 68;
+}
-- 
2.43.0

Reply via email to