PR target/87079 reported that with -Os, the nios2 back end was emitting
an inferior code sequence for widening multiply instead of using mulx.
I tracked this down to the rtx costs hook not recognizing the RTL
pattern for <mul>sidi3, so it overestimated the cost of the widening multiply.
I've been aware for a while that the RTX costs computation in the nios2
back end is far from optimal or even correct :-P, but giving it a complete
overhaul is a pretty big project requiring benchmarking, etc., as well as
unit tests. I don't want the perfect to be the enemy of the good, so
I've checked in the attached patch to fix this issue and add the test
case (both -Os and -O2 variants).
-Sandra
2018-11-03 Sandra Loosemore <san...@codesourcery.com>
PR target/87079
gcc/
* config/nios2/nios2.c (nios2_rtx_costs): Recognize <mul>sidi3
pattern.
gcc/testsuite/
* gcc.target/nios2/pr87079-1.c: New.
* gcc.target/nios2/pr87079-2.c: New.
Index: gcc/config/nios2/nios2.c
===================================================================
--- gcc/config/nios2/nios2.c (revision 265561)
+++ gcc/config/nios2/nios2.c (working copy)
@@ -1539,6 +1539,19 @@ nios2_rtx_costs (rtx x, machine_mode mod
*total = COSTS_N_INSNS (2); /* Latency adjustment. */
else
*total = COSTS_N_INSNS (1);
+ if (TARGET_HAS_MULX && GET_MODE (x) == DImode)
+ {
+ enum rtx_code c0 = GET_CODE (XEXP (x, 0));
+ enum rtx_code c1 = GET_CODE (XEXP (x, 1));
+ if ((c0 == SIGN_EXTEND && c1 == SIGN_EXTEND)
+ || (c0 == ZERO_EXTEND && c1 == ZERO_EXTEND))
+ /* This is the <mul>sidi3 pattern, which expands into 4 insns,
+ 2 multiplies and 2 moves. */
+ {
+ *total = *total * 2 + COSTS_N_INSNS (2);
+ return true;
+ }
+ }
return false;
}
Index: gcc/testsuite/gcc.target/nios2/pr87079-1.c
===================================================================
--- gcc/testsuite/gcc.target/nios2/pr87079-1.c (nonexistent)
+++ gcc/testsuite/gcc.target/nios2/pr87079-1.c (working copy)
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -mhw-div -mhw-mul -mhw-mulx" } */
+
+#include <stdint.h>
+#include <stddef.h>
+
+void foo(const uint8_t* str, uint32_t* res) /* Accumulate the leading decimal digits of str into three 32-bit limbs written to res. */
+{
+ uint32_t rdVal0, rdVal1, rdVal2; /* Limbs of the running value, least significant first. */
+ rdVal0 = rdVal1 = rdVal2 = 0;
+ unsigned c;
+ for (;;) {
+ c = *str++;
+ unsigned dig = c - '0'; /* Unsigned subtraction: characters below '0' wrap to a value above 9. */
+ if (dig > 9)
+ break; // non-digit
+ uint64_t x10;
+ /* Limb 0: 64-bit product of the 32-bit limb and 10, plus the new digit. */
+ x10 = (uint64_t)rdVal0*10 + dig;
+ rdVal0 = (uint32_t)x10;
+ dig = (uint32_t)(x10 >> 32); /* High half becomes the carry into the next limb. */
+ /* Limb 1: same widening multiply, adding the carry from limb 0. */
+ x10 = (uint64_t)rdVal1*10 + dig;
+ rdVal1 = (uint32_t)x10;
+ dig = (uint32_t)(x10 >> 32);
+ /* Limb 2: plain 32-bit multiply; anything carried out of it is dropped. */
+ rdVal2 = rdVal2*10 + dig;
+ }
+ res[0] = rdVal0;
+ res[1] = rdVal1;
+ res[2] = rdVal2;
+}
+
+/* { dg-final { scan-assembler-times "mulxuu\t" 2 } } */
Index: gcc/testsuite/gcc.target/nios2/pr87079-2.c
===================================================================
--- gcc/testsuite/gcc.target/nios2/pr87079-2.c (nonexistent)
+++ gcc/testsuite/gcc.target/nios2/pr87079-2.c (working copy)
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mhw-div -mhw-mul -mhw-mulx" } */
+
+#include <stdint.h>
+#include <stddef.h>
+
+void foo(const uint8_t* str, uint32_t* res) /* Accumulate the leading decimal digits of str into three 32-bit limbs written to res. */
+{
+ uint32_t rdVal0, rdVal1, rdVal2; /* Limbs of the running value, least significant first. */
+ rdVal0 = rdVal1 = rdVal2 = 0;
+ unsigned c;
+ for (;;) {
+ c = *str++;
+ unsigned dig = c - '0'; /* Unsigned subtraction: characters below '0' wrap to a value above 9. */
+ if (dig > 9)
+ break; // non-digit
+ uint64_t x10;
+ /* Limb 0: 64-bit product of the 32-bit limb and 10, plus the new digit. */
+ x10 = (uint64_t)rdVal0*10 + dig;
+ rdVal0 = (uint32_t)x10;
+ dig = (uint32_t)(x10 >> 32); /* High half becomes the carry into the next limb. */
+ /* Limb 1: same widening multiply, adding the carry from limb 0. */
+ x10 = (uint64_t)rdVal1*10 + dig;
+ rdVal1 = (uint32_t)x10;
+ dig = (uint32_t)(x10 >> 32);
+ /* Limb 2: plain 32-bit multiply; anything carried out of it is dropped. */
+ rdVal2 = rdVal2*10 + dig;
+ }
+ res[0] = rdVal0;
+ res[1] = rdVal1;
+ res[2] = rdVal2;
+}
+
+/* { dg-final { scan-assembler-times "mulxuu\t" 2 } } */