For large unaligned block moves, a library call may perform better
than inline expansion.

Let's just return false from the expander, without expanding inline,
if the length is
        larger than 64 bytes on O32, or
        larger than 128 bytes on N32/N64.

Here are some test results:

CPU             Aligned O32     N32     N64
Cavium 7130     1       72      192     192
Cavium 7130     2       96      192     192
Cavium 7130     4       +inf    192     192
Cavium 7130     8       +inf    +inf    +inf
LS3A 2000       1       32      32      64
LS3A 2000       2       32      64      64
LS3A 2000       4       +inf    32      48
LS3A 2000       8       +inf    +inf    +inf
LS3A 4000       1       32      64      128
LS3A 4000       2       64      64      128
LS3A 4000       4       192     128     192
LS3A 4000       8       192     +inf    +inf
Ingenic X2000   1       96      -       -
Ingenic X2000   2       96      -       -
Ingenic X2000   4       +inf    -       -
interAptiv      1       64      -       -
interAptiv      2       64      -       -
interAptiv      4       +inf    -       -
P6600           1       64      128     128
P6600           2       64      128     128
P6600           4       256     192     128
P6600           8       +inf    +inf    +inf
P6600           1U      48      48      48      -mno-unaligned-access
P6600           2U      128     96      96      -mno-unaligned-access
P6600           4U      384     768     768     -mno-unaligned-access
P6600           8U      1024    +inf    +inf    -mno-unaligned-access
I6500           1       384     896     640
I6500           2       256     768     512
I6500           4       384     704     512
I6500           8       512     1024    1024
I6500           1U      32      32      32      -mno-unaligned-access
I6500           2U      96      64      64      -mno-unaligned-access
I6500           4U      384     640     768     -mno-unaligned-access
I6500           8U      256     +inf    +inf    -mno-unaligned-access

When the length of the block move is larger than the value shown in the
table, the (g)libc call has better performance.

gcc/
        * config/mips/mips.cc (mips_expand_block_move): Don't expand
        large unaligned block moves.

gcc/testsuite/
        * gcc.target/mips/expand-block-move-large.c: New test.
---
 gcc/config/mips/mips.cc                       |  6 +++++
 .../gcc.target/mips/expand-block-move-large.c | 22 +++++++++++++++++++
 2 files changed, 28 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/mips/expand-block-move-large.c

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index ca297e3c1e5..7a7f4be627f 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -8308,6 +8308,12 @@ mips_expand_block_move (rtx dest, rtx src, rtx length)
          || MEM_ALIGN (dest) < MIPS_MIN_MOVE_MEM_ALIGN))
     return false;
 
+  if ((ISA_HAS_LWL_LWR || ISA_HAS_UNALIGNED_ACCESS)
+      && INTVAL (length) > MIPS_MAX_MOVE_BYTES_STRAIGHT * 2
+      && MEM_ALIGN (src) < BITS_PER_WORD
+      && MEM_ALIGN (dest) < BITS_PER_WORD)
+    return false;
+
   if (INTVAL (length) <= MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER)
     {
       mips_block_move_straight (dest, src, INTVAL (length));
diff --git a/gcc/testsuite/gcc.target/mips/expand-block-move-large.c b/gcc/testsuite/gcc.target/mips/expand-block-move-large.c
new file mode 100644
index 00000000000..98705f6c560
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/expand-block-move-large.c
@@ -0,0 +1,22 @@
+/* { dg-final { scan-assembler "memcpy" } } */
+/* { dg-final { scan-assembler-not "lwl" } } */
+/* { dg-final { scan-assembler-not "swl" } } */
+/* { dg-final { scan-assembler-not "lwr" } } */
+/* { dg-final { scan-assembler-not "swr" } } */
+/* { dg-final { scan-assembler-not "ldl" } } */
+/* { dg-final { scan-assembler-not "sdl" } } */
+/* { dg-final { scan-assembler-not "ldr" } } */
+/* { dg-final { scan-assembler-not "sdr" } } */
+
+char a[4097], b[4097];
+#ifdef __mips64
+#define MAX_SIZE 128
+#else
+#define MAX_SIZE 64
+#endif
+
+NOCOMPRESSION void
+foo ()
+{
+  __builtin_memcpy(&a[1], &b[1], MAX_SIZE + 16);
+}
-- 
2.30.2

Reply via email to