Hi!
As the testcase shows, for some tunings the memset (x, 0, y) code path that
handles y from 16 to 31 inclusive is expanded as:
.L36:
movq $0, (%rdi)
movq $0, 8(%rdi)
movq $0, -8(%rsi,%rdi)
This is correct only for y from 16 to 24 inclusive.  If y is 25 to 31,
we clear the first 16 bytes and the last 8 bytes of the buffer, but leave
the 1 to 7 bytes in between untouched.
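With this patch we store the last 16 bytes as well (using two 8-byte stores),
so the first 16 and last 16 bytes together cover every y from 16 to 31: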
.L36:
movq $0, (%rdi)
movq $0, 8(%rdi)
movq $0, -16(%rsi,%rdi)
movq $0, -8(%rsi,%rdi)
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2014-01-21 Jakub Jelinek <[email protected]>
PR target/59003
* config/i386/i386.c (expand_small_movmem_or_setmem): If mode is
smaller than size, perform several stores or loads and stores
at dst + count - size to store or copy all of size bytes, rather
than just last modesize bytes.
* gcc.dg/tree-prof/pr59003.c: New test.
--- gcc/config/i386/i386.c.jj 2014-01-20 19:12:56.000000000 +0100
+++ gcc/config/i386/i386.c 2014-01-21 16:46:19.965094839 +0100
@@ -23397,16 +23397,24 @@ expand_small_movmem_or_setmem (rtx destm
}
destmem = offset_address (destmem, count, 1);
- destmem = offset_address (destmem, GEN_INT (-size - GET_MODE_SIZE (mode)),
+ destmem = offset_address (destmem, GEN_INT (-2 * size),
GET_MODE_SIZE (mode));
- if (issetmem)
- emit_move_insn (destmem, gen_lowpart (mode, value));
- else
+ if (!issetmem)
{
srcmem = offset_address (srcmem, count, 1);
- srcmem = offset_address (srcmem, GEN_INT (-size - GET_MODE_SIZE (mode)),
+ srcmem = offset_address (srcmem, GEN_INT (-2 * size),
GET_MODE_SIZE (mode));
- emit_move_insn (destmem, srcmem);
+ }
+ for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
+ {
+ if (issetmem)
+ emit_move_insn (destmem, gen_lowpart (mode, value));
+ else
+ {
+ emit_move_insn (destmem, srcmem);
+ srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
+ }
+ destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
}
emit_jump_insn (gen_jump (done_label));
emit_barrier ();
--- gcc/testsuite/gcc.dg/tree-prof/pr59003.c.jj 2014-01-21 15:43:58.004385383 +0100
+++ gcc/testsuite/gcc.dg/tree-prof/pr59003.c 2014-01-21 15:45:51.000000000 +0100
@@ -0,0 +1,29 @@
+/* PR target/59003 */
+/* { dg-options "-O2" } */
+/* { dg-options "-O2 -mtune=amdfam10" { target i?86-*-* x86_64-*-* } } */
+
+__attribute__((noinline, noclone)) void *
+foo (void *p, unsigned int q)
+{
+ return __builtin_memset (p, 0, q * 4UL);
+}
+
+char buf[128] __attribute__((aligned (32)));
+
+int
+main ()
+{
+ int i;
+ for (i = 0; i < 100000; i++)
+ foo (buf + 4, 1 + (i & 1));
+ for (i = 0; i < 128; i++)
+ {
+ buf[i] = 'X';
+ asm volatile ("" : : : "memory");
+ }
+ foo (buf + 32, 7);
+ for (i = 0; i < 128; i++)
+ if (buf[i] != ((i < 32 || i >= 32 + 28) ? 'X' : 0))
+ __builtin_abort ();
+ return 0;
+}
Jakub