We have to emit a check for main loop execution UNLESS we can guarantee that min_size is at least equal to size_needed. In this testcase, VRP declared a minimum copying size, and after the unaligned prologue adjustment, min_size was still non-zero. The !min_size check didn't account for the case where min_size is non-zero but still less than size_needed.
2015-07-25  Uros Bizjak  <ubiz...@gmail.com>

        PR target/66648
        * config/i386/i386.c (ix86_expand_set_or_movmem): Emit main loop
        execution guard when min_size is less than size_needed.

testsuite/ChangeLog:

2015-07-25  Uros Bizjak  <ubiz...@gmail.com>

        PR target/66648
        * gcc.target/i386/pr66648.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline, will be backported to release branches.

Uros.
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 224993)
+++ config/i386/i386.c (working copy)
@@ -25008,7 +25008,8 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx c
       dst = change_address (dst, BLKmode, destreg);
       set_mem_align (dst, desired_align * BITS_PER_UNIT);
       epilogue_size_needed = 0;
-      if (need_zero_guard && !min_size)
+      if (need_zero_guard
+          && min_size < (unsigned HOST_WIDE_INT) size_needed)
         {
           /* It is possible that we copied enough so the main loop will not
              execute.  */
@@ -25140,7 +25141,7 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx c
           max_size -= align_bytes;
         }
       if (need_zero_guard
-          && !min_size
+          && min_size < (unsigned HOST_WIDE_INT) size_needed
           && (count < (unsigned HOST_WIDE_INT) size_needed
               || (align_bytes == 0
                   && count < ((unsigned HOST_WIDE_INT) size_needed
Index: testsuite/gcc.target/pr66648.c
===================================================================
--- testsuite/gcc.target/pr66648.c (revision 0)
+++ testsuite/gcc.target/pr66648.c (working copy)
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mstringop-strategy=unrolled_loop -mtune=nocona" } */
+
+#define PATTERN 0xdeadbeef
+#define SIZE 32
+
+struct S { int i; char str[SIZE]; int j; };
+
+void __attribute__((noclone, noinline))
+my_memcpy (char *, const char *, unsigned int);
+
+void
+my_memcpy (char *dst, const char *src, unsigned int len)
+{
+  if (len < 8)
+    __builtin_abort ();
+
+  __builtin_memcpy (dst, src, len);
+}
+
+int
+main (void)
+{
+  const char str[SIZE]= "1234567890123456789012345678901";
+  struct S *s = __builtin_malloc (sizeof (struct S));
+
+  s->j = PATTERN;
+  my_memcpy (s->str, str, SIZE);
+  if (s->j != PATTERN)
+    __builtin_abort ();
+
+  return 0;
+}