Hi! As the testcase in the patch shows, if exact memcpy or memset count is unknown, but max_size is smaller than epilogue_size_needed, ix86_expand_set_or_movmem can ICE.
The following patch fixes that, bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Though, the resulting code doesn't look very good, as everything is expanded as just epilogue of the copying/memset, I think the probabilities on the branches expect that all bits of the remaining size are 0 after the main loop (which isn't done in this case). 2013-11-26 Jakub Jelinek <ja...@redhat.com> PR target/59229 * config/i386/i386.c (device_alg): Fix up formatting. (ix86_expand_set_or_movmem): Handle max_size < epilogue_size_needed similarly to count && count < epilogue_size_needed. Fix up comment typo. * builtins.c (determine_block_size): Fix comment typo. * gcc.c-torture/execute/pr59229.c: New test. --- gcc/config/i386/i386.c.jj 2013-11-25 18:30:18.000000000 +0100 +++ gcc/config/i386/i386.c 2013-11-26 11:27:38.116198901 +0100 @@ -23453,7 +23453,8 @@ decide_alg (HOST_WIDE_INT count, HOST_WI /* If expected size is not known but max size is small enough so inline version is a win, set expected size into the range. */ - if (max > 1 && (unsigned HOST_WIDE_INT)max >= max_size && expected_size == -1) + if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size + && expected_size == -1) expected_size = min_size / 2 + max_size / 2; /* If user specified the algorithm, honnor it if possible. */ @@ -23752,7 +23753,7 @@ ix86_expand_set_or_movmem (rtx dst, rtx bool noalign; enum machine_mode move_mode = VOIDmode; int unroll_factor = 1; - /* TODO: Once vlaue ranges are available, fill in proper data. */ + /* TODO: Once value ranges are available, fill in proper data. */ unsigned HOST_WIDE_INT min_size = 0; unsigned HOST_WIDE_INT max_size = -1; unsigned HOST_WIDE_INT probable_max_size = -1; @@ -23967,21 +23968,19 @@ ix86_expand_set_or_movmem (rtx dst, rtx loop variant. */ if (issetmem && epilogue_size_needed > 2 && !promoted_val) force_loopy_epilogue = true; - if (count) + if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed) + || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed) { - if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed) - { - /* If main algorithm works on QImode, no epilogue is needed. - For small sizes just don't align anything. */ - if (size_needed == 1) - desired_align = align; - else - goto epilogue; - } + /* If main algorithm works on QImode, no epilogue is needed. + For small sizes just don't align anything. */ + if (size_needed == 1) + desired_align = align; + else + goto epilogue; } - else if (min_size < (unsigned HOST_WIDE_INT)epilogue_size_needed) + else if (!count + && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed) { - gcc_assert (max_size >= (unsigned HOST_WIDE_INT)epilogue_size_needed); label = gen_label_rtx (); emit_cmp_and_jump_insns (count_exp, GEN_INT (epilogue_size_needed), --- gcc/builtins.c.jj 2013-11-22 21:03:07.000000000 +0100 +++ gcc/builtins.c 2013-11-26 11:15:11.992044093 +0100 @@ -3146,7 +3146,7 @@ determine_block_size (tree len, rtx len_ } else if (range_type == VR_ANTI_RANGE) { - /* Anti range 0...N lets us to determine minmal size to N+1. */ + /* Anti range 0...N lets us to determine minimal size to N+1. */ if (min.is_zero ()) { if ((max + double_int_one).fits_uhwi ()) @@ -3156,7 +3156,7 @@ determine_block_size (tree len, rtx len_ int n; if (n < 100) - memcpy (a,b, n) + memcpy (a, b, n) Produce anti range allowing negative values of N. We still can use the information and make a guess that N is not negative. --- gcc/testsuite/gcc.c-torture/execute/pr59229.c.jj 2013-11-26 11:32:07.590806813 +0100 +++ gcc/testsuite/gcc.c-torture/execute/pr59229.c 2013-11-26 11:31:56.000000000 +0100 @@ -0,0 +1,29 @@ +int i; + +__attribute__((noinline, noclone)) void +bar (char *p) +{ + if (i < 1 || i > 6) + __builtin_abort (); + if (__builtin_memcmp (p, "abcdefg", i + 1) != 0) + __builtin_abort (); + __builtin_memset (p, ' ', 7); +} + +__attribute__((noinline, noclone)) void +foo (char *p, unsigned long l) +{ + if (l < 1 || l > 6) + return; + char buf[7]; + __builtin_memcpy (buf, p, l + 1); + bar (buf); +} + +int +main () +{ + for (i = 0; i < 16; i++) + foo ("abcdefghijklmnop", i); + return 0; +} Jakub