Hi!

As the testcase in the patch shows, if exact memcpy or memset count
is unknown, but max_size is smaller than epilogue_size_needed,
ix86_expand_set_or_movmem can ICE.

The following patch fixes that, bootstrapped/regtested on x86_64-linux
and i686-linux, ok for trunk?

The resulting code doesn't look very good, though, as everything is expanded
as just the epilogue of the copying/memset.  I think the probabilities on the
branches expect that all bits of the remaining size are 0 after the main
loop (which isn't done in this case).

2013-11-26  Jakub Jelinek  <ja...@redhat.com>

        PR target/59229
        * config/i386/i386.c (decide_alg): Fix up formatting.
        (ix86_expand_set_or_movmem): Handle max_size < epilogue_size_needed
        similarly to count && count < epilogue_size_needed.  Fix up
        comment typo.
        * builtins.c (determine_block_size): Fix comment typo.

        * gcc.c-torture/execute/pr59229.c: New test.

--- gcc/config/i386/i386.c.jj   2013-11-25 18:30:18.000000000 +0100
+++ gcc/config/i386/i386.c      2013-11-26 11:27:38.116198901 +0100
@@ -23453,7 +23453,8 @@ decide_alg (HOST_WIDE_INT count, HOST_WI
   /* If expected size is not known but max size is small enough
      so inline version is a win, set expected size into
      the range.  */
-  if (max > 1 && (unsigned HOST_WIDE_INT)max >= max_size && expected_size == -1)
+  if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size
+      && expected_size == -1)
     expected_size = min_size / 2 + max_size / 2;
 
   /* If user specified the algorithm, honnor it if possible.  */
@@ -23752,7 +23753,7 @@ ix86_expand_set_or_movmem (rtx dst, rtx
   bool noalign;
   enum machine_mode move_mode = VOIDmode;
   int unroll_factor = 1;
-  /* TODO: Once vlaue ranges are available, fill in proper data.  */
+  /* TODO: Once value ranges are available, fill in proper data.  */
   unsigned HOST_WIDE_INT min_size = 0;
   unsigned HOST_WIDE_INT max_size = -1;
   unsigned HOST_WIDE_INT probable_max_size = -1;
@@ -23967,21 +23968,19 @@ ix86_expand_set_or_movmem (rtx dst, rtx
         loop variant.  */
       if (issetmem && epilogue_size_needed > 2 && !promoted_val)
        force_loopy_epilogue = true;
-      if (count)
+      if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
+         || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
        {
-         if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
-           {
-             /* If main algorithm works on QImode, no epilogue is needed.
-                For small sizes just don't align anything.  */
-             if (size_needed == 1)
-               desired_align = align;
-             else
-               goto epilogue;
-           }
+         /* If main algorithm works on QImode, no epilogue is needed.
+            For small sizes just don't align anything.  */
+         if (size_needed == 1)
+           desired_align = align;
+         else
+           goto epilogue;
        }
-      else if (min_size < (unsigned HOST_WIDE_INT)epilogue_size_needed)
+      else if (!count
+              && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
        {
-         gcc_assert (max_size >= (unsigned HOST_WIDE_INT)epilogue_size_needed);
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (epilogue_size_needed),
--- gcc/builtins.c.jj   2013-11-22 21:03:07.000000000 +0100
+++ gcc/builtins.c      2013-11-26 11:15:11.992044093 +0100
@@ -3146,7 +3146,7 @@ determine_block_size (tree len, rtx len_
        }
       else if (range_type == VR_ANTI_RANGE)
        {
-         /* Anti range 0...N lets us to determine minmal size to N+1.  */
+         /* Anti range 0...N lets us to determine minimal size to N+1.  */
          if (min.is_zero ())
            {
              if ((max + double_int_one).fits_uhwi ())
@@ -3156,7 +3156,7 @@ determine_block_size (tree len, rtx len_
 
             int n;
             if (n < 100)
-              memcpy (a,b, n)
+              memcpy (a, b, n)
 
             Produce anti range allowing negative values of N.  We still
             can use the information and make a guess that N is not negative.
--- gcc/testsuite/gcc.c-torture/execute/pr59229.c.jj    2013-11-26 11:32:07.590806813 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr59229.c       2013-11-26 11:31:56.000000000 +0100
@@ -0,0 +1,29 @@
+int i;
+
+__attribute__((noinline, noclone)) void
+bar (char *p)
+{
+  if (i < 1 || i > 6)
+    __builtin_abort ();
+  if (__builtin_memcmp (p, "abcdefg", i + 1) != 0)
+    __builtin_abort ();
+  __builtin_memset (p, ' ', 7);
+}
+
+__attribute__((noinline, noclone)) void
+foo (char *p, unsigned long l)
+{
+  if (l < 1 || l > 6)
+    return;
+  char buf[7];
+  __builtin_memcpy (buf, p, l + 1);
+  bar (buf);
+}
+
+int
+main ()
+{
+  for (i = 0; i < 16; i++)
+    foo ("abcdefghijklmnop", i);
+  return 0;
+}

        Jakub

Reply via email to