On 8/15/20 8:19 AM, Christophe Lyon wrote:
Hi Martin,


On Sat, 15 Aug 2020 at 01:14, Martin Sebor via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:

On 8/13/20 11:44 AM, Martin Sebor wrote:
On 8/13/20 10:21 AM, Jeff Law wrote:
On Fri, 2020-07-31 at 17:55 -0600, Martin Sebor via Gcc-patches wrote:
The folders for these functions (and some others) call c_getsr
which relies on string_constant to return the representation of
constant strings.  Because the function doesn't handle constants
of other types, including aggregates, memcmp or memchr calls
involving those are not folded when they could be.

The attached patch extends the algorithm used by string_constant
to also handle constant aggregates involving elements or members
of the same types as native_encode_expr.  (The change restores
the empty initializer optimization inadvertently disabled in
the fix for pr96058.)

To avoid accidentally misusing either string_constant or c_getstr
with non-strings I have introduced a pair of new functions to get
the representation of those: byte_representation and getbyterep.

Tested on x86_64-linux.

Martin

PR tree-optimization/78257 - missing memcmp optimization with
constant arrays

gcc/ChangeLog:

     PR middle-end/78257
     * builtins.c (expand_builtin_memory_copy_args): Rename called
function.
     (expand_builtin_stpcpy_1): Remove argument from call.
     (expand_builtin_memcmp): Rename called function.
     (inline_expand_builtin_bytecmp): Same.
     * expr.c (convert_to_bytes): New function.
     (constant_byte_string): New function (formerly string_constant).
     (string_constant): Call constant_byte_string.
     (byte_representation): New function.
     * expr.h (byte_representation): Declare.
     * fold-const-call.c (fold_const_call): Rename called function.
     * fold-const.c (c_getstr): Remove an argument.
     (getbyterep): Define a new function.
     * fold-const.h (c_getstr): Remove an argument.
     (getbyterep): Declare a new function.
     * gimple-fold.c (gimple_fold_builtin_memory_op): Rename callee.
     (gimple_fold_builtin_string_compare): Same.
     (gimple_fold_builtin_memchr): Same.

gcc/testsuite/ChangeLog:

     PR middle-end/78257
     * gcc.dg/memchr.c: New test.
     * gcc.dg/memcmp-2.c: New test.
     * gcc.dg/memcmp-3.c: New test.
     * gcc.dg/memcmp-4.c: New test.

diff --git a/gcc/expr.c b/gcc/expr.c
index a150fa0d3b5..a124df54655 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -11594,15 +11594,103 @@ is_aligning_offset (const_tree offset,
const_tree exp)
     /* This must now be the address of EXP.  */
     return TREE_CODE (offset) == ADDR_EXPR && TREE_OPERAND (offset,
0) == exp;
   }
-
-/* Return the tree node if an ARG corresponds to a string constant
or zero
-   if it doesn't.  If we return nonzero, set *PTR_OFFSET to the
(possibly
-   non-constant) offset in bytes within the string that ARG is
accessing.
-   If MEM_SIZE is non-zero the storage size of the memory is returned.
-   If DECL is non-zero the constant declaration is returned if
available.  */
-tree
-string_constant (tree arg, tree *ptr_offset, tree *mem_size, tree
*decl)
+/* If EXPR is a constant initializer (either an expression or
CONSTRUCTOR),
+   attempt to obtain its native representation as an array of
nonzero BYTES.
+   Return true on success and false on failure (the latter without
modifying
+   BYTES).  */
+
+static bool
+convert_to_bytes (tree type, tree expr, vec<unsigned char> *bytes)
+{
+  if (TREE_CODE (expr) == CONSTRUCTOR)
+    {
+      /* Set to the size of the CONSTRUCTOR elements.  */
+      unsigned HOST_WIDE_INT ctor_size = bytes->length ();
+
+      if (TREE_CODE (type) == ARRAY_TYPE)
+    {
+      tree val, idx;
+      tree eltype = TREE_TYPE (type);
+      unsigned HOST_WIDE_INT elsize =
+        tree_to_uhwi (TYPE_SIZE_UNIT (eltype));
+      unsigned HOST_WIDE_INT i, last_idx = HOST_WIDE_INT_M1U;
+      FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (expr), i, idx, val)
+        {
+          /* Append zeros for elements with no initializers.  */
+          if (!tree_fits_uhwi_p (idx))
+        return false;
+          unsigned HOST_WIDE_INT cur_idx = tree_to_uhwi (idx);
+          if (unsigned HOST_WIDE_INT size = cur_idx - (last_idx + 1))
+        {
+          size = size * elsize + bytes->length ();
+          bytes->safe_grow_cleared (size);
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

+        }
+
+          if (!convert_to_bytes (eltype, val, bytes))
+        return false;
+
+          last_idx = cur_idx;
+        }
+    }
+      else if (TREE_CODE (type) == RECORD_TYPE)
+    {
+      tree val, fld;
+      unsigned HOST_WIDE_INT i;
+      FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (expr), i, fld, val)
+        {
+          /* Append zeros for members with no initializers and
+         any padding.  */
+          unsigned HOST_WIDE_INT cur_off = int_byte_position (fld);
+          if (bytes->length () < cur_off)
+        bytes->safe_grow_cleared (cur_off);
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+          if (!convert_to_bytes (TREE_TYPE (val), val, bytes))
+        return false;
+        }
+    }
+      else
+    return false;
+
+      /* Compute the size of the COSNTRUCTOR elements.  */
+      ctor_size = bytes->length () - ctor_size;
+
+      /* Append zeros to the byte vector to the full size of the type.
+     The type size can be less than the size of the CONSTRUCTOR
+     if the latter contains initializers for a flexible array
+     member.  */
+      tree size = TYPE_SIZE_UNIT (type);
+      unsigned HOST_WIDE_INT type_size = tree_to_uhwi (size);
+      if (ctor_size < type_size)
+    if (unsigned HOST_WIDE_INT size_grow = type_size - ctor_size)
+      bytes->safe_grow_cleared (bytes->length () + size_grow);
+
+      return true;
+    }
So I think you need to be more careful with CONSTRUCTOR nodes here.
Not all
elements of an object need to appear in the CONSTRUCTOR.  Elements
which do not
appear in the CONSTRUCTOR node are considered zero-initialized, unless
CONSTRUCTOR_NO_CLEARING is set.

I don't see anything in the code above which deals with those oddities of
CONSTRUCTOR nodes.  Did I miss it?

Just capturing for reference what we just discussed off list:

The underlined code above zeroes out the bytes of elements with
no initializers as well as any padding between fields.  It doesn't
consider CONSTRUCTOR_NO_CLEARING.  I didn't know about that bit so
I looked it up.  According to the internals manual:

    Unrepresented fields will be cleared (zeroed), unless the
    CONSTRUCTOR_NO_CLEARING flag is set, in which case their value
    becomes undefined.

So assuming they're zero should be fine, as would doing nothing.
We agreed on the former so I will go ahead with the patch as is.

I had missed a few Ada failures.  Apparently, Ada can specify
arbitrary array bounds (not just upper but also lower) but
the code assumed the lower bound would always be zero.  I've
adjusted it to avoid making that assumption and committed
the updated revision in r11-2709.


This commit is causing a regression on arm:
FAIL:    gcc.dg/strlenopt-55.c scan-tree-dump-times gimple "memcmp" 0
FAIL:    gcc.dg/strlenopt-55.c scan-tree-dump-times optimized
"call_in_true_branch_not_eliminated" 0

Thanks for letting me know!  (The failure was also subsequently
reported in PR 96665.)

The test fails because the new byte_representation() function isn't
prepared to deal with strings longer than the size of the small local
buffer it passes to native_encode_expr().  When the buffer isn't big
enough for the whole string, native_encode_expr() encodes only as
much as fits and returns the number of encoded bytes.

A simple fix is straightforward but I think a better solution would
be to call native_encode_initializer() that already has most of
the same smarts.  I didn't notice the function until this failure
was reported. Unfortunately, it has (at least) two limitations:

1) it doesn't distinguish a failure from "end of encoding"
2) it doesn't handle initializers for structs with flexible array
   members

(1) means that it can't be called in a loop on any arbitrary
    initializer until the whole thing is encoded.  There's no way
    to tell if the function failed to convert a member of a struct
    or that there simply is nothing else to convert.
(2) means that using it as is would a number of other regressions
    like the one in in strlenopt-55.c.

A fix for (2) is localized to just native_encode_initializer()
(and attached) so that's what I plan to commit unless there are
concerns/suggestions for changes.

Although strictly not necessary, a fix for (1) should involve
changing all other native_encode_xxx() functions to return -1
on failure for consistency, and updating their callers, which
is on the order of 70 places.  I'll think about proposing that
separately.

Martin
PR middle-end/96665 - memcmp of a constant string not folded

gcc/ChangeLog:

	PR middle-end/96665
	* expr.c (convert_to_bytes): Replace statically allocated buffer with
	a dynamically allocated one of sufficient size.

gcc/testsuite/ChangeLog:

	PR middle-end/78257
	* gcc.dg/memcmp-5.c: New test.

diff --git a/gcc/expr.c b/gcc/expr.c
index dd2200ddea8..437faeaba08 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -11683,16 +11683,27 @@ convert_to_bytes (tree type, tree expr, vec<unsigned char> *bytes)
       return true;
     }
 
-  unsigned char charbuf[MAX_BITSIZE_MODE_ANY_MODE / BITS_PER_UNIT];
-  int len = native_encode_expr (expr, charbuf, sizeof charbuf, 0);
-  if (len <= 0)
+  /* Except for RECORD_TYPE which may have an initialized flexible array
+     member, the size of a type is the same as the size of the initializer
+     (including any implicitly zeroed out members and padding).  Allocate
+     just enough for that many bytes.  */
+  tree expr_size = TYPE_SIZE_UNIT (TREE_TYPE (expr));
+  if (!expr_size || !tree_fits_uhwi_p (expr_size))
+    return false;
+  const unsigned HOST_WIDE_INT expr_bytes = tree_to_uhwi (expr_size);
+  const unsigned bytes_sofar = bytes->length ();
+  /* native_encode_expr can convert at most INT_MAX bytes.  vec is limited
+     to at most UINT_MAX.  */
+  if (bytes_sofar + expr_bytes > INT_MAX)
     return false;
 
-  unsigned n = bytes->length ();
-  bytes->safe_grow (n + len);
-  unsigned char *p = bytes->address ();
-  memcpy (p + n, charbuf, len);
-  return true;
+  /* Unlike for RECORD_TYPE, there is no need to clear the memory since
+     it's completely overwritten by native_encode_expr.  */
+  bytes->safe_grow (bytes_sofar + expr_bytes);
+  unsigned char *pnext = bytes->begin () + bytes_sofar;
+  int nbytes = native_encode_expr (expr, pnext, expr_bytes, 0);
+  /* NBYTES is zero on failure.  Otherwise it should equal EXPR_BYTES.  */
+  return (unsigned HOST_WIDE_INT) nbytes == expr_bytes;
 }
 
 /* Return a STRING_CST corresponding to ARG's constant initializer either
diff --git a/gcc/testsuite/gcc.dg/memcmp-5.c b/gcc/testsuite/gcc.dg/memcmp-5.c
new file mode 100644
index 00000000000..34bae92f6b0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/memcmp-5.c
@@ -0,0 +1,72 @@
+/* PR middle-end/78257 - missing memcmp optimization with constant arrays
+   { dg-do compile }
+   { dg-options "-O -Wall -fdump-tree-optimized" } */
+
+#define A "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" \
+          "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" \
+          "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" \
+          "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" \
+          "0"
+
+const char a257[sizeof A - 1] = A;
+const char a258[sizeof A] = A;
+
+_Static_assert (sizeof A == 258);
+_Static_assert (sizeof a257 == 257);
+
+/* Verify that initializers longer than 256 characters (an internal limit
+   on the size of a buffer used to store representations in) are handled.  */
+
+void eq_256plus (void)
+{
+  int n = 0;
+
+  n += __builtin_memcmp (a257,       A,       sizeof a257);
+  n += __builtin_memcmp (a257 +   1, A +   1, sizeof a257 - 1);
+  n += __builtin_memcmp (a257 +   2, A +   2, sizeof a257 - 2);
+  n += __builtin_memcmp (a257 + 127, A + 127, sizeof a257 - 127);
+  n += __builtin_memcmp (a257 + 128, A + 128, sizeof a257 - 128);
+  n += __builtin_memcmp (a257 + 255, A + 255, 2);
+  n += __builtin_memcmp (a257 + 256, A + 256, 1);
+
+  n += __builtin_memcmp (a258,       A,       sizeof a257);
+  n += __builtin_memcmp (a258 +   1, A +   1, sizeof a257 - 1);
+  n += __builtin_memcmp (a258 +   2, A +   2, sizeof a257 - 2);
+  n += __builtin_memcmp (a258 + 127, A + 127, sizeof a257 - 127);
+  n += __builtin_memcmp (a258 + 128, A + 128, sizeof a257 - 128);
+  n += __builtin_memcmp (a258 + 256, A + 256, 2);
+  n += __builtin_memcmp (a258 + 257, A + 257, 1);
+
+  if (n)
+    __builtin_abort ();
+}
+
+#define X "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" \
+          "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" \
+          "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" \
+          "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" \
+          "1"
+
+void lt_256plus (void)
+{
+  int n = 0;
+
+  n += 0 >  __builtin_memcmp (a257,       X,       sizeof a257);
+  n += 0 >  __builtin_memcmp (a257 +   1, X +   1, sizeof a257 - 1);
+  n += 0 >  __builtin_memcmp (a257 +   2, X +   2, sizeof a257 - 2);
+  n += 0 >  __builtin_memcmp (a257 + 127, X + 127, sizeof a257 - 127);
+  n += 0 >  __builtin_memcmp (a257 + 128, X + 128, sizeof a257 - 128);
+  n += 0 >  __builtin_memcmp (a257 + 255, X + 255, 2);
+  n += 0 >  __builtin_memcmp (a257 + 256, X + 256, 1);
+
+  n += 0 >  __builtin_memcmp (a258,       X,       sizeof a258);
+  n += 0 >  __builtin_memcmp (a258 +   1, X +   1, sizeof a258 - 1);
+  n += 0 >  __builtin_memcmp (a258 +   2, X +   2, sizeof a258 - 2);
+  n += 0 >  __builtin_memcmp (a258 + 127, X + 127, sizeof a257 - 127);
+  n += 0 >  __builtin_memcmp (a258 + 128, X + 128, sizeof a257 - 128);
+  n += 0 >  __builtin_memcmp (a258 + 256, X + 256, 2);
+  n += 0 == __builtin_memcmp (a258 + 257, X + 257, 1);
+
+  if (n != 14)
+    __builtin_abort ();
+}

Reply via email to