https://gcc.gnu.org/g:6bebb3b0f3fa422d248a1c2075da0598a44c7320
commit r15-5821-g6bebb3b0f3fa422d248a1c2075da0598a44c7320 Author: Jakub Jelinek <ja...@redhat.com> Date: Sat Nov 30 11:30:08 2024 +0100 strlen: Handle vector CONSTRUCTORs [PR117057] The following patch handles VECTOR_TYPE_P CONSTRUCTORs in count_nonzero_bytes, including handling them if they have some elements non-constant. If there are still some constant elements before the first non-constant one (in the range queried), we derive info at least from those bytes and consider the rest as unknown. The first 3 hunks just punt in IMHO problematic cases, the spaghetti code considers byte_size 0 as unknown size, determine yourself, so if offset is equal to exp size, there are 0 bytes to consider (so nothing useful to determine), but using byte_size 0 would mean use any size. Similarly, native_encode_expr uses int type for offset (and size), so passing it an offset larger than INT_MAX could cause silent miscompilation. I've guarded the test to just a couple of targets known to handle it, because e.g. on ia32 without -msse forwprop1 seems to lower the CONSTRUCTOR into 4 BIT_FIELD_REF stores and I haven't figured out what exactly that depends on (e.g. powerpc* is fine on any CPUs, even with -mno-altivec -mno-vsx, even -m32). 2024-11-30 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/117057 * tree-ssa-strlen.cc (strlen_pass::count_nonzero_bytes): Punt also when byte_size is equal to offset or nchars. Punt if offset is bigger than INT_MAX. Handle vector CONSTRUCTOR with some elements constant, possibly followed by non-constant. * gcc.dg/strlenopt-32.c: Remove xfail and vect_slp_v2qi_store_unalign specific scan-tree-dump-times directive. * gcc.dg/strlenopt-96.c: New test. 
Diff: --- gcc/testsuite/gcc.dg/strlenopt-32.c | 3 +- gcc/testsuite/gcc.dg/strlenopt-96.c | 42 +++++++++++++++++++ gcc/tree-ssa-strlen.cc | 80 +++++++++++++++++++++++++++++++++---- 3 files changed, 115 insertions(+), 10 deletions(-) diff --git a/gcc/testsuite/gcc.dg/strlenopt-32.c b/gcc/testsuite/gcc.dg/strlenopt-32.c index c53168570fdb..4220314fb3f0 100644 --- a/gcc/testsuite/gcc.dg/strlenopt-32.c +++ b/gcc/testsuite/gcc.dg/strlenopt-32.c @@ -190,5 +190,4 @@ main () return 0; } -/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen1" { xfail vect_slp_v2qi_store_unalign } } } */ -/* { dg-final { scan-tree-dump-times "strlen \\(" 2 "strlen1" { target vect_slp_v2qi_store_unalign } } } */ +/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen1" } } */ diff --git a/gcc/testsuite/gcc.dg/strlenopt-96.c b/gcc/testsuite/gcc.dg/strlenopt-96.c new file mode 100644 index 000000000000..14347fa5eb54 --- /dev/null +++ b/gcc/testsuite/gcc.dg/strlenopt-96.c @@ -0,0 +1,42 @@ +/* PR tree-optimization/117057 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* { dg-additional-options "-msse" { target { i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "return \[0-9\]*;" 4 "optimized" { target i?86-*-* x86_64-*-* aarch64*-*-* powerpc*-*-* } } } */ + +#include "strlenopt.h" + +typedef unsigned int V __attribute__((vector_size (2 * sizeof (int)))); +typedef unsigned int W __attribute__((vector_size (4 * sizeof (int)))); + +size_t +foo (void) +{ + char a[64]; + *(long long *) a = 0x12003456789abcdeULL; + return strlen (a); +} + +size_t +bar (void) +{ + char a[64]; + *(V *) a = (V) { 0x12345678U, 0x9a00bcdeU }; + return strlen (a); +} + +size_t +baz (unsigned int x) +{ + char a[64]; + *(V *) a = (V) { 0x12005678U, x }; + return strlen (a); +} + +size_t +qux (unsigned int x) +{ + char a[64]; + *(W *)a = (W) { 0x12345678U, 0x9abcdef0U, 0x12005678U, x }; + return strlen (a); +} diff --git a/gcc/tree-ssa-strlen.cc 
b/gcc/tree-ssa-strlen.cc index 5249af064bfb..c637959f8426 100644 --- a/gcc/tree-ssa-strlen.cc +++ b/gcc/tree-ssa-strlen.cc @@ -4629,7 +4629,7 @@ strlen_pass::count_nonzero_bytes (tree exp, tree vuse, gimple *stmt, return false; unsigned HOST_WIDE_INT byte_size = tree_to_uhwi (size); - if (byte_size < offset) + if (byte_size <= offset) return false; nbytes = byte_size - offset; @@ -4682,7 +4682,7 @@ strlen_pass::count_nonzero_bytes (tree exp, tree vuse, gimple *stmt, if (TREE_CODE (exp) == STRING_CST) { unsigned nchars = TREE_STRING_LENGTH (exp); - if (nchars < offset) + if (nchars <= offset) return false; if (!nbytes) @@ -4700,7 +4700,7 @@ strlen_pass::count_nonzero_bytes (tree exp, tree vuse, gimple *stmt, unsigned char buf[256]; if (!prep) { - if (CHAR_BIT != 8 || BITS_PER_UNIT != 8) + if (CHAR_BIT != 8 || BITS_PER_UNIT != 8 || offset > INT_MAX) return false; /* If the pointer to representation hasn't been set above for STRING_CST point it at the buffer. */ @@ -4710,11 +4710,75 @@ strlen_pass::count_nonzero_bytes (tree exp, tree vuse, gimple *stmt, unsigned repsize = native_encode_expr (exp, buf, sizeof buf, offset); if (repsize < nbytes) { - /* This should only happen when REPSIZE is zero because EXP - doesn't denote an object with a known initializer, except - perhaps when the reference reads past its end. */ - lenrange[0] = 0; - prep = NULL; + /* Handle vector { 0x12345678, 0x23003412, x_1(D), y_2(D) } + and similar cases. Even when not all the elements are constant, + we can perhaps figure out something from the constant ones + and assume the others can be anything. 
*/ + if (TREE_CODE (exp) == CONSTRUCTOR + && CONSTRUCTOR_NELTS (exp) + && VECTOR_TYPE_P (TREE_TYPE (exp)) + && nbytes <= sizeof buf) + { + tree v0 = CONSTRUCTOR_ELT (exp, 0)->value; + unsigned HOST_WIDE_INT elt_sz + = int_size_in_bytes (TREE_TYPE (v0)); + unsigned int i, s = 0; + tree v, idx; + FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (exp), i, idx, v) + { + if (idx + && (VECTOR_TYPE_P (TREE_TYPE (v0)) + || !tree_fits_uhwi_p (idx) + || tree_to_uhwi (idx) != i)) + { + s = 0; + break; + } + if ((i + 1) * elt_sz <= offset) + continue; + unsigned int o = 0; + if (i * elt_sz < offset) + o = offset % elt_sz; + repsize = native_encode_expr (v, buf + s, + sizeof (buf) - s, o); + if (repsize != elt_sz - o) + break; + s += repsize; + } + if (s != 0 && s < nbytes) + { + unsigned HOST_WIDE_INT n = strnlen (prep, s); + if (n < lenrange[0]) + lenrange[0] = n; + if (lenrange[1] < n && n != s) + lenrange[1] = n; + if (lenrange[2] < nbytes) + lenrange[2] = nbytes; + /* We haven't processed all bytes, the rest are unknown. + So, clear NULTERM if none of the initial bytes are + zero, and clear ALLNUL and ALLNONNULL because we don't + know about the remaining bytes. */ + if (n == s) + *nulterm = false; + *allnul = false; + *allnonnul = false; + return true; + } + else if (s != nbytes) + { + /* See below. */ + lenrange[0] = 0; + prep = NULL; + } + } + else + { + /* This should only happen when REPSIZE is zero because EXP + doesn't denote an object with a known initializer, except + perhaps when the reference reads past its end. */ + lenrange[0] = 0; + prep = NULL; + } } else if (!nbytes) nbytes = repsize;