Hi! The following patch handles VECTOR_TYPE_P CONSTRUCTORs in count_nonzero_bytes, including handling them if they have some elements non-constant. If there are still some constant elements before it (in the range queried), we derive info at least from those bytes and consider the rest as unknown.
The first 3 hunks just punt in IMHO problematic cases, the spaghetti code considers byte_size 0 as unknown size, determine yourself, so if offset is equal to exp size, there are 0 bytes to consider (so nothing useful to determine), but using byte_size 0 would mean use any size. Similarly, native_encode_expr uses int type for offset (and size), so padding it offset larger than INT_MAX could be silent miscompilation. I've guarded the test to just a couple of targets known to handle it, because e.g. on ia32 without -msse forwprop1 seems to lower the CONSTRUCTOR into 4 BIT_FIELD_REF stores and I haven't figured out on what exactly that depends on (e.g. powerpc* is fine on any CPUs, even with -mno-altivec -mno-vsx, even -m32). Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2024-11-30 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/117057 * tree-ssa-strlen.cc (strlen_pass::count_nonzero_bytes): Punt also when byte_size is equal to offset or nchars. Punt if offset is bigger than INT_MAX. Handle vector CONSTRUCTOR with some elements constant, possibly followed by non-constant. * gcc.dg/strlenopt-32.c: Remove xfail and vect_slp_v2qi_store_unalign specific scan-tree-dump-times directive. * gcc.dg/strlenopt-96.c: New test. --- gcc/tree-ssa-strlen.cc.jj 2024-11-23 13:00:31.476982808 +0100 +++ gcc/tree-ssa-strlen.cc 2024-11-29 22:53:17.465837084 +0100 @@ -4629,7 +4629,7 @@ strlen_pass::count_nonzero_bytes (tree e return false; unsigned HOST_WIDE_INT byte_size = tree_to_uhwi (size); - if (byte_size < offset) + if (byte_size <= offset) return false; nbytes = byte_size - offset; @@ -4682,7 +4682,7 @@ strlen_pass::count_nonzero_bytes (tree e if (TREE_CODE (exp) == STRING_CST) { unsigned nchars = TREE_STRING_LENGTH (exp); - if (nchars < offset) + if (nchars <= offset) return false; if (!nbytes) @@ -4700,7 +4700,7 @@ strlen_pass::count_nonzero_bytes (tree e unsigned char buf[256]; if (!prep) { - if (CHAR_BIT != 8 || BITS_PER_UNIT != 8) + if (CHAR_BIT != 8 || BITS_PER_UNIT != 8 || offset > INT_MAX) return false; /* If the pointer to representation hasn't been set above for STRING_CST point it at the buffer. */ @@ -4710,11 +4710,75 @@ strlen_pass::count_nonzero_bytes (tree e unsigned repsize = native_encode_expr (exp, buf, sizeof buf, offset); if (repsize < nbytes) { - /* This should only happen when REPSIZE is zero because EXP - doesn't denote an object with a known initializer, except - perhaps when the reference reads past its end. */ - lenrange[0] = 0; - prep = NULL; + /* Handle vector { 0x12345678, 0x23003412, x_1(D), y_2(D) } + and similar cases. Even when not all the elements are constant, + we can perhaps figure out something from the constant ones + and assume the others can be anything. */ + if (TREE_CODE (exp) == CONSTRUCTOR + && CONSTRUCTOR_NELTS (exp) + && VECTOR_TYPE_P (TREE_TYPE (exp)) + && nbytes <= sizeof buf) + { + tree v0 = CONSTRUCTOR_ELT (exp, 0)->value; + unsigned HOST_WIDE_INT elt_sz + = int_size_in_bytes (TREE_TYPE (v0)); + unsigned int i, s = 0; + tree v, idx; + FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (exp), i, idx, v) + { + if (idx + && (VECTOR_TYPE_P (TREE_TYPE (v0)) + || !tree_fits_uhwi_p (idx) + || tree_to_uhwi (idx) != i)) + { + s = 0; + break; + } + if ((i + 1) * elt_sz <= offset) + continue; + unsigned int o = 0; + if (i * elt_sz < offset) + o = offset % elt_sz; + repsize = native_encode_expr (v, buf + s, + sizeof (buf) - s, o); + if (repsize != elt_sz - o) + break; + s += repsize; + } + if (s != 0 && s < nbytes) + { + unsigned HOST_WIDE_INT n = strnlen (prep, s); + if (n < lenrange[0]) + lenrange[0] = n; + if (lenrange[1] < n && n != s) + lenrange[1] = n; + if (lenrange[2] < nbytes) + lenrange[2] = nbytes; + /* We haven't processed all bytes, the rest are unknown. + So, clear NULTERM if none of the initial bytes are + zero, and clear ALLNUL and ALLNONNULL because we don't + know about the remaining bytes. */ + if (n == s) + *nulterm = false; + *allnul = false; + *allnonnul = false; + return true; + } + else if (s != nbytes) + { + /* See below. */ + lenrange[0] = 0; + prep = NULL; + } + } + else + { + /* This should only happen when REPSIZE is zero because EXP + doesn't denote an object with a known initializer, except + perhaps when the reference reads past its end. */ + lenrange[0] = 0; + prep = NULL; + } } else if (!nbytes) nbytes = repsize; --- gcc/testsuite/gcc.dg/strlenopt-32.c.jj 2024-10-11 11:30:38.865389752 +0200 +++ gcc/testsuite/gcc.dg/strlenopt-32.c 2024-11-29 15:52:20.296109588 +0100 @@ -190,5 +190,4 @@ main () return 0; } -/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen1" { xfail vect_slp_v2qi_store_unalign } } } */ -/* { dg-final { scan-tree-dump-times "strlen \\(" 2 "strlen1" { target vect_slp_v2qi_store_unalign } } } */ +/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen1" } } */ --- gcc/testsuite/gcc.dg/strlenopt-96.c.jj 2024-11-29 15:50:37.233571779 +0100 +++ gcc/testsuite/gcc.dg/strlenopt-96.c 2024-11-30 01:45:29.638067059 +0100 @@ -0,0 +1,42 @@ +/* PR tree-optimization/117057 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* { dg-additional-options "-msse" { target { i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "return \[0-9\]*;" 4 "optimized" { target i?86-*-* x86_64-*-* aarch64*-*-* powerpc*-*-* } } } */ + +#include "strlenopt.h" + +typedef unsigned int V __attribute__((vector_size (2 * sizeof (int)))); +typedef unsigned int W __attribute__((vector_size (4 * sizeof (int)))); + +size_t +foo (void) +{ + char a[64]; + *(long long *) a = 0x12003456789abcdeULL; + return strlen (a); +} + +size_t +bar (void) +{ + char a[64]; + *(V *) a = (V) { 0x12345678U, 0x9a00bcdeU }; + return strlen (a); +} + +size_t +baz (unsigned int x) +{ + char a[64]; + *(V *) a = (V) { 0x12005678U, x }; + return strlen (a); +} + +size_t +qux (unsigned int x) +{ + char a[64]; + *(W *)a = (W) { 0x12345678U, 0x9abcdef0U, 0x12005678U, x }; + return strlen (a); +} Jakub