On Wed, 15 Jan 2025, Richard Biener wrote: > The following makes niter analysis recognize a loop with an exit > condition scanning over a STRING_CST. This is done via enhancing > the force evaluation code rather than recognizing for example > strlen (s) as number of iterations because it allows to handle > some more cases. > > STRING_CSTs are easy to handle since nothing can write to them, also > processing those should be cheap. I've refrained from handling > anything besides char8_t. > > Note to avoid the -Warray-bound dianostic we have to either early unroll > the loop (there's no final value replacement done, there's a PR > for doing this as part of CD-DCE when possibly eliding a loop), > or create a canonical IV so we can DCE the loads. The latter is what > the patch does, also avoiding to repeatedly force-evaluate niters. > This also makes final value replacement work again since now ivcanon > is after it. > > There are some testsuite adjustments needed, in particular we now > unroll some loops early, causing messages to appear in different > passes but also vectorization to now no longer happening on > outer loops. The changes mitigate that. > > Bootstrapped and tested on x86_64-unknown-linux-gnu. I'll wait for > CI to pick this up and still appreciate a look at the STRING_CST > handling details.
Pushed as r15-6990-g44d21551362f90 > Thanks, > Richard. > > PR tree-optimization/92539 > * tree-ssa-loop-ivcanon.cc (tree_unroll_loops_completely_1): > Also try force-evaluation if ivcanon did not yet run. > (canonicalize_loop_induction_variables): > When niter was computed constant by force evaluation add a > canonical IV if we didn't unroll. > * tree-ssa-loop-niter.cc (loop_niter_by_eval): When we > don't find a proper PHI try if the exit condition scans > over a STRING_CST and simulate that. > > * g++.dg/warn/Warray-bounds-pr92539.C: New testcase. > * gcc.dg/tree-ssa/sccp-16.c: New testcase. > * g++.dg/vect/pr87621.cc: Use larger power to avoid > inner loop unrolling. > * gcc.dg/vect/pr89440.c: Use larger loop bound to avoid > inner loop unrolling. > * gcc.dg/pr77975.c: Scan cunrolli dump and adjust. > --- > gcc/testsuite/g++.dg/vect/pr87621.cc | 2 +- > .../g++.dg/warn/Warray-bounds-pr92539.C | 51 +++++++++++++++++++ > gcc/testsuite/gcc.dg/pr77975.c | 6 +-- > gcc/testsuite/gcc.dg/tree-ssa/sccp-16.c | 16 ++++++ > gcc/testsuite/gcc.dg/vect/pr89440.c | 4 +- > gcc/tree-ssa-loop-ivcanon.cc | 11 ++-- > gcc/tree-ssa-loop-niter.cc | 47 ++++++++++++++++- > 7 files changed, 127 insertions(+), 10 deletions(-) > create mode 100644 gcc/testsuite/g++.dg/warn/Warray-bounds-pr92539.C > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sccp-16.c > > diff --git a/gcc/testsuite/g++.dg/vect/pr87621.cc > b/gcc/testsuite/g++.dg/vect/pr87621.cc > index cfc53be4ee1..bc55116ccbf 100644 > --- a/gcc/testsuite/g++.dg/vect/pr87621.cc > +++ b/gcc/testsuite/g++.dg/vect/pr87621.cc > @@ -21,7 +21,7 @@ T pow(T x, unsigned int n) > void testVec(int* x) > { > for (int i = 0; i < 8; ++i) > - x[i] = pow(x[i], 10); > + x[i] = pow(x[i], 100); > } > > /* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" { target { > vect_double && vect_hw_misalign } } } } */ > diff --git a/gcc/testsuite/g++.dg/warn/Warray-bounds-pr92539.C > b/gcc/testsuite/g++.dg/warn/Warray-bounds-pr92539.C > new file mode 100644 > index 00000000000..ea506ed1450 > --- /dev/null > +++ b/gcc/testsuite/g++.dg/warn/Warray-bounds-pr92539.C > @@ -0,0 +1,51 @@ > +// { dg-do compile { target c++11 } } > +// { dg-options "-O2 -Warray-bounds" } > + > +static bool > +ischar(int ch) > +{ > + return (0 == (ch & ~0xff) || ~0 == (ch | 0xff)) != 0; > +} > + > +static bool eat(char const*& first, char const* last) > +{ > + if (first != last && ischar(*first)) { // { dg-bogus "bounds" } > + ++first; > + return true; > + } > + return false; > +} > + > +static bool eat_two(char const*& first, char const* last) > +{ > + auto save = first; > + if (eat(first, last) && eat(first, last)) > + return true; > + first = save; > + return false; > +} > + > +static bool foo(char const*& first, char const* last) > +{ > + auto local_iterator = first; > + int i = 0; > + for (; i < 3; ++i) > + if (!eat_two(local_iterator, last)) > + return false; > + first = local_iterator; > + return true; > +} > + > +static bool test(char const* in, bool full_match = true) > +{ > + auto last = in; > + while (*last) > + ++last; > + return foo(in, last) && (!full_match || (in == last)); // { dg-bogus > "bounds" } > +} > + > +int main() > +{ > + return test("aa"); > +} > + > diff --git a/gcc/testsuite/gcc.dg/pr77975.c b/gcc/testsuite/gcc.dg/pr77975.c > index a187ce2b50c..9d7aad49841 100644 > --- a/gcc/testsuite/gcc.dg/pr77975.c > +++ b/gcc/testsuite/gcc.dg/pr77975.c > @@ -1,8 +1,8 @@ > /* PR tree-optimization/77975 */ > /* { dg-do compile } */ > -/* { dg-options "-O2 -fdump-tree-ivcanon-details" } */ > +/* { dg-options "-O2 -fdump-tree-cunrolli-details" } */ > > -/* { dg-final { scan-tree-dump-times "Proved that loop 1 iterates 1 times > using brute force" 1 "ivcanon" } } */ > +/* { dg-final { scan-tree-dump-times "Proved that loop 1 iterates 2 times > using brute force" 1 "cunrolli" } } */ > > unsigned int > foo (unsigned int *b) > @@ -17,7 +17,7 @@ foo (unsigned int *b) > return a; > } > > -/* { dg-final { scan-tree-dump-times "Proved that loop 1 iterates 2 times > using brute force" 1 "ivcanon" } } */ > +/* { dg-final { scan-tree-dump-times "Proved that loop 1 iterates 3 times > using brute force" 1 "cunrolli" } } */ > > unsigned int > bar (unsigned int *b) > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sccp-16.c > b/gcc/testsuite/gcc.dg/tree-ssa/sccp-16.c > new file mode 100644 > index 00000000000..c35426fc8c4 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sccp-16.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-cunrolli -fdump-tree-sccp-details" } */ > + > +int foo () > +{ > + const char *s = "Hello World!"; > + int len = 0; > + while (s[len]) > + ++len; > + return len; > +} > + > +/* For cunrolli the growth is too large, but it should add a canonical IV > + and SCCP peform final value replacement. */ > +/* { dg-final { scan-tree-dump "ivtmp\[^\r\n\]*PHI\[^\r\n\]*13" "cunrolli" } > } */ > +/* { dg-final { scan-tree-dump "with expr: 12" "sccp" } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/pr89440.c > b/gcc/testsuite/gcc.dg/vect/pr89440.c > index d84527edb44..06049696912 100644 > --- a/gcc/testsuite/gcc.dg/vect/pr89440.c > +++ b/gcc/testsuite/gcc.dg/vect/pr89440.c > @@ -10,7 +10,7 @@ f (float x) > float a = 0; > for (i = 0; i < 4; ++i) > { > - for (j = 0; j < 4; ++j) > + for (j = 0; j < 32; ++j) > { > a += 1; > x += a; > @@ -23,7 +23,7 @@ int > main() > { > check_vect (); > - if (f (1.0f) != 137.0f) > + if (f (1.0f) != 8257.0f) > abort (); > return 0; > } > diff --git a/gcc/tree-ssa-loop-ivcanon.cc b/gcc/tree-ssa-loop-ivcanon.cc > index 6e6569e5713..d07b3d593f5 100644 > --- a/gcc/tree-ssa-loop-ivcanon.cc > +++ b/gcc/tree-ssa-loop-ivcanon.cc > @@ -1257,6 +1257,7 @@ canonicalize_loop_induction_variables (class loop *loop, > bool modified = false; > class tree_niter_desc niter_desc; > bool may_be_zero = false; > + bool by_eval = false; > > /* For unrolling allow conditional constant or zero iterations, thus > perform loop-header copying on-the-fly. */ > @@ -1291,7 +1292,11 @@ canonicalize_loop_induction_variables (class loop > *loop, > if (try_eval > && (chrec_contains_undetermined (niter) > || TREE_CODE (niter) != INTEGER_CST)) > - niter = find_loop_niter_by_eval (loop, &exit); > + { > + niter = find_loop_niter_by_eval (loop, &exit); > + if (TREE_CODE (niter) == INTEGER_CST) > + by_eval = true; > + } > > if (TREE_CODE (niter) != INTEGER_CST) > exit = NULL; > @@ -1346,7 +1351,7 @@ canonicalize_loop_induction_variables (class loop *loop, > innermost_cunrolli_p)) > return true; > > - if (create_iv > + if ((create_iv || by_eval) > && niter && !chrec_contains_undetermined (niter) > && exit && just_once_each_iteration_p (loop, exit->src)) > { > @@ -1492,7 +1497,7 @@ tree_unroll_loops_completely_1 (bool may_increase_size, > bool unroll_outer, > ul = UL_NO_GROWTH; > > if (canonicalize_loop_induction_variables > - (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer, > + (loop, false, ul, !flag_tree_loop_ivcanon || cunrolli, unroll_outer, > innermost, cunrolli)) > { > /* If we'll continue unrolling, we need to propagate constants > diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc > index 9e966266ca3..0e2d361ae8e 100644 > --- a/gcc/tree-ssa-loop-niter.cc > +++ b/gcc/tree-ssa-loop-niter.cc > @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see > #include "cfgloop.h" > #include "tree-chrec.h" > #include "tree-scalar-evolution.h" > +#include "tree-data-ref.h" > #include "tree-dfa.h" > #include "internal-fn.h" > #include "gimple-range.h" > @@ -3595,7 +3596,51 @@ loop_niter_by_eval (class loop *loop, edge exit) > { > phi = get_base_for (loop, op[j]); > if (!phi) > - return chrec_dont_know; > + { > + gassign *def; > + if (j == 0 > + && (cmp == NE_EXPR || cmp == EQ_EXPR) > + && TREE_CODE (op[0]) == SSA_NAME > + && TREE_CODE (op[1]) == INTEGER_CST > + && (def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op[0]))) > + && gimple_assign_rhs_code (def) == MEM_REF) > + { > + tree mem = gimple_assign_rhs1 (def); > + affine_iv iv; > + if (TYPE_MODE (TREE_TYPE (mem)) == TYPE_MODE (char_type_node) > + && simple_iv (loop, loop, > + TREE_OPERAND (mem, 0), &iv, false) > + && tree_fits_uhwi_p (TREE_OPERAND (mem, 1)) > + && tree_fits_uhwi_p (iv.step)) > + { > + tree str, off; > + /* iv.base can be &"Foo" but also (char *)&"Foo" + 1. */ > + split_constant_offset (iv.base, &str, &off); > + STRIP_NOPS (str); > + if (TREE_CODE (str) == ADDR_EXPR > + && TREE_CODE (TREE_OPERAND (str, 0)) == STRING_CST > + && tree_fits_uhwi_p (off)) > + { > + str = TREE_OPERAND (str, 0); > + unsigned i = 0; > + for (unsigned HOST_WIDE_INT idx > + = (tree_to_uhwi (TREE_OPERAND (mem, 1)) > + + tree_to_uhwi (off)); > + idx < (unsigned)TREE_STRING_LENGTH (str) > + && i < MAX_ITERATIONS_TO_TRACK; > + idx += tree_to_uhwi (iv.step), ++i) > + { > + int res = compare_tree_int > + (op[1], TREE_STRING_POINTER (str)[idx]); > + if ((cmp == NE_EXPR && res == 0) > + || (cmp == EQ_EXPR && res != 0)) > + return build_int_cst (unsigned_type_node, i); > + } > + } > + } > + } > + return chrec_dont_know; > + } > val[j] = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop)); > next[j] = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); > } > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)