On Wed, 15 Jan 2025, Richard Biener wrote:

> The following makes niter analysis recognize a loop with an exit
> condition scanning over a STRING_CST.  This is done via enhancing
> the force evaluation code rather than recognizing for example
> strlen (s) as number of iterations because it allows to handle
> some more cases.
> 
> STRING_CSTs are easy to handle since nothing can write to them, also
> processing those should be cheap.  I've refrained from handling
> anything besides char8_t.
> 
> Note to avoid the -Warray-bound dianostic we have to either early unroll
> the loop (there's no final value replacement done, there's a PR
> for doing this as part of CD-DCE when possibly eliding a loop),
> or create a canonical IV so we can DCE the loads.  The latter is what
> the patch does, also avoiding to repeatedly force-evaluate niters.
> This also makes final value replacement work again since now ivcanon
> is after it.
> 
> There are some testsuite adjustments needed, in particular we now
> unroll some loops early, causing messages to appear in different
> passes but also vectorization to now no longer happening on
> outer loops.  The changes mitigate that.
> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu.  I'll wait for
> CI to pick this up and still appreciate a look at the STRING_CST
> handling details.

Pushed as r15-6990-g44d21551362f90

> Thanks,
> Richard.
> 
>       PR tree-optimization/92539
>       * tree-ssa-loop-ivcanon.cc (tree_unroll_loops_completely_1):
>       Also try force-evaluation if ivcanon did not yet run.
>       (canonicalize_loop_induction_variables):
>       When niter was computed constant by force evaluation add a
>       canonical IV if we didn't unroll.
>       * tree-ssa-loop-niter.cc (loop_niter_by_eval): When we
>       don't find a proper PHI try if the exit condition scans
>       over a STRING_CST and simulate that.
> 
>       * g++.dg/warn/Warray-bounds-pr92539.C: New testcase.
>       * gcc.dg/tree-ssa/sccp-16.c: New testcase.
>       * g++.dg/vect/pr87621.cc: Use larger power to avoid
>       inner loop unrolling.
>       * gcc.dg/vect/pr89440.c: Use larger loop bound to avoid
>       inner loop unrolling.
>       * gcc.dg/pr77975.c: Scan cunrolli dump and adjust.
> ---
>  gcc/testsuite/g++.dg/vect/pr87621.cc          |  2 +-
>  .../g++.dg/warn/Warray-bounds-pr92539.C       | 51 +++++++++++++++++++
>  gcc/testsuite/gcc.dg/pr77975.c                |  6 +--
>  gcc/testsuite/gcc.dg/tree-ssa/sccp-16.c       | 16 ++++++
>  gcc/testsuite/gcc.dg/vect/pr89440.c           |  4 +-
>  gcc/tree-ssa-loop-ivcanon.cc                  | 11 ++--
>  gcc/tree-ssa-loop-niter.cc                    | 47 ++++++++++++++++-
>  7 files changed, 127 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/warn/Warray-bounds-pr92539.C
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sccp-16.c
> 
> diff --git a/gcc/testsuite/g++.dg/vect/pr87621.cc 
> b/gcc/testsuite/g++.dg/vect/pr87621.cc
> index cfc53be4ee1..bc55116ccbf 100644
> --- a/gcc/testsuite/g++.dg/vect/pr87621.cc
> +++ b/gcc/testsuite/g++.dg/vect/pr87621.cc
> @@ -21,7 +21,7 @@ T pow(T x, unsigned int n)
>  void testVec(int* x)
>  {
>    for (int i = 0; i < 8; ++i)
> -    x[i] = pow(x[i], 10);
> +    x[i] = pow(x[i], 100);
>  }
>  
>  /* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" { target { 
> vect_double && vect_hw_misalign } } } } */
> diff --git a/gcc/testsuite/g++.dg/warn/Warray-bounds-pr92539.C 
> b/gcc/testsuite/g++.dg/warn/Warray-bounds-pr92539.C
> new file mode 100644
> index 00000000000..ea506ed1450
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/warn/Warray-bounds-pr92539.C
> @@ -0,0 +1,51 @@
> +// { dg-do compile { target c++11 } }
> +// { dg-options "-O2 -Warray-bounds" }
> +
> +static bool
> +ischar(int ch)
> +{
> +    return (0 == (ch & ~0xff) || ~0 == (ch | 0xff)) != 0;
> +}
> +
> +static bool eat(char const*& first, char const* last)
> +{
> +    if (first != last && ischar(*first)) { // { dg-bogus "bounds" }
> +        ++first;
> +        return true;
> +    }
> +    return false;
> +}
> +
> +static bool eat_two(char const*& first, char const* last)
> +{
> +    auto save = first;
> +    if (eat(first, last) && eat(first, last))
> +        return true;
> +    first = save;
> +    return false;
> +}
> +
> +static bool foo(char const*& first, char const* last)
> +{
> +    auto local_iterator = first;
> +    int i = 0;
> +    for (; i < 3; ++i)
> +        if (!eat_two(local_iterator, last))
> +            return false;
> +    first = local_iterator;
> +    return true;
> +}
> +
> +static bool test(char const* in, bool full_match = true)
> +{
> +    auto last = in;
> +    while (*last)
> +        ++last;
> +    return foo(in, last) && (!full_match || (in == last)); // { dg-bogus 
> "bounds" }
> +}
> +
> +int main()
> +{
> +    return test("aa");
> +}
> +
> diff --git a/gcc/testsuite/gcc.dg/pr77975.c b/gcc/testsuite/gcc.dg/pr77975.c
> index a187ce2b50c..9d7aad49841 100644
> --- a/gcc/testsuite/gcc.dg/pr77975.c
> +++ b/gcc/testsuite/gcc.dg/pr77975.c
> @@ -1,8 +1,8 @@
>  /* PR tree-optimization/77975 */
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-ivcanon-details" } */
> +/* { dg-options "-O2 -fdump-tree-cunrolli-details" } */
>  
> -/* { dg-final { scan-tree-dump-times "Proved that loop 1 iterates 1 times 
> using brute force" 1 "ivcanon" } } */
> +/* { dg-final { scan-tree-dump-times "Proved that loop 1 iterates 2 times 
> using brute force" 1 "cunrolli" } } */
>  
>  unsigned int
>  foo (unsigned int *b)
> @@ -17,7 +17,7 @@ foo (unsigned int *b)
>    return a; 
>  }
>  
> -/* { dg-final { scan-tree-dump-times "Proved that loop 1 iterates 2 times 
> using brute force" 1 "ivcanon" } } */
> +/* { dg-final { scan-tree-dump-times "Proved that loop 1 iterates 3 times 
> using brute force" 1 "cunrolli" } } */
>  
>  unsigned int
>  bar (unsigned int *b)
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sccp-16.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/sccp-16.c
> new file mode 100644
> index 00000000000..c35426fc8c4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/sccp-16.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-cunrolli -fdump-tree-sccp-details" } */
> +
> +int foo ()
> +{
> +  const char *s = "Hello World!";
> +  int len = 0;
> +  while (s[len])
> +    ++len;
> +  return len;
> +}
> +
> +/* For cunrolli the growth is too large, but it should add a canonical IV
> +   and SCCP peform final value replacement.  */
> +/* { dg-final { scan-tree-dump "ivtmp\[^\r\n\]*PHI\[^\r\n\]*13" "cunrolli" } 
> } */
> +/* { dg-final { scan-tree-dump "with expr: 12" "sccp" } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/pr89440.c 
> b/gcc/testsuite/gcc.dg/vect/pr89440.c
> index d84527edb44..06049696912 100644
> --- a/gcc/testsuite/gcc.dg/vect/pr89440.c
> +++ b/gcc/testsuite/gcc.dg/vect/pr89440.c
> @@ -10,7 +10,7 @@ f (float x)
>    float a = 0;
>    for (i = 0; i < 4; ++i)
>      {
> -      for (j = 0; j < 4; ++j)
> +      for (j = 0; j < 32; ++j)
>       {
>         a += 1;
>         x += a;
> @@ -23,7 +23,7 @@ int
>  main()
>  {
>    check_vect ();
> -  if (f (1.0f) != 137.0f)
> +  if (f (1.0f) != 8257.0f)
>      abort ();
>    return 0;
>  }
> diff --git a/gcc/tree-ssa-loop-ivcanon.cc b/gcc/tree-ssa-loop-ivcanon.cc
> index 6e6569e5713..d07b3d593f5 100644
> --- a/gcc/tree-ssa-loop-ivcanon.cc
> +++ b/gcc/tree-ssa-loop-ivcanon.cc
> @@ -1257,6 +1257,7 @@ canonicalize_loop_induction_variables (class loop *loop,
>    bool modified = false;
>    class tree_niter_desc niter_desc;
>    bool may_be_zero = false;
> +  bool by_eval = false;
>  
>    /* For unrolling allow conditional constant or zero iterations, thus
>       perform loop-header copying on-the-fly.  */
> @@ -1291,7 +1292,11 @@ canonicalize_loop_induction_variables (class loop 
> *loop,
>        if (try_eval
>         && (chrec_contains_undetermined (niter)
>             || TREE_CODE (niter) != INTEGER_CST))
> -     niter = find_loop_niter_by_eval (loop, &exit);
> +     {
> +       niter = find_loop_niter_by_eval (loop, &exit);
> +       if (TREE_CODE (niter) == INTEGER_CST)
> +         by_eval = true;
> +     }
>  
>        if (TREE_CODE (niter) != INTEGER_CST)
>       exit = NULL;
> @@ -1346,7 +1351,7 @@ canonicalize_loop_induction_variables (class loop *loop,
>                                 innermost_cunrolli_p))
>      return true;
>  
> -  if (create_iv
> +  if ((create_iv || by_eval)
>        && niter && !chrec_contains_undetermined (niter)
>        && exit && just_once_each_iteration_p (loop, exit->src))
>      {
> @@ -1492,7 +1497,7 @@ tree_unroll_loops_completely_1 (bool may_increase_size, 
> bool unroll_outer,
>      ul = UL_NO_GROWTH;
>  
>    if (canonicalize_loop_induction_variables
> -      (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer,
> +      (loop, false, ul, !flag_tree_loop_ivcanon || cunrolli, unroll_outer,
>         innermost, cunrolli))
>      {
>        /* If we'll continue unrolling, we need to propagate constants
> diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
> index 9e966266ca3..0e2d361ae8e 100644
> --- a/gcc/tree-ssa-loop-niter.cc
> +++ b/gcc/tree-ssa-loop-niter.cc
> @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "cfgloop.h"
>  #include "tree-chrec.h"
>  #include "tree-scalar-evolution.h"
> +#include "tree-data-ref.h"
>  #include "tree-dfa.h"
>  #include "internal-fn.h"
>  #include "gimple-range.h"
> @@ -3595,7 +3596,51 @@ loop_niter_by_eval (class loop *loop, edge exit)
>       {
>         phi = get_base_for (loop, op[j]);
>         if (!phi)
> -         return chrec_dont_know;
> +         {
> +           gassign *def;
> +           if (j == 0
> +               && (cmp == NE_EXPR || cmp == EQ_EXPR)
> +               && TREE_CODE (op[0]) == SSA_NAME
> +               && TREE_CODE (op[1]) == INTEGER_CST
> +               && (def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op[0])))
> +               && gimple_assign_rhs_code (def) == MEM_REF)
> +             {
> +               tree mem = gimple_assign_rhs1 (def);
> +               affine_iv iv;
> +               if (TYPE_MODE (TREE_TYPE (mem)) == TYPE_MODE (char_type_node)
> +                   && simple_iv (loop, loop,
> +                                 TREE_OPERAND (mem, 0), &iv, false)
> +                   && tree_fits_uhwi_p (TREE_OPERAND (mem, 1))
> +                   && tree_fits_uhwi_p (iv.step))
> +                 {
> +                   tree str, off;
> +                   /* iv.base can be &"Foo" but also (char *)&"Foo" + 1.  */
> +                   split_constant_offset (iv.base, &str, &off);
> +                   STRIP_NOPS (str);
> +                   if (TREE_CODE (str) == ADDR_EXPR
> +                       && TREE_CODE (TREE_OPERAND (str, 0)) == STRING_CST
> +                       && tree_fits_uhwi_p (off))
> +                     {
> +                       str = TREE_OPERAND (str, 0);
> +                       unsigned i = 0;
> +                       for (unsigned HOST_WIDE_INT idx
> +                            = (tree_to_uhwi (TREE_OPERAND (mem, 1))
> +                               + tree_to_uhwi (off));
> +                            idx < (unsigned)TREE_STRING_LENGTH (str)
> +                            && i < MAX_ITERATIONS_TO_TRACK;
> +                            idx += tree_to_uhwi (iv.step), ++i)
> +                         {
> +                           int res = compare_tree_int
> +                             (op[1], TREE_STRING_POINTER (str)[idx]);
> +                           if ((cmp == NE_EXPR && res == 0)
> +                               || (cmp == EQ_EXPR && res != 0))
> +                             return build_int_cst (unsigned_type_node, i);
> +                         }
> +                     }
> +                 }
> +             }
> +           return chrec_dont_know;
> +         }
>         val[j] = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
>         next[j] = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
>       }
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to