On Mon, 13 Jan 2025, Richard Biener wrote:

> The following makes niter analysis recognize a loop with an exit
> condition scanning over a STRING_CST.  This is done via enhancing
> the force evaluation code rather than recognizing for example
> strlen (s) as number of iterations because it allows to handle
> some more cases.
> 
> STRING_CSTs are easy to handle since nothing can write to them, also
> processing those should be cheap.  I'd appreciate another eye on
> the constraints I put in.
> 
> Note to avoid the -Warray-bound dianostic we have to early unroll
> the loop (there's no final value replacement done, there's a PR
> for doing this as part of CD-DCE when possibly eliding a loop).
> This works for strings up to 8 chars (including the '\0') only
> (rather than 16, the unroll niter limit) because unroll estimation
> will not see that the load from the string constant goes away.
> 
> Final value replacement doesn't work since ivcanon is now after it,
> it's not the time to move the pass though.  The pass is in theory
> supposed to add a canonical IV for the _by_eval cases, but we
> didn't "fix" this when we added cunrolli (we probably should have
> moved ivcanon very early, or made cunroll add such IV if we
> used _by_eval but did not unroll).
> 
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Some testsuite adjustments are necessary, but the following followup
handles enabling final value replacement by forcing a canonical IV
from cunrolli when we didn't unroll but used force-evaluation to
compute niter.  It also makes us handle the new testcase which ends
up with POINTER_PLUS_EXPR for the inital value.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Any thoughts?

Thanks,
Richard.

>From 705a287694404aafa72bbdc9da21dd1bf448cd85 Mon Sep 17 00:00:00 2001
From: Richard Biener <rguent...@suse.de>
Date: Mon, 13 Jan 2025 15:39:07 +0100
Subject: [PATCH] tree-optimization/92539 - handle more cases
To: gcc-patches@gcc.gnu.org

The following ontop of the previous fix also handles POINTER_PLUS_EXPR
of &"Foo" and a constant offset as happens for the added testcase
as well as making sure to add a canonical IV when we figured niter
by force evaluation during cunrolli so that work isn't wasted,
DCE can eliminate the load and SCCP perform final value replacement.

        PR tree-optimization/92539
        * tree-ssa-loop-ivcanon.cc (canonicalize_loop_induction_variables):
        When niter was computed constant by force evaluation add a
        canonical IV if we didn't unroll.
        * tre-ssa-loop-niter.cc (loop_niter_by_eval): Use
        split_constant_offset to get at a STRING_CST and an initial
        constant offset.

        * gcc.dg/tree-ssa/sccp-16.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/sccp-16.c | 16 +++++++++++
 gcc/tree-ssa-loop-ivcanon.cc            |  9 ++++--
 gcc/tree-ssa-loop-niter.cc              | 38 +++++++++++++++----------
 3 files changed, 46 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sccp-16.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sccp-16.c 
b/gcc/testsuite/gcc.dg/tree-ssa/sccp-16.c
new file mode 100644
index 00000000000..c35426fc8c4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sccp-16.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cunrolli -fdump-tree-sccp-details" } */
+
+int foo ()
+{
+  const char *s = "Hello World!";
+  int len = 0;
+  while (s[len])
+    ++len;
+  return len;
+}
+
+/* For cunrolli the growth is too large, but it should add a canonical IV
+   and SCCP peform final value replacement.  */
+/* { dg-final { scan-tree-dump "ivtmp\[^\r\n\]*PHI\[^\r\n\]*13" "cunrolli" } } 
*/
+/* { dg-final { scan-tree-dump "with expr: 12" "sccp" } } */
diff --git a/gcc/tree-ssa-loop-ivcanon.cc b/gcc/tree-ssa-loop-ivcanon.cc
index 7cc340b23c5..d07b3d593f5 100644
--- a/gcc/tree-ssa-loop-ivcanon.cc
+++ b/gcc/tree-ssa-loop-ivcanon.cc
@@ -1257,6 +1257,7 @@ canonicalize_loop_induction_variables (class loop *loop,
   bool modified = false;
   class tree_niter_desc niter_desc;
   bool may_be_zero = false;
+  bool by_eval = false;
 
   /* For unrolling allow conditional constant or zero iterations, thus
      perform loop-header copying on-the-fly.  */
@@ -1291,7 +1292,11 @@ canonicalize_loop_induction_variables (class loop *loop,
       if (try_eval
          && (chrec_contains_undetermined (niter)
              || TREE_CODE (niter) != INTEGER_CST))
-       niter = find_loop_niter_by_eval (loop, &exit);
+       {
+         niter = find_loop_niter_by_eval (loop, &exit);
+         if (TREE_CODE (niter) == INTEGER_CST)
+           by_eval = true;
+       }
 
       if (TREE_CODE (niter) != INTEGER_CST)
        exit = NULL;
@@ -1346,7 +1351,7 @@ canonicalize_loop_induction_variables (class loop *loop,
                                  innermost_cunrolli_p))
     return true;
 
-  if (create_iv
+  if ((create_iv || by_eval)
       && niter && !chrec_contains_undetermined (niter)
       && exit && just_once_each_iteration_p (loop, exit->src))
     {
diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
index 7d9b7eb7594..6e1cbcf6fb7 100644
--- a/gcc/tree-ssa-loop-niter.cc
+++ b/gcc/tree-ssa-loop-niter.cc
@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-range.h"
 #include "sreal.h"
 #include "tree-eh.h"
+#include "tree-data-ref.h"
 
 
 /* The maximum number of dominator BBs we search for conditions
@@ -3609,25 +3610,32 @@ loop_niter_by_eval (class loop *loop, edge exit)
                  if (TYPE_MODE (TREE_TYPE (mem)) == TYPE_MODE (char_type_node)
                      && simple_iv (loop, loop,
                                    TREE_OPERAND (mem, 0), &iv, false)
-                     && TREE_CODE (iv.base) == ADDR_EXPR
-                     && TREE_CODE (TREE_OPERAND (iv.base, 0)) == STRING_CST
                      && tree_fits_uhwi_p (TREE_OPERAND (mem, 1))
                      && tree_fits_uhwi_p (iv.step))
                    {
-                     tree str = TREE_OPERAND (iv.base, 0);
-                     unsigned i = 0;
-                     for (unsigned HOST_WIDE_INT idx
-                            = tree_to_uhwi (TREE_OPERAND (mem, 1));
-                          idx < (unsigned)TREE_STRING_LENGTH (str)
-                          && i < MAX_ITERATIONS_TO_TRACK;
-                          idx += tree_to_uhwi (iv.step), ++i)
+                     tree str, off;
+                     /* iv.base can be &"Foo" but also (char *)&"Foo" + 1.  */
+                     split_constant_offset (iv.base, &str, &off);
+                     STRIP_NOPS (str);
+                     if (TREE_CODE (str) == ADDR_EXPR
+                         && TREE_CODE (TREE_OPERAND (str, 0)) == STRING_CST
+                         && tree_fits_uhwi_p (off))
                        {
-                         int res
-                           = compare_tree_int (op[1],
-                                               TREE_STRING_POINTER (str)[idx]);
-                         if ((cmp == NE_EXPR && res == 0)
-                             || (cmp == EQ_EXPR && res != 0))
-                           return build_int_cst (unsigned_type_node, i);
+                         str = TREE_OPERAND (str, 0);
+                         unsigned i = 0;
+                         for (unsigned HOST_WIDE_INT idx
+                              = (tree_to_uhwi (TREE_OPERAND (mem, 1))
+                                 + tree_to_uhwi (off));
+                              idx < (unsigned)TREE_STRING_LENGTH (str)
+                              && i < MAX_ITERATIONS_TO_TRACK;
+                              idx += tree_to_uhwi (iv.step), ++i)
+                           {
+                             int res = compare_tree_int
+                               (op[1], TREE_STRING_POINTER (str)[idx]);
+                             if ((cmp == NE_EXPR && res == 0)
+                                 || (cmp == EQ_EXPR && res != 0))
+                               return build_int_cst (unsigned_type_node, i);
+                           }
                        }
                    }
                }
-- 
2.43.0

Reply via email to