This is the second patch for two IVs combining.
When one IV is chasing another one, to make it safe, we should check if
there is wrap/overflow for either IV.

With the assumption, which computed as this patch, the number of iterations
can be caculated, even the no_overflow flag is not updated for unsigned IVs,
like the test case of this patch.

This patch pass bootstrap and regtest on ppc64le and x86_64.
Is this ok for trunk, or it may more suitable for stage1.

BR,
Jiufu

        PR tree-optimization/102131

gcc/ChangeLog:

        * tree-ssa-loop-niter.c (get_step_count): New function.
        (iv_chase_assumption): New function.
        (number_of_iterations_cond): Call iv_chase_assumption.

gcc/testsuite/ChangeLog:

        * gcc.dg/vect/pr102131.c: New test.


---
 gcc/tree-ssa-loop-niter.c            | 73 ++++++++++++++++++++++++----
 gcc/testsuite/gcc.dg/vect/pr102131.c | 47 ++++++++++++++++++
 2 files changed, 110 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr102131.c

diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 439d595a79f..56261164f28 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -1788,6 +1788,63 @@ dump_affine_iv (FILE *file, affine_iv *iv)
     }
 }
 
+/* Generate expr: (HIGH - LOW) / STEP, under UTYPE. */
+
+static tree
+get_step_count (tree high, tree low, tree step, tree utype,
+               bool end_inclusive = false)
+{
+  tree delta = fold_build2 (MINUS_EXPR, TREE_TYPE (low), high, low);
+  delta = fold_convert (utype, delta);
+  if (end_inclusive)
+    delta = fold_build2 (PLUS_EXPR, utype, delta, build_one_cst (utype));
+
+  if (tree_int_cst_sign_bit (step))
+    step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
+  step = fold_convert (utype, step);
+
+  return fold_build2 (FLOOR_DIV_EXPR, utype, delta, step);
+}
+
+/*  Get the additional assumption if both two steps are not zero.
+    Assumptions satisfy that there is no overflow or wrap during
+    v0 and v1 chasing.  */
+
+static tree
+iv_chase_assumption (affine_iv *iv0, affine_iv *iv1, tree step,
+                    enum tree_code code)
+{
+  /* Here, no need additional assumptions for NE.  */
+  if (code == NE_EXPR)
+    return boolean_true_node;
+
+  /* No need addition assumption for pointer.  */
+  tree type = TREE_TYPE (iv0->base);
+  if (POINTER_TYPE_P (type))
+    return boolean_true_node;
+
+  bool positive0 = !tree_int_cst_sign_bit (iv0->step);
+  bool positive1 = !tree_int_cst_sign_bit (iv1->step);
+  tree utype = unsigned_type_for (type);
+  bool add1 = code == LE_EXPR;
+  tree niter = get_step_count (iv1->base, iv0->base, step, utype, add1);
+
+  int prec = TYPE_PRECISION (type);
+  signop sgn = TYPE_SIGN (type);
+  tree max = wide_int_to_tree (type, wi::max_value (prec, sgn));
+  tree min = wide_int_to_tree (type, wi::min_value (prec, sgn));
+  tree valid_niter0, valid_niter1;
+
+  valid_niter0 = positive0 ? get_step_count (max, iv0->base, iv0->step, utype)
+                          : get_step_count (iv0->base, min, iv0->step, utype);
+  valid_niter1 = positive1 ? get_step_count (max, iv1->base, iv1->step, utype)
+                          : get_step_count (iv1->base, min, iv1->step, utype);
+
+  tree e0 = fold_build2 (LT_EXPR, boolean_type_node, niter, valid_niter0);
+  tree e1 = fold_build2 (LT_EXPR, boolean_type_node, niter, valid_niter1);
+  return fold_build2 (TRUTH_AND_EXPR, boolean_type_node, e0, e1);
+}
+
 /* Determine the number of iterations according to condition (for staying
    inside loop) which compares two induction variables using comparison
    operator CODE.  The induction variable on left side of the comparison
@@ -1879,11 +1936,10 @@ number_of_iterations_cond (class loop *loop,
        {iv0.base, iv0.step - iv1.step} cmp_code {iv1.base, 0}
 
      provided that either below condition is satisfied:
+     a. iv0.step and iv1.step are integer.
+     b. Additional condition: before iv0 chase up v1, iv0 and iv1 should not
+     step over min or max of the type.  */
 
-       a) the test is NE_EXPR;
-       b) iv0.step - iv1.step is integer and iv0/iv1 don't overflow.
-
-     This rarely occurs in practice, but it is simple enough to manage.  */
   if (!integer_zerop (iv0->step) && !integer_zerop (iv1->step))
     {
       tree step_type = POINTER_TYPE_P (type) ? sizetype : type;
@@ -1894,15 +1950,12 @@ number_of_iterations_cond (class loop *loop,
       if (tree_int_cst_sign_bit (step))
        return false;
 
-      if (code != NE_EXPR
-         && (TREE_CODE (step) != INTEGER_CST
-             || !iv0->no_overflow || !iv1->no_overflow))
+      niter->assumptions = iv_chase_assumption (iv0, iv1, step, code);
+      if (integer_zerop (niter->assumptions))
        return false;
 
       iv0->step = step;
-      if (!POINTER_TYPE_P (type))
-       iv0->no_overflow = false;
-
+      iv0->no_overflow = true;
       iv1->step = build_int_cst (step_type, 0);
       iv1->no_overflow = true;
     }
diff --git a/gcc/testsuite/gcc.dg/vect/pr102131.c 
b/gcc/testsuite/gcc.dg/vect/pr102131.c
new file mode 100644
index 00000000000..23975cfeadb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr102131.c
@@ -0,0 +1,47 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+#define MAX ((unsigned int) 0xffffffff)
+#define MIN ((unsigned int) (0))
+
+int arr[512];
+
+#define FUNC(NAME, CODE, S0, S1)                                               
\
+  unsigned __attribute__ ((noinline)) NAME (unsigned int b0, unsigned int b1)  
\
+  {                                                                            
\
+    unsigned int n = 0;                                                        
\
+    unsigned int i0, i1;                                                       
\
+    int *p = arr;                                                              
\
+    for (i0 = b0, i1 = b1; i0 CODE i1; i0 += S0, i1 += S1)                     
\
+      {                                                                        
\
+       n++;                                                                   \
+       *p++ = i0 + i1;                                                        \
+      }                                                                        
\
+    return n;                                                                  
\
+  }
+
+FUNC (lt_5_1, <, 5, 1);
+FUNC (le_1_m5, <=, 1, -5);
+FUNC (lt_1_10, <, 1, 10);
+
+int
+main ()
+{
+  int fail = 0;
+  if (lt_5_1 (MAX - 124, MAX - 27) != 28)
+    fail++;
+
+  /* to save time, do not run this. */
+  /*
+  if (le_1_m5 (MIN + 1, MIN + 9) != 715827885)
+    fail++;  */
+
+  if (lt_1_10 (MAX - 1000, MAX - 500) != 51)
+    fail++;
+
+  if (fail)
+    __builtin_abort ();
+  
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
-- 
2.17.1

Reply via email to