Changes since v1:
* Update assumptions for niter, add more test cases check
* Use widest_int/wide_int instead mpz to do +-/
* Move some early check for quick return
For code like:
unsigned foo(unsigned val, unsigned start)
{
unsigned cnt = 0;
for (unsigned i = start; i > val; ++i)
cnt++;
return cnt;
}
The number of iterations should be about UINT_MAX - start.
There is function adjust_cond_for_loop_until_wrap which
handles similar work for const bases.
Like adjust_cond_for_loop_until_wrap, this patch enhance
function number_of_iterations_cond/number_of_iterations_lt
to analyze number of iterations for this kind of loop.
Bootstrap and regtest pass on powerpc64le, x86_64 and aarch64.
Is this ok for trunk?
gcc/ChangeLog:
2021-07-07 Jiufu Guo <guoji...@linux.ibm.com>
PR tree-optimization/101145
* tree-ssa-loop-niter.c (number_of_iterations_until_wrap):
New function.
(number_of_iterations_lt): Invoke above function.
(adjust_cond_for_loop_until_wrap):
Merge to number_of_iterations_until_wrap.
(number_of_iterations_cond): Update invokes for
adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
gcc/testsuite/ChangeLog:
2021-07-07 Jiufu Guo <guoji...@linux.ibm.com>
PR tree-optimization/101145
* gcc.dg/vect/pr101145.c: New test.
* gcc.dg/vect/pr101145.inc: New test.
* gcc.dg/vect/pr101145_1.c: New test.
* gcc.dg/vect/pr101145_2.c: New test.
* gcc.dg/vect/pr101145_3.c: New test.
* gcc.dg/vect/pr101145inf.c: New test.
* gcc.dg/vect/pr101145inf.inc: New test.
* gcc.dg/vect/pr101145inf_1.c: New test.
---
gcc/testsuite/gcc.dg/vect/pr101145.c | 187 ++++++++++++++++++++++
gcc/testsuite/gcc.dg/vect/pr101145.inc | 63 ++++++++
gcc/testsuite/gcc.dg/vect/pr101145_1.c | 15 ++
gcc/testsuite/gcc.dg/vect/pr101145_2.c | 15 ++
gcc/testsuite/gcc.dg/vect/pr101145_3.c | 15 ++
gcc/testsuite/gcc.dg/vect/pr101145inf.c | 25 +++
gcc/testsuite/gcc.dg/vect/pr101145inf.inc | 28 ++++
gcc/testsuite/gcc.dg/vect/pr101145inf_1.c | 23 +++
gcc/tree-ssa-loop-niter.c | 157 ++++++++++--------
9 files changed, 463 insertions(+), 65 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.inc
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c
b/gcc/testsuite/gcc.dg/vect/pr101145.c
new file mode 100644
index 00000000000..74031b031cf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
@@ -0,0 +1,187 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#include <limits.h>
+
+unsigned __attribute__ ((noinline))
+foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+ while (n < ++l)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
+{
+ while (UINT_MAX - 64 < ++l)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
n)
+{
+ l = UINT_MAX - 32;
+ while (n < ++l)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
n)
+{
+ while (n <= ++l)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
n)
+{ // infininate
+ while (0 <= ++l)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
n)
+{
+ //no loop
+ l = UINT_MAX;
+ while (n < ++l)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+ while (--l < n)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
+{
+ while (--l < 64)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
n)
+{
+ l = 32;
+ while (--l < n)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+
+int a[3200], b[3200];
+int fail;
+
+int
+main ()
+{
+ unsigned l, n;
+ unsigned res;
+ /* l > n*/
+ n = UINT_MAX - 64;
+ l = n + 32;
+ res = foo (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n;
+ res = foo (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n - 1;
+ res = foo (a, b, l, n);
+ if (res != l + 1)
+ fail++;
+
+ l = n - 32;
+ res = foo (a, b, l, n);
+ if (res != l + 1)
+ fail++;
+
+ l = UINT_MAX;
+ res = foo (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n + 32;
+ res = foo_1 (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n + 32;
+ res = foo_2 (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n;
+ res = foo_3 (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n - 1;
+ res = foo_3 (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n - 2;
+ res = foo_3 (a, b, l, n);
+ if (res != l + 1)
+ fail++;
+
+ res = foo_5 (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ n = 64;
+ l = n - 32;
+ res = bar (a, b, l, n);
+ res++;
+ if (res != 0)
+ fail++;
+
+ l = n;
+ res = bar (a, b, l, n);
+ res++;
+ if (res != 0)
+ fail++;
+
+ l = n + 1;
+ res = bar (a, b, l, n);
+ res++;
+ if (res != l)
+ fail++;
+
+ l = 0;
+ res = bar (a, b, l, n);
+ res++;
+ if (res != l)
+ fail++;
+
+ l = 32;
+ res = bar_1 (a, b, l, n);
+ res++;
+ if (res != 0)
+ fail++;
+
+ res = bar_1 (a, b, l, n);
+ res++;
+ if (res != 0)
+ fail++;
+
+ if (fail)
+ __builtin_abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" } }
*/
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc
b/gcc/testsuite/gcc.dg/vect/pr101145.inc
new file mode 100644
index 00000000000..6eed3fa8aca
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc
@@ -0,0 +1,63 @@
+TYPE __attribute__ ((noinline))
+foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
+{
+ for (l = L_BASE; n < l; l += C)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+TYPE __attribute__ ((noinline))
+bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
+{
+ for (l = L_BASE_DOWN; l < n; l -= C)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+int __attribute__ ((noinline)) neq (int a, int b) { return a != b; }
+
+int a[1000], b[1000];
+int fail;
+
+int
+main ()
+{
+ TYPE res;
+ TYPE l;
+ TYPE n;
+ n = N_BASE;
+ l = n - C;
+ res = foo_sign (a, b, l, n);
+ if (res != l)
+ fail++;
+
+ l = n;
+ res = foo_sign (a, b, l, n);
+ if (res != l)
+ fail++;
+
+ l = n + C;
+ res = foo_sign (a, b, l, n);
+ if (neq ((res - MIN) / C, 0))
+ fail++;
+
+ n = N_BASE_DOWN;
+ l = n - C;
+ res = bar_sign (a, b, l, n);
+ if (neq ((MAX - res) / C, 0))
+ fail++;
+
+ l = n;
+ res = bar_sign (a, b, l, n);
+ if (res != l)
+ fail++;
+
+ l = n + C;
+ res = bar_sign (a, b, l, n);
+ if (res != l)
+ fail++;
+
+ if (fail)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c
b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
new file mode 100644
index 00000000000..94f6b99b893
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
@@ -0,0 +1,15 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#define TYPE signed char
+#define MIN -128
+#define MAX 127
+#define N_BASE (MAX - 32)
+#define N_BASE_DOWN (MIN + 32)
+
+#define C 3
+#define L_BASE l
+#define L_BASE_DOWN l
+
+#include "pr101145.inc"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } }
*/
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c
b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
new file mode 100644
index 00000000000..d3cfc9e01e1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
@@ -0,0 +1,15 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#define TYPE unsigned char
+#define MIN 0
+#define MAX 255
+#define N_BASE (MAX - 32 + 1)
+#define N_BASE_DOWN (MIN + 32)
+
+#define C 2
+#define L_BASE l
+#define L_BASE_DOWN l
+
+#include "pr101145.inc"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } }
*/
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
new file mode 100644
index 00000000000..9d5863726a5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
@@ -0,0 +1,15 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#define TYPE int *
+#define MIN ((TYPE)0)
+#define MAX ((TYPE)((long long)-1))
+#define N_BASE (MIN - 32)
+#define N_BASE_DOWN (MIN + 32)
+
+#define C 1
+#define L_BASE l
+#define L_BASE_DOWN l
+
+#include "pr101145.inc"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } }
*/
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf.c
b/gcc/testsuite/gcc.dg/vect/pr101145inf.c
new file mode 100644
index 00000000000..ed49f5670b5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145inf.c
@@ -0,0 +1,25 @@
+/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */
+/* { dg-options "-O3" } */
+#include <limits.h>
+#include "pr101145inf.inc"
+
+__attribute__ ((noinline))
+unsigned foo(unsigned val, unsigned start)
+{
+ unsigned cnt = 0;
+ for (unsigned i = start; val <= i; i+=16)
+ cnt++;
+ return cnt;
+}
+
+void test_finite ()
+{
+ unsigned n = foo (16, UINT_MAX - 32);
+ if (n != 3)
+ __builtin_abort ();
+}
+
+void test_infinite ()
+{
+ foo (15, UINT_MAX - 32);
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf.inc
b/gcc/testsuite/gcc.dg/vect/pr101145inf.inc
new file mode 100644
index 00000000000..4aa3d049187
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145inf.inc
@@ -0,0 +1,28 @@
+#include <unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+
+void test_finite ();
+void test_infinite ();
+
+void do_exit (int i)
+{
+ exit (0);
+}
+
+int main(void)
+{
+ test_finite ();
+ struct sigaction s;
+ sigemptyset (&s.sa_mask);
+ s.sa_handler = do_exit;
+ s.sa_flags = 0;
+ sigaction (SIGALRM, &s, NULL);
+ alarm (1);
+
+ test_infinite ();
+
+ __builtin_abort ();
+ return 1;
+}
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c
b/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c
new file mode 100644
index 00000000000..4ee3e31c7fe
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c
@@ -0,0 +1,23 @@
+/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */
+/* { dg-options "-O3" } */
+#include <limits.h>
+#include "pr101145inf.inc"
+
+__attribute__ ((noinline))
+unsigned foo(unsigned val, unsigned start)
+{
+ unsigned cnt = 0;
+ for (unsigned i = start; i < val; i-=16)
+ cnt++;
+ return cnt;
+}
+
+void test_finite ()
+{
+ foo (UINT_MAX - 15, 32);
+}
+
+void test_infinite ()
+{
+ foo (UINT_MAX - 14, 32);
+}
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index b5add827018..85c179be68d 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -1473,6 +1473,93 @@ assert_loop_rolls_lt (tree type, affine_iv
*iv0, affine_iv *iv1,
}
}
+/* Determines number of iterations of loop whose ending condition
+ is IV0 < IV1 which likes: {base, -C} < n, or n < {base, C}.
+ The number of iterations is stored to NITER. */
+
+static bool
+number_of_iterations_until_wrap (class loop *, tree type, affine_iv
*iv0,
+ affine_iv *iv1, class tree_niter_desc *niter)
+{
+ tree niter_type = unsigned_type_for (type);
+ tree step, num, assumptions, may_be_zero;
+ wide_int high, low, max, min;
+
+ may_be_zero = fold_build2 (LE_EXPR, boolean_type_node, iv1->base,
iv0->base);
+ if (integer_onep (may_be_zero))
+ return false;
+
+ int prec = TYPE_PRECISION (type);
+ signop sgn = TYPE_SIGN (type);
+ min = wi::min_value (prec, sgn);
+ max = wi::max_value (prec, sgn);
+
+ /* n < {base, C}. */
+ if (integer_zerop (iv0->step) && !tree_int_cst_sign_bit (iv1->step))
+ {
+ step = iv1->step;
+ /* MIN + C - 1 <= n. */
+ tree last = wide_int_to_tree (type, min + wi::to_wide (step) -
1);
+ assumptions = fold_build2 (LE_EXPR, boolean_type_node, last,
iv0->base);
+ if (integer_zerop (assumptions))
+ return false;
+
+ num = fold_build2 (MINUS_EXPR, niter_type, wide_int_to_tree
(type, max),
+ iv1->base);
+ high = max;
+ if (TREE_CODE (iv1->base) == INTEGER_CST)
+ low = wi::to_wide (iv1->base) - 1;
+ else if (TREE_CODE (iv0->base) == INTEGER_CST)
+ low = wi::to_wide (iv0->base);
+ else
+ low = min;
+ }
+ /* {base, -C} < n. */
+ else if (tree_int_cst_sign_bit (iv0->step) && integer_zerop
(iv1->step))
+ {
+ step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step),
iv0->step);
+ /* MAX - C + 1 >= n. */
+ tree last = wide_int_to_tree (type, max - wi::to_wide (step) +
1);
+ assumptions = fold_build2 (GE_EXPR, boolean_type_node, last,
iv1->base);
+ if (integer_zerop (assumptions))
+ return false;
+
+ num = fold_build2 (MINUS_EXPR, niter_type, iv0->base,
+ wide_int_to_tree (type, min));
+ low = min;
+ if (TREE_CODE (iv0->base) == INTEGER_CST)
+ high = wi::to_wide (iv0->base) + 1;
+ else if (TREE_CODE (iv1->base) == INTEGER_CST)
+ high = wi::to_wide (iv1->base);
+ else
+ high = max;
+ }
+ else
+ return false;
+
+ /* (delta + step - 1) / step */
+ step = fold_convert (niter_type, step);
+ num = fold_convert (niter_type, num);
+ num = fold_build2 (PLUS_EXPR, niter_type, num, step);
+ niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, num, step);
+
+ widest_int delta, s;
+ delta = widest_int::from (high, sgn) - widest_int::from (low, sgn);
+ s = wi::to_widest (step);
+ delta = delta + s - 1;
+ niter->max = wi::udiv_floor (delta, s);
+
+ niter->may_be_zero = may_be_zero;
+
+ if (!integer_nonzerop (assumptions))
+ niter->assumptions = fold_build2 (TRUTH_AND_EXPR,
boolean_type_node,
+ niter->assumptions, assumptions);
+
+ niter->control.no_overflow = false;
+
+ return true;
+}
+
/* Determines number of iterations of loop whose ending condition
is IV0 < IV1. TYPE is the type of the iv. The number of
iterations is stored to NITER. BNDS bounds the difference
@@ -1501,6 +1588,11 @@ number_of_iterations_lt (class loop *loop, tree
type, affine_iv *iv0,
niter->bound = iv0->base;
}
+ /* {base, -C} < n, or n < {base, C} */
+ if (tree_int_cst_sign_bit (iv0->step)
+ || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit
(iv1->step)))
+ return number_of_iterations_until_wrap (loop, type, iv0, iv1,
niter);
+
delta = fold_build2 (MINUS_EXPR, niter_type,
fold_convert (niter_type, iv1->base),
fold_convert (niter_type, iv0->base));
@@ -1665,62 +1757,6 @@ dump_affine_iv (FILE *file, affine_iv *iv)
}
}
-/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts
- the condition for loop-until-wrap cases. For example:
- (unsigned){8, -1}_loop < 10 => {0, 1} != 9
- 10 < (unsigned){0, max - 7}_loop => {0, 1} != 8
- Return true if condition is successfully adjusted. */
-
-static bool
-adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code
*code,
- affine_iv *iv1)
-{
- /* Only support simple cases for the moment. */
- if (TREE_CODE (iv0->base) != INTEGER_CST
- || TREE_CODE (iv1->base) != INTEGER_CST)
- return false;
-
- tree niter_type = unsigned_type_for (type), high, low;
- /* Case: i-- < 10. */
- if (integer_zerop (iv1->step))
- {
- /* TODO: Should handle case in which abs(step) != 1. */
- if (!integer_minus_onep (iv0->step))
- return false;
- /* Give up on infinite loop. */
- if (*code == LE_EXPR
- && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type)))
- return false;
- high = fold_build2 (PLUS_EXPR, niter_type,
- fold_convert (niter_type, iv0->base),
- build_int_cst (niter_type, 1));
- low = fold_convert (niter_type, TYPE_MIN_VALUE (type));
- }
- else if (integer_zerop (iv0->step))
- {
- /* TODO: Should handle case in which abs(step) != 1. */
- if (!integer_onep (iv1->step))
- return false;
- /* Give up on infinite loop. */
- if (*code == LE_EXPR
- && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type)))
- return false;
- high = fold_convert (niter_type, TYPE_MAX_VALUE (type));
- low = fold_build2 (MINUS_EXPR, niter_type,
- fold_convert (niter_type, iv1->base),
- build_int_cst (niter_type, 1));
- }
- else
- gcc_unreachable ();
-
- iv0->base = low;
- iv0->step = fold_convert (niter_type, integer_one_node);
- iv1->base = high;
- iv1->step = build_int_cst (niter_type, 0);
- *code = NE_EXPR;
- return true;
-}
-
/* Determine the number of iterations according to condition (for
staying
inside loop) which compares two induction variables using
comparison
operator CODE. The induction variable on left side of the
comparison
@@ -1855,15 +1891,6 @@ number_of_iterations_cond (class loop *loop,
return true;
}
- /* Handle special case loops: while (i-- < 10) and while (10 < i++)
by
- adjusting iv0, iv1 and code. */
- if (code != NE_EXPR
- && (tree_int_cst_sign_bit (iv0->step)
- || (!integer_zerop (iv1->step)
- && !tree_int_cst_sign_bit (iv1->step)))
- && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1))
- return false;
-
/* OK, now we know we have a senseful loop. Handle several cases,
depending
on what comparison operator is used. */
bound_difference (loop, iv1->base, iv0->base, &bnds);