On Thu, 1 Jul 2021, Jiufu Guo wrote: > For code like: > unsigned foo(unsigned val, unsigned start) > { > unsigned cnt = 0; > for (unsigned i = start; i > val; ++i) > cnt++; > return cnt; > } > > The number of iterations should be about UINT_MAX - start.
For unsigned foo(unsigned val, unsigned start) { unsigned cnt = 0; for (unsigned i = start; i >= val; ++i) cnt++; return cnt; } and val == 0 the loop never terminates. I don't see anywhere in the patch that you disregard GE_EXPR and I remember the code handles GE as well as GT? From a quick look this is also not covered by a testcase you add - not exactly sure how it would materialize in a miscompilation. > There is function adjust_cond_for_loop_until_wrap which > handles similar work for const bases. > Like adjust_cond_for_loop_until_wrap, this patch enhance > function number_of_iterations_cond/number_of_iterations_lt > to analyze number of iterations for this kind of loop. > > Bootstrap and regtest pass on powerpc64le, is this ok for trunk? > > gcc/ChangeLog: > > PR tree-optimization/101145 > * tree-ssa-loop-niter.c > (number_of_iterations_until_wrap): New function. > (number_of_iterations_lt): Invoke above function. > (adjust_cond_for_loop_until_wrap): > Merge to number_of_iterations_until_wrap. > (number_of_iterations_cond): Update invokes for > adjust_cond_for_loop_until_wrap and number_of_iterations_lt. > > gcc/testsuite/ChangeLog: > > PR tree-optimization/101145 > * gcc.dg/vect/pr101145.c: New test. > * gcc.dg/vect/pr101145.inc: New test. > * gcc.dg/vect/pr101145_1.c: New test. > * gcc.dg/vect/pr101145_2.c: New test. > * gcc.dg/vect/pr101145_3.c: New test. > --- > gcc/testsuite/gcc.dg/vect/pr101145.c | 187 +++++++++++++++++++++++++ > gcc/testsuite/gcc.dg/vect/pr101145.inc | 63 +++++++++ > gcc/testsuite/gcc.dg/vect/pr101145_1.c | 15 ++ > gcc/testsuite/gcc.dg/vect/pr101145_2.c | 15 ++ > gcc/testsuite/gcc.dg/vect/pr101145_3.c | 15 ++ > gcc/tree-ssa-loop-niter.c | 150 +++++++++++--------- > 6 files changed, 380 insertions(+), 65 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c > > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c > b/gcc/testsuite/gcc.dg/vect/pr101145.c > new file mode 100644 > index 00000000000..74031b031cf > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c > @@ -0,0 +1,187 @@ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-options "-O3 -fdump-tree-vect-details" } */ > +#include <limits.h> > + > +unsigned __attribute__ ((noinline)) > +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) > +{ > + while (n < ++l) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned) > +{ > + while (UINT_MAX - 64 < ++l) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) > +{ > + l = UINT_MAX - 32; > + while (n < ++l) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) > +{ > + while (n <= ++l) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) > +{ // infininate > + while (0 <= ++l) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) > +{ > + //no loop > + l = UINT_MAX; > + while (n < ++l) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) > +{ > + while (--l < n) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned) > +{ > + while (--l < 64) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) > +{ > + l = 32; > + while (--l < n) > + *a++ = *b++ + 1; > + return l; > +} > + > + > +int a[3200], b[3200]; > +int fail; > + > +int > +main () > +{ > + unsigned l, n; > + unsigned res; > + /* l > n*/ > + n = UINT_MAX - 64; > + l = n + 32; > + res = foo (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n; > + res = foo (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n - 1; > + res = foo (a, b, l, n); > + if (res != l + 1) > + fail++; > + > + l = n - 32; > + res = foo (a, b, l, n); > + if (res != l + 1) > + fail++; > + > + l = UINT_MAX; > + res = foo (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n + 32; > + res = foo_1 (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n + 32; > + res = foo_2 (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n; > + res = foo_3 (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n - 1; > + res = foo_3 (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n - 2; > + res = foo_3 (a, b, l, n); > + if (res != l + 1) > + fail++; > + > + res = foo_5 (a, b, l, n); > + if (res != 0) > + fail++; > + > + n = 64; > + l = n - 32; > + res = bar (a, b, l, n); > + res++; > + if (res != 0) > + fail++; > + > + l = n; > + res = bar (a, b, l, n); > + res++; > + if (res != 0) > + fail++; > + > + l = n + 1; > + res = bar (a, b, l, n); > + res++; > + if (res != l) > + fail++; > + > + l = 0; > + res = bar (a, b, l, n); > + res++; > + if (res != l) > + fail++; > + > + l = 32; > + res = bar_1 (a, b, l, n); > + res++; > + if (res != 0) > + fail++; > + > + res = bar_1 (a, b, l, n); > + res++; > + if (res != 0) > + fail++; > + > + if (fail) > + __builtin_abort (); > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc > b/gcc/testsuite/gcc.dg/vect/pr101145.inc > new file mode 100644 > index 00000000000..6eed3fa8aca > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc > @@ -0,0 +1,63 @@ > +TYPE __attribute__ ((noinline)) > +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) > +{ > + for (l = L_BASE; n < l; l += C) > + *a++ = *b++ + 1; > + return l; > +} > + > +TYPE __attribute__ ((noinline)) > +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) > +{ > + for (l = L_BASE_DOWN; l < n; l -= C) > + *a++ = *b++ + 1; > + return l; > +} > + > +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; } > + > +int a[1000], b[1000]; > +int fail; > + > +int > +main () > +{ > + TYPE res; > + TYPE l; > + TYPE n; > + n = N_BASE; > + l = n - C; > + res = foo_sign (a, b, l, n); > + if (res != l) > + fail++; > + > + l = n; > + res = foo_sign (a, b, l, n); > + if (res != l) > + fail++; > + > + l = n + C; > + res = foo_sign (a, b, l, n); > + if (neq ((res - MIN) / C, 0)) > + fail++; > + > + n = N_BASE_DOWN; > + l = n - C; > + res = bar_sign (a, b, l, n); > + if (neq ((MAX - res) / C, 0)) > + fail++; > + > + l = n; > + res = bar_sign (a, b, l, n); > + if (res != l) > + fail++; > + > + l = n + C; > + res = bar_sign (a, b, l, n); > + if (res != l) > + fail++; > + > + if (fail) > + __builtin_abort (); > + return 0; > +} > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c > b/gcc/testsuite/gcc.dg/vect/pr101145_1.c > new file mode 100644 > index 00000000000..94f6b99b893 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c > @@ -0,0 +1,15 @@ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-options "-O3 -fdump-tree-vect-details" } */ > +#define TYPE signed char > +#define MIN -128 > +#define MAX 127 > +#define N_BASE (MAX - 32) > +#define N_BASE_DOWN (MIN + 32) > + > +#define C 3 > +#define L_BASE l > +#define L_BASE_DOWN l > + > +#include "pr101145.inc" > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c > b/gcc/testsuite/gcc.dg/vect/pr101145_2.c > new file mode 100644 > index 00000000000..d3cfc9e01e1 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c > @@ -0,0 +1,15 @@ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-options "-O3 -fdump-tree-vect-details" } */ > +#define TYPE unsigned char > +#define MIN 0 > +#define MAX 255 > +#define N_BASE (MAX - 32 + 1) > +#define N_BASE_DOWN (MIN + 32) > + > +#define C 2 > +#define L_BASE l > +#define L_BASE_DOWN l > + > +#include "pr101145.inc" > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c > b/gcc/testsuite/gcc.dg/vect/pr101145_3.c > new file mode 100644 > index 00000000000..e2cf9b1f7e6 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c > @@ -0,0 +1,15 @@ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-options "-O3 -fdump-tree-vect-details" } */ > +#define TYPE int * > +#define MIN ((TYPE)0) > +#define MAX ((TYPE)((long long)-1)) > +#define N_BASE ((TYPE) (-32)) > +#define N_BASE_DOWN (MIN + 32) > + > +#define C 1 > +#define L_BASE l > +#define L_BASE_DOWN l > + > +#include "pr101145.inc" > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ > diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c > index b5add827018..06db6a36ef8 100644 > --- a/gcc/tree-ssa-loop-niter.c > +++ b/gcc/tree-ssa-loop-niter.c > @@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv *iv0, > affine_iv *iv1, > } > } > > +/* Determines number of iterations of loop whose ending condition > + is IV0 < IV1 which likes: {base, -C} < n, or n < {base, C}. > + The number of iterations is stored to NITER. */ > + > +static bool > +number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0, > + affine_iv *iv1, class tree_niter_desc *niter) > +{ > + tree niter_type = unsigned_type_for (type); > + tree max, min; > + > + if (POINTER_TYPE_P (type)) > + { > + max = fold_convert (type, TYPE_MAX_VALUE (niter_type)); > + min = fold_convert (type, TYPE_MIN_VALUE (niter_type)); > + } > + else > + { > + max = TYPE_MAX_VALUE (type); > + min = TYPE_MIN_VALUE (type); > + } It would be cheaper to use wide_int here - wi::min/max_value (TYPE_PRECISION (...), TYPE_SIGN (...)) and in the simplification code below. > + tree high = max, low = min, one = build_int_cst (niter_type, 1); > + tree step; > + > + /* n < {base, C}. */ > + if (integer_zerop (iv0->step) && TREE_CODE (iv1->step) == INTEGER_CST > + && !tree_int_cst_sign_bit (iv1->step)) > + { > + step = iv1->step; > + niter->niter = fold_build2 (MINUS_EXPR, niter_type, max, iv1->base); > + if (TREE_CODE (iv1->base) == INTEGER_CST) > + low = fold_build2 (MINUS_EXPR, type, iv1->base, one); > + else if (TREE_CODE (iv0->base) == INTEGER_CST) > + low = iv0->base; > + } > + /* {base, -C} < n. */ > + else if (TREE_CODE (iv0->step) == INTEGER_CST > + && tree_int_cst_sign_bit (iv0->step) && integer_zerop (iv1->step)) > + { > + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), iv0->step); > + niter->niter = fold_build2 (MINUS_EXPR, niter_type, iv0->base, min); > + if (TREE_CODE (iv0->base) == INTEGER_CST) > + high = fold_build2 (PLUS_EXPR, type, iv0->base, one); > + else if (TREE_CODE (iv1->base) == INTEGER_CST) > + high = iv1->base; > + } > + else > + return false; > + > + /* (delta + step - 1) / step */ > + step = fold_convert (niter_type, step); > + niter->niter = fold_convert (niter_type, niter->niter); > + niter->niter = fold_build2 (PLUS_EXPR, niter_type, niter->niter, step); > + niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, niter->niter, > step); > + > + tree m = fold_build2 (MINUS_EXPR, niter_type, high, low); > + m = fold_convert (niter_type, m); > + mpz_t mstep, tmp, mmax; > + mpz_init (mstep); > + mpz_init (tmp); > + mpz_init (mmax); > + wi::to_mpz (wi::to_wide (step), mstep, UNSIGNED); > + wi::to_mpz (wi::to_wide (m), mmax, UNSIGNED); > + mpz_add (tmp, mmax, mstep); > + mpz_sub_ui (tmp, tmp, 1); > + mpz_fdiv_q (tmp, tmp, mstep); > + niter->max = widest_int::from (wi::from_mpz (niter_type, tmp, false), > + TYPE_SIGN (niter_type)); > + mpz_clear (mstep); > + mpz_clear (tmp); You forget to clear mmax? I wonder why you need to involve GMP here at all since this is just add/sub/divide - you could use widest_int directly I think. At least the back-and-forth between trees, wide-int and mpz looks odd. Richard. > + niter->may_be_zero > + = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base); > + > + niter->control.no_overflow = false; > + > + return true; > +} > + > /* Determines number of iterations of loop whose ending condition > is IV0 < IV1. TYPE is the type of the iv. The number of > iterations is stored to NITER. BNDS bounds the difference > @@ -1501,6 +1581,11 @@ number_of_iterations_lt (class loop *loop, tree type, > affine_iv *iv0, > niter->bound = iv0->base; > } > > + /* {base, -C} < n, or n < {base, C} */ > + if (tree_int_cst_sign_bit (iv0->step) > + || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit (iv1->step))) > + return number_of_iterations_until_wrap (loop, type, iv0, iv1, niter); > + > delta = fold_build2 (MINUS_EXPR, niter_type, > fold_convert (niter_type, iv1->base), > fold_convert (niter_type, iv0->base)); > @@ -1665,62 +1750,6 @@ dump_affine_iv (FILE *file, affine_iv *iv) > } > } > > -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts > - the condition for loop-until-wrap cases. For example: > - (unsigned){8, -1}_loop < 10 => {0, 1} != 9 > - 10 < (unsigned){0, max - 7}_loop => {0, 1} != 8 > - Return true if condition is successfully adjusted. */ > - > -static bool > -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code *code, > - affine_iv *iv1) > -{ > - /* Only support simple cases for the moment. */ > - if (TREE_CODE (iv0->base) != INTEGER_CST > - || TREE_CODE (iv1->base) != INTEGER_CST) > - return false; > - > - tree niter_type = unsigned_type_for (type), high, low; > - /* Case: i-- < 10. */ > - if (integer_zerop (iv1->step)) > - { > - /* TODO: Should handle case in which abs(step) != 1. */ > - if (!integer_minus_onep (iv0->step)) > - return false; > - /* Give up on infinite loop. */ > - if (*code == LE_EXPR > - && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type))) > - return false; > - high = fold_build2 (PLUS_EXPR, niter_type, > - fold_convert (niter_type, iv0->base), > - build_int_cst (niter_type, 1)); > - low = fold_convert (niter_type, TYPE_MIN_VALUE (type)); > - } > - else if (integer_zerop (iv0->step)) > - { > - /* TODO: Should handle case in which abs(step) != 1. */ > - if (!integer_onep (iv1->step)) > - return false; > - /* Give up on infinite loop. */ > - if (*code == LE_EXPR > - && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type))) > - return false; > - high = fold_convert (niter_type, TYPE_MAX_VALUE (type)); > - low = fold_build2 (MINUS_EXPR, niter_type, > - fold_convert (niter_type, iv1->base), > - build_int_cst (niter_type, 1)); > - } > - else > - gcc_unreachable (); > - > - iv0->base = low; > - iv0->step = fold_convert (niter_type, integer_one_node); > - iv1->base = high; > - iv1->step = build_int_cst (niter_type, 0); > - *code = NE_EXPR; > - return true; > -} > - > /* Determine the number of iterations according to condition (for staying > inside loop) which compares two induction variables using comparison > operator CODE. The induction variable on left side of the comparison > @@ -1855,15 +1884,6 @@ number_of_iterations_cond (class loop *loop, > return true; > } > > - /* Handle special case loops: while (i-- < 10) and while (10 < i++) by > - adjusting iv0, iv1 and code. */ > - if (code != NE_EXPR > - && (tree_int_cst_sign_bit (iv0->step) > - || (!integer_zerop (iv1->step) > - && !tree_int_cst_sign_bit (iv1->step))) > - && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1)) > - return false; > - > /* OK, now we know we have a senseful loop. Handle several cases, > depending > on what comparison operator is used. */ > bound_difference (loop, iv1->base, iv0->base, &bnds); > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)