Hi, This patch is to fix PR68529. In my previous scev/niter overflow patches, I only computed no-overflow information for control iv in simple loops with LT_EXPR as exit condition code. This bug is about loop with NE_EXPR as exit condition code. Given below example:
#include <stdio.h> #include <stdlib.h> int main(){ char c[10000]={}; unsigned int nchar=9999; while(nchar--!=0){ c[nchar]='A'; } printf("%s\n",c); return 0; } nchar used as an index to array 'c' doesn't overflow during loop iterations. Thus &c[nchar] acts as a scev. GCC now fails to do that. With this patch, this issue is fixed. Furthermore, the computation of no-overflow information could be improved by using TREE_OVERFLOW_UNDEFINED semantic of signed type for C/C++. I didn't do that because: 1) I doubt how useful it could be because I have already changed scev to use the semantic whenever possible. It doesn't need loop niter analysis' help. 2) To do that, I need to expose chrec_convert_aggressive information out of scev in function simple_iv, because that function could corrupt TREE_OVERFLOW_UNDEFINED semantic assumption. This isn't appropriate for Stage3. Bootstrap and test on x86_64 and x86. I don't expect any issue on aarch64 either. Is it OK? 2015-11-27 Bin Cheng <bin.ch...@arm.com> PR tree-optimization/68529 * tree-ssa-loop-niter.c (number_of_iterations_ne): Add new param. Compute no-overflow information for control iv. (number_of_iterations_lt, number_of_iterations_le): Add new param. (number_of_iterations_cond): Pass new argument to above functions. 2015-11-27 Bin Cheng <bin.ch...@arm.com> PR tree-optimization/68529 * gcc.dg/tree-ssa/pr68529-1.c: New test. * gcc.dg/tree-ssa/pr68529-2.c: New test. * gcc.dg/tree-ssa/pr68529-3.c: New test.
Index: gcc/testsuite/gcc.dg/tree-ssa/pr68529-1.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/pr68529-1.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/pr68529-1.c (working copy) @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */ + +void bar(char *s); +int foo() +{ + char c[10000] = {}; + unsigned short nchar = 9999; + + while(nchar-- != 0) + { + c[nchar] = 'A'; + } + + bar (c); + return 0; +} + +/* { dg-final { scan-tree-dump "distributed: split to 0 loops and 1 library calls" "ldist" } } */ +/* { dg-final { scan-tree-dump "generated memset" "ldist" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/pr68529-2.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/pr68529-2.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/pr68529-2.c (working copy) @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */ + +void bar(char *s); +int foo(unsigned short l) +{ + char c[10000] = {}; + unsigned short nchar = 9999; + + if (nchar <= l) + return -1; + + while(nchar-- != l) + { + c[nchar] = 'A'; + } + + bar (c); + return 0; +} + +/* { dg-final { scan-tree-dump "distributed: split to 0 loops and 1 library calls" "ldist" } } */ +/* { dg-final { scan-tree-dump "generated memset" "ldist" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/pr68529-3.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/pr68529-3.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/pr68529-3.c (working copy) @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */ + +void bar(char *s); +int foo(unsigned short l) +{ + char c[10000] = {}; + unsigned short nchar = 9999; + + if (nchar < l) + return -1; + + while(nchar-- != l) + { + c[nchar] = 'A'; + } + + bar (c); + return 0; +} + +/* { dg-final { scan-tree-dump "failed: evolution of offset is not affine" "ldist" } } */ Index: gcc/tree-ssa-loop-niter.c =================================================================== --- gcc/tree-ssa-loop-niter.c (revision 230945) +++ gcc/tree-ssa-loop-niter.c (working copy) @@ -957,13 +957,14 @@ number_of_iterations_ne_max (mpz_t bnd, bool no_ov bounds on the difference FINAL - IV->base. */ static bool -number_of_iterations_ne (tree type, affine_iv *iv, tree final, - struct tree_niter_desc *niter, bool exit_must_be_taken, - bounds *bnds) +number_of_iterations_ne (struct loop *loop, tree type, affine_iv *iv, + tree final, struct tree_niter_desc *niter, + bool exit_must_be_taken, bounds *bnds) { tree niter_type = unsigned_type_for (type); tree s, c, d, bits, assumption, tmp, bound; mpz_t max; + tree e; niter->control = *iv; niter->bound = final; @@ -998,6 +999,23 @@ static bool TYPE_SIGN (niter_type)); mpz_clear (max); + /* Compute no-overflow information for the control iv. Note we are + handling NE_EXPR, if iv base equals to final value, the loop exits + immediately, and the iv does not overflow. */ + if (tree_int_cst_sign_bit (iv->step)) + e = fold_build2 (GE_EXPR, boolean_type_node, iv->base, final); + else + e = fold_build2 (LE_EXPR, boolean_type_node, iv->base, final); + e = simplify_using_initial_conditions (loop, e); + if (integer_onep (e) + && (integer_onep (s) + || (TREE_CODE (c) == INTEGER_CST + && TREE_CODE (s) == INTEGER_CST + && wi::mod_trunc (c, s, TYPE_SIGN (type)) == 0))) + { + niter->control.no_overflow = true; + } + /* First the trivial cases -- when the step is 1. */ if (integer_onep (s)) { @@ -1361,8 +1379,8 @@ assert_loop_rolls_lt (tree type, affine_iv *iv0, a that the exit must be taken eventually. */ static bool -number_of_iterations_lt (tree type, affine_iv *iv0, affine_iv *iv1, - struct tree_niter_desc *niter, +number_of_iterations_lt (struct loop *loop, tree type, affine_iv *iv0, + affine_iv *iv1, struct tree_niter_desc *niter, bool exit_must_be_taken, bounds *bnds) { tree niter_type = unsigned_type_for (type); @@ -1434,7 +1452,8 @@ static bool zps does not overflow. */ zps.no_overflow = true; - return number_of_iterations_ne (type, &zps, delta, niter, true, bnds); + return number_of_iterations_ne (loop, type, &zps, + delta, niter, true, bnds); } /* Make sure that the control iv does not overflow. */ @@ -1473,9 +1492,9 @@ static bool is the case). BNDS bounds the difference IV1->base - IV0->base. */ static bool -number_of_iterations_le (tree type, affine_iv *iv0, affine_iv *iv1, - struct tree_niter_desc *niter, bool exit_must_be_taken, - bounds *bnds) +number_of_iterations_le (struct loop *loop, tree type, affine_iv *iv0, + affine_iv *iv1, struct tree_niter_desc *niter, + bool exit_must_be_taken, bounds *bnds) { tree assumption; tree type1 = type; @@ -1523,7 +1542,7 @@ static bool bounds_add (bnds, 1, type1); - return number_of_iterations_lt (type, iv0, iv1, niter, exit_must_be_taken, + return number_of_iterations_lt (loop, type, iv0, iv1, niter, exit_must_be_taken, bnds); } @@ -1698,18 +1717,18 @@ number_of_iterations_cond (struct loop *loop, { case NE_EXPR: gcc_assert (integer_zerop (iv1->step)); - ret = number_of_iterations_ne (type, iv0, iv1->base, niter, + ret = number_of_iterations_ne (loop, type, iv0, iv1->base, niter, exit_must_be_taken, &bnds); break; case LT_EXPR: - ret = number_of_iterations_lt (type, iv0, iv1, niter, exit_must_be_taken, - &bnds); + ret = number_of_iterations_lt (loop, type, iv0, iv1, niter, + exit_must_be_taken, &bnds); break; case LE_EXPR: - ret = number_of_iterations_le (type, iv0, iv1, niter, exit_must_be_taken, - &bnds); + ret = number_of_iterations_le (loop, type, iv0, iv1, niter, + exit_must_be_taken, &bnds); break; default: