On Mon, Feb 2, 2015 at 9:19 AM, Ilya Enkovich <enkovich....@gmail.com> wrote: > On 27 Jan 12:29, Richard Biener wrote: >> On Tue, Jan 27, 2015 at 11:47 AM, Ilya Enkovich <enkovich....@gmail.com> >> wrote: >> > On 27 Jan 12:40, Ilya Enkovich wrote: >> >> Hi, >> >> >> >> This patch was supposed to fix PR tree-optimization/64277. Tracker is >> >> now fixed by warnings disabling but I think patch is still useful to >> >> avoid dead code generated by complete unroll. >> >> >> >> Bootstrapped and tested on x86_64-unknown-linux-gnu. >> >> >> >> Thanks, >> >> Ilya >> >> -- >> >> gcc/ >> >> >> >> 2015-01-27 Ilya Enkovich <ilya.enkov...@intel.com> >> >> >> >> * tree-ssa-loop-niter.c (record_nonwrapping_iv): Use base >> >> range info when possible to refine estimation. >> >> >> >> gcc/testsuite/ >> >> >> >> 2015-01-27 Ilya Enkovich <ilya.enkov...@intel.com> >> >> >> >> * gcc.dg/pr64277.c: New. >> >> >> >> >> > >> > Here is a new version fixed according to comments in the tracker. I also >> > fixed a test to scan cunroll dumps. Does it look OK? >> >> Minor comments below. >> >> > What are possible branches for this patch? >> >> You can probably create a testcase that shows code-size regressions >> against a version that didn't peel completely (GCC 4.7). Thus I'd say >> it would apply to 4.9 as well (4.8 doesn't have range information). >> >> > Thanks, >> > Ilya >> > -- >> > diff --git a/gcc/testsuite/gcc.dg/pr64277.c >> > b/gcc/testsuite/gcc.dg/pr64277.c >> > new file mode 100644 >> > index 0000000..c6ef331 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.dg/pr64277.c >> > @@ -0,0 +1,23 @@ >> > +/* PR tree-optimization/64277 */ >> > +/* { dg-do compile } */ >> > +/* { dg-options "-O3 -Wall -Werror -fdump-tree-cunroll-details" } */ >> > +/* { dg-final { scan-tree-dump "loop with 5 iterations completely >> > unrolled" "cunroll" } } */ >> > +/* { dg-final { scan-tree-dump "loop with 6 iterations completely >> > unrolled" "cunroll" } } */ >> > +/* { dg-final { cleanup-tree-dump "cunroll" } } */ >> > + >> > +int f1[10]; >> > +void test1 (short a[], short m, unsigned short l) >> > +{ >> > + int i = l; >> > + for (i = i + 5; i < m; i++) >> > + f1[i] = a[i]++; >> > +} >> > + >> > +void test2 (short a[], short m, short l) >> > +{ >> > + int i; >> > + if (m > 5) >> > + m = 5; >> > + for (i = m; i > l; i--) >> > + f1[i] = a[i]++; >> > +} >> > diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c >> > index 919f5c0..1cd297d 100644 >> > --- a/gcc/tree-ssa-loop-niter.c >> > +++ b/gcc/tree-ssa-loop-niter.c >> > @@ -2754,6 +2754,7 @@ record_nonwrapping_iv (struct loop *loop, tree base, >> > tree step, gimple stmt, >> > { >> > tree niter_bound, extreme, delta; >> > tree type = TREE_TYPE (base), unsigned_type; >> > + tree orig_base = base; >> > >> > if (TREE_CODE (step) != INTEGER_CST || integer_zerop (step)) >> > return; >> > @@ -2777,16 +2778,30 @@ record_nonwrapping_iv (struct loop *loop, tree >> > base, tree step, gimple stmt, >> > >> > if (tree_int_cst_sign_bit (step)) >> > { >> > + wide_int min, max; >> > extreme = fold_convert (unsigned_type, low); >> > - if (TREE_CODE (base) != INTEGER_CST) >> > + if (TREE_CODE (orig_base) == SSA_NAME >> > + && TREE_CODE (high) == INTEGER_CST >> > + && INTEGRAL_TYPE_P (TREE_TYPE (orig_base)) >> > + && get_range_info (orig_base, &min, &max) == VR_RANGE >> > + && wi::gts_p (wide_int (high), max)) >> >> For me a simple wi::gts_p (high, max) worked fine. >> >> > + base = wide_int_to_tree (unsigned_type, max); >> > + else if (TREE_CODE (base) != INTEGER_CST) >> > base = fold_convert (unsigned_type, high); >> > delta = fold_build2 (MINUS_EXPR, unsigned_type, base, extreme); >> > step = fold_build1 (NEGATE_EXPR, unsigned_type, step); >> > } >> > else >> > { >> > + wide_int min, max; >> > extreme = fold_convert (unsigned_type, high); >> > - if (TREE_CODE (base) != INTEGER_CST) >> > + if (TREE_CODE (orig_base) == SSA_NAME >> > + && TREE_CODE (low) == INTEGER_CST >> > + && INTEGRAL_TYPE_P (TREE_TYPE (orig_base)) >> > + && get_range_info (orig_base, &min, &max) == VR_RANGE >> > + && wi::gts_p (min, wide_int (low))) >> >> Likewise. >> >> Ok for trunk with that changes. For the 4.9 branch you need to adjust >> the patch to not use wide-ints. I'd leave it on trunk for a while and >> eventually open a bugreport for the size regression to keep track of it. >> >> Thanks, >> Richard. >> > > Here is a version for 4.9 branch. Does it look OK? Bootstrapped and tested > on x86_64-unknown-linux-gnu.
I don't think we want this kind of fixes on the branch. Thanks, Richard. > Thanks, > Ilya > -- > gcc/ > > 2015-02-02 Ilya Enkovich <ilya.enkov...@intel.com> > > PR tree-optimization/64277 > * tree-ssa-loop-niter.c (record_nonwrapping_iv): Use base > range info when possible to refine estimation. > > gcc/testsuite/ > > 2015-02-02 Ilya Enkovich <ilya.enkov...@intel.com> > > PR tree-optimization/64277 > * gcc.dg/pr64277.c: New. > > > diff --git a/gcc/testsuite/gcc.dg/pr64277.c b/gcc/testsuite/gcc.dg/pr64277.c > new file mode 100644 > index 0000000..c6ef331 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/pr64277.c > @@ -0,0 +1,23 @@ > +/* PR tree-optimization/64277 */ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -Wall -Werror -fdump-tree-cunroll-details" } */ > +/* { dg-final { scan-tree-dump "loop with 5 iterations completely unrolled" > "cunroll" } } */ > +/* { dg-final { scan-tree-dump "loop with 6 iterations completely unrolled" > "cunroll" } } */ > +/* { dg-final { cleanup-tree-dump "cunroll" } } */ > + > +int f1[10]; > +void test1 (short a[], short m, unsigned short l) > +{ > + int i = l; > + for (i = i + 5; i < m; i++) > + f1[i] = a[i]++; > +} > + > +void test2 (short a[], short m, short l) > +{ > + int i; > + if (m > 5) > + m = 5; > + for (i = m; i > l; i--) > + f1[i] = a[i]++; > +} > diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c > index 897b8f5..8fb72b6 100644 > --- a/gcc/tree-ssa-loop-niter.c > +++ b/gcc/tree-ssa-loop-niter.c > @@ -2727,6 +2727,7 @@ record_nonwrapping_iv (struct loop *loop, tree base, > tree step, gimple stmt, > tree niter_bound, extreme, delta; > tree type = TREE_TYPE (base), unsigned_type; > double_int max; > + tree orig_base = base; > > if (TREE_CODE (step) != INTEGER_CST || integer_zerop (step)) > return; > @@ -2750,7 +2751,14 @@ record_nonwrapping_iv (struct loop *loop, tree base, > tree step, gimple stmt, > > if (tree_int_cst_sign_bit (step)) > { > + double_int min, max; > extreme = fold_convert (unsigned_type, low); > + if (TREE_CODE (orig_base) == SSA_NAME > + && TREE_CODE (high) == INTEGER_CST > + && INTEGRAL_TYPE_P (TREE_TYPE (orig_base)) > + && get_range_info (orig_base, &min, &max) == VR_RANGE > + && max.slt (TREE_INT_CST (high))) > + base = double_int_to_tree (unsigned_type, max); > if (TREE_CODE (base) != INTEGER_CST) > base = fold_convert (unsigned_type, high); > delta = fold_build2 (MINUS_EXPR, unsigned_type, base, extreme); > @@ -2758,8 +2766,15 @@ record_nonwrapping_iv (struct loop *loop, tree base, > tree step, gimple stmt, > } > else > { > + double_int min, max; > extreme = fold_convert (unsigned_type, high); > - if (TREE_CODE (base) != INTEGER_CST) > + if (TREE_CODE (orig_base) == SSA_NAME > + && TREE_CODE (low) == INTEGER_CST > + && INTEGRAL_TYPE_P (TREE_TYPE (orig_base)) > + && get_range_info (orig_base, &min, &max) == VR_RANGE > + && min.sgt (TREE_INT_CST (low))) > + base = double_int_to_tree (unsigned_type, min); > + else if (TREE_CODE (base) != INTEGER_CST) > base = fold_convert (unsigned_type, low); > delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, base); > }