On Thu, Jan 26, 2017 at 1:06 AM, Jeff Law <l...@redhat.com> wrote: > As has been discussed extensively, we're not doing a good job at simplifying > overflow tests, particularly those which collapse down to an EQ/NE test. > > x + -1 > x -> x == 0 > x + -1 < x -> x != 0 > x + 1 < x -> x == -1U > x + 1 > x -> x != -1U > > The simplifications allow us to propagate a constant for X into one ARM of > the associated IF/ELSE construct. For C++ std::vector operations those > propagations can eliminate lots of unnecessary code. > > Those propagations also eliminate (by way of removing unnecessary code) > false positive warnings for memset calls that come from std::vector > operations. > > This patch does two things. > > 1. It adds special case patterns to the A+CST CMP A pattern for cases where > CST is 1 or -1 where the result turns into A EQ/NE 0 or A EQ/NE -1U. These > special patterns are applied regardless of the single_use status of the > expression. > > 2. It adds a call to fold_stmt in simplify_cond_using_ranges. This allows > VRP to transform the code early and the first DOM pass to often see the > simpified conditional and thus optimize better, rather than waiting for > forwprop3 to simplify the conditional and the last DOM pass to optimize the > code. > > Bootstrapped and regression tested on x86_64-linux-gnu. OK for the trunk?
Marc mentions some reasons we have a lot of single_use predicates on condition simplifications. You may want to evaluate some of the examples (usually they boil down to RTL if-conversion missed optimizations). Other cases happen when we can use CC flags of a previous computation that is live over the conditional like int foo (int b) { int a = b - 1; if (a == 0) return b; return a; } if we transform that to if (b == 1) we lose because we don't see to undo that on RTL (and we do seem to do that transform somewhere since basically forever). gcc asm: movl %edi, %eax movl $1, %edx subl $1, %eax cmove %edx, %eax ret clang asm: movl %edi, %eax decl %eax cmovel %edi, %eax retq in this case probably a missed uncprop opportunity on our side (rematerialize the constant propagated 1 from b). Comments below > > PR tree-optimization/79095 > * match.pd (A + CST CMP A): Add special cases for CST of 1 or -1. > * tree-vrp.c (simplify_cond_using_ranges): Accept GSI rather than > statement. > Callers changed. Just fold the conditional if no other > simplifications > were possible. > > PR tree-optimization/79095 > * g++.dg/pr79095: New test. > * gcc.c-torture/execute/arith-1.c: Test additional cases. > > diff --git a/gcc/match.pd b/gcc/match.pd > index 7b96800..8178e9c 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -3019,15 +3019,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > ADD_OVERFLOW detection in tree-ssa-math-opts.c. > A + CST CMP A -> A CMP' CST' */ > (for cmp (lt le ge gt) > + out_eqneq_zero (ne ne eq eq) > + out_eqneq_m1 (eq eq ne ne) > out (gt gt le le) > (simplify > (cmp:c (plus@2 @0 INTEGER_CST@1) @0) > (if (TYPE_UNSIGNED (TREE_TYPE (@0)) > && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)) > + && wi::eq_p (@1, 1)) > + (out_eqneq_m1 @0 { wide_int_to_tree (TREE_TYPE (@0), wi::max_value > + (TYPE_PRECISION (TREE_TYPE (@0)), UNSIGNED)); }) build_minus_one_cst (TREE_TYPE (@0)) > + (if (TYPE_UNSIGNED (TREE_TYPE (@0)) > + && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)) > + && wi::eq_p (@1, -1)) > + (out_eqneq_zero @0 { fold_convert (TREE_TYPE (@0), integer_zero_node) ; > }) build_zero_cst (TREE_TYPE (@0)) > + (if (TYPE_UNSIGNED (TREE_TYPE (@0)) > + && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)) > && wi::ne_p (@1, 0) > && single_use (@2)) > (out @0 { wide_int_to_tree (TREE_TYPE (@0), wi::max_value > - (TYPE_PRECISION (TREE_TYPE (@0)), UNSIGNED) - @1); })))) > + (TYPE_PRECISION (TREE_TYPE (@0)), UNSIGNED) - @1); })))))) For a little CSE I'd use (if (TYPE_UNSIGNED (TREE_TYPE (@0)) && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))) (switch (if (wi::eq_p (@1, 1)) ...) (if (wi::eq_p (@1, -1)) ...) (if (wi::ne_p (@1, 0) && single_use (@2)) ...))) which is also nicer for indentation. I also notice we dont' handle 1 - A CMP A anywhere. Richard. > /* To detect overflow in unsigned A - B, A < B is simpler than A - B > A. > However, the detection logic for SUB_OVERFLOW in tree-ssa-math-opts.c > diff --git a/gcc/testsuite/g++.dg/pr79095.C b/gcc/testsuite/g++.dg/pr79095.C > new file mode 100644 > index 0000000..edf3739 > --- /dev/null > +++ b/gcc/testsuite/g++.dg/pr79095.C > @@ -0,0 +1,35 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Wuninitialized -O2" } */ > + > +typedef __SIZE_TYPE__ size_t; > + > +struct S { > + int *p0, *p1, *p2; > + > + size_t size () const { return p1 - p0; } > + > + void f (size_t n) { > + if (n > size ()) // can't happen because > + foo (n - size ()); // n is in [1, MIN(size() - 1, 3)] > + else if (n < size ()) > + bar (p0 + n); > + } > + > + void foo (size_t n) > + { > + size_t left = (size_t)(p2 - p1); > + if (left >= n) > + __builtin_memset (p2, 0, n * sizeof *p2); /* { dg-bogus "maximum > object" "false warning" } */ > + } > + > + void bar (int*); > +}; > + > +void f (S &s) > +{ > + size_t n = s.size (); > + if (n > 1 && n < 5) > + s.f (n - 1); > +} > + > + > diff --git a/gcc/testsuite/gcc.c-torture/execute/arith-1.c > b/gcc/testsuite/gcc.c-torture/execute/arith-1.c > index 58df322..6168d77 100644 > --- a/gcc/testsuite/gcc.c-torture/execute/arith-1.c > +++ b/gcc/testsuite/gcc.c-torture/execute/arith-1.c > @@ -7,9 +7,41 @@ sat_add (unsigned i) > return ret; > } > > +unsigned > +sat_add2 (unsigned i) > +{ > + unsigned ret = i + 1; > + if (ret > i) > + return ret; > + return i; > +} > + > +unsigned > +sat_add3 (unsigned i) > +{ > + unsigned ret = i - 1; > + if (ret > i) > + ret = i; > + return ret; > +} > + > +unsigned > +sat_add4 (unsigned i) > +{ > + unsigned ret = i - 1; > + if (ret < i) > + return ret; > + return i; > +} > main () > { > if (sat_add (~0U) != ~0U) > abort (); > + if (sat_add2 (~0U) != ~0U) > + abort (); > + if (sat_add3 (0U) != 0U) > + abort (); > + if (sat_add4 (0U) != 0U) > + abort (); > exit (0); > } > diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c > index d7d7a0d..b4b6d8a 100644 > --- a/gcc/tree-vrp.c > +++ b/gcc/tree-vrp.c > @@ -9550,8 +9550,9 @@ range_fits_type_p (value_range *vr, unsigned > dest_precision, signop dest_sgn) > the original conditional. */ > > static bool > -simplify_cond_using_ranges (gcond *stmt) > +simplify_cond_using_ranges (gimple_stmt_iterator *gsi) > { > + gcond *stmt = as_a <gcond *> (gsi_stmt (*gsi)); > tree op0 = gimple_cond_lhs (stmt); > tree op1 = gimple_cond_rhs (stmt); > enum tree_code cond_code = gimple_cond_code (stmt); > @@ -9720,6 +9721,14 @@ simplify_cond_using_ranges (gcond *stmt) > } > } > > + /* Finally, just try match.pd simplification. This can convert some > + overflow checks into simple equality checks (for example). */ > + if (fold_stmt (gsi, NULL)) > + { > + update_stmt (gsi_stmt (*gsi)); > + return true; > + } > + > return false; > } > > @@ -10318,7 +10328,7 @@ simplify_stmt_using_ranges (gimple_stmt_iterator > *gsi) > } > } > else if (gimple_code (stmt) == GIMPLE_COND) > - return simplify_cond_using_ranges (as_a <gcond *> (stmt)); > + return simplify_cond_using_ranges (gsi); > else if (gimple_code (stmt) == GIMPLE_SWITCH) > return simplify_switch_using_ranges (as_a <gswitch *> (stmt)); > else if (is_gimple_call (stmt) >