Re: [PATCH][RFA][PR tree-optimization/79095] Improve overflow test optimization and avoid invalid warnings

Richard Biener Thu, 26 Jan 2017 01:39:07 -0800

On Thu, Jan 26, 2017 at 1:06 AM, Jeff Law <l...@redhat.com> wrote:
> As has been discussed extensively, we're not doing a good job at simplifying
> overflow tests, particularly those which collapse down to an EQ/NE test.
>
> x + -1 > x  -> x == 0
> x + -1 < x  -> x != 0
> x + 1 < x   -> x == -1U
> x + 1 > x   -> x != -1U
>
> The simplifications allow us to propagate a constant for X into one ARM of
> the associated IF/ELSE construct.  For C++ std::vector operations those
> propagations can eliminate lots of unnecessary code.
>
> Those propagations also eliminate (by way of removing unnecessary code)
> false positive warnings for memset calls that come from std::vector
> operations.
>
> This patch does two things.
>
> 1. It adds special case patterns to the A+CST CMP A pattern for cases where
> CST is 1 or -1 where the result turns into A EQ/NE 0 or A EQ/NE -1U.  These
> special patterns are applied regardless of the single_use status of the
> expression.
>
> 2. It adds a call to fold_stmt in simplify_cond_using_ranges.  This allows
> VRP to transform the code early and the first DOM pass to often see the
> simpified conditional and thus optimize better, rather than waiting for
> forwprop3 to simplify the conditional and the last DOM pass to optimize the
> code.
>
> Bootstrapped and regression tested on x86_64-linux-gnu.  OK for the trunk?


Marc mentions some reasons we have a lot of single_use predicates on condition
simplifications.  You may want to evaluate some of the examples (usually
they boil down to RTL if-conversion missed optimizations).  Other cases happen
when we can use CC flags of a previous computation that is live over the
conditional like

int foo (int b)
{
   int a = b - 1;
   if (a == 0)
     return b;
   return a;
}

if we transform that to if (b == 1) we lose because we don't see to undo that
on RTL (and we do seem to do that transform somewhere since basically
forever). gcc asm:

        movl    %edi, %eax
        movl    $1, %edx
        subl    $1, %eax
        cmove   %edx, %eax
        ret

clang asm:

        movl    %edi, %eax
        decl    %eax
        cmovel  %edi, %eax
        retq

in this case probably a missed uncprop opportunity on our side (rematerialize
the constant propagated 1 from b).

Comments below

>
>         PR tree-optimization/79095
>         * match.pd (A + CST CMP A): Add special cases for CST of 1 or -1.
>         * tree-vrp.c (simplify_cond_using_ranges): Accept GSI rather than
> statement.
>         Callers changed.  Just fold the conditional if no other
> simplifications
>         were possible.
>
>         PR tree-optimization/79095
>         * g++.dg/pr79095: New test.
>         * gcc.c-torture/execute/arith-1.c: Test additional cases.
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 7b96800..8178e9c 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3019,15 +3019,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>     ADD_OVERFLOW detection in tree-ssa-math-opts.c.
>     A + CST CMP A  ->  A CMP' CST' */
>  (for cmp (lt le ge gt)
> +     out_eqneq_zero (ne ne eq eq)
> +     out_eqneq_m1 (eq eq ne ne)
>       out (gt gt le le)
>   (simplify
>    (cmp:c (plus@2 @0 INTEGER_CST@1) @0)
>    (if (TYPE_UNSIGNED (TREE_TYPE (@0))
>         && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))
> +       && wi::eq_p (@1, 1))
> +   (out_eqneq_m1 @0 { wide_int_to_tree (TREE_TYPE (@0), wi::max_value
> +              (TYPE_PRECISION (TREE_TYPE (@0)), UNSIGNED)); })

build_minus_one_cst (TREE_TYPE (@0))

> +  (if (TYPE_UNSIGNED (TREE_TYPE (@0))
> +       && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))
> +       && wi::eq_p (@1, -1))
> +   (out_eqneq_zero @0 { fold_convert (TREE_TYPE (@0), integer_zero_node) ;
> })

build_zero_cst (TREE_TYPE (@0))

> +  (if (TYPE_UNSIGNED (TREE_TYPE (@0))
> +       && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))
>         && wi::ne_p (@1, 0)
>         && single_use (@2))
>     (out @0 { wide_int_to_tree (TREE_TYPE (@0), wi::max_value
> -              (TYPE_PRECISION (TREE_TYPE (@0)), UNSIGNED) - @1); }))))
> +              (TYPE_PRECISION (TREE_TYPE (@0)), UNSIGNED) - @1); }))))))

For a little CSE I'd use

    (if (TYPE_UNSIGNED (TREE_TYPE (@0))
        && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
     (switch
      (if (wi::eq_p (@1, 1))
        ...)
      (if (wi::eq_p (@1, -1))
       ...)
      (if (wi::ne_p (@1, 0) && single_use (@2))
          ...)))

which is also nicer for indentation.

I also notice we dont' handle 1 - A CMP A anywhere.

Richard.

>  /* To detect overflow in unsigned A - B, A < B is simpler than A - B > A.
>     However, the detection logic for SUB_OVERFLOW in tree-ssa-math-opts.c
> diff --git a/gcc/testsuite/g++.dg/pr79095.C b/gcc/testsuite/g++.dg/pr79095.C
> new file mode 100644
> index 0000000..edf3739
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/pr79095.C
> @@ -0,0 +1,35 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Wuninitialized -O2" } */
> +
> +typedef __SIZE_TYPE__ size_t;
> +
> +struct S {
> +  int *p0, *p1, *p2;
> +
> +  size_t size () const { return p1 - p0; }
> +
> +  void f (size_t n) {
> +    if (n > size ())       // can't happen because
> +      foo (n - size ());   //   n is in [1, MIN(size() - 1, 3)]
> +    else if (n < size ())
> +      bar (p0 + n);
> +  }
> +
> +  void foo (size_t n)
> +  {
> +    size_t left = (size_t)(p2 - p1);
> +    if (left >= n)
> +      __builtin_memset (p2, 0, n * sizeof *p2); /*  { dg-bogus "maximum
> object" "false warning" } */
> +  }
> +
> +  void bar (int*);
> +};
> +
> +void f (S &s)
> +{
> +  size_t n = s.size ();
> +  if (n > 1 && n < 5)
> +    s.f (n - 1);
> +}
> +
> +
> diff --git a/gcc/testsuite/gcc.c-torture/execute/arith-1.c
> b/gcc/testsuite/gcc.c-torture/execute/arith-1.c
> index 58df322..6168d77 100644
> --- a/gcc/testsuite/gcc.c-torture/execute/arith-1.c
> +++ b/gcc/testsuite/gcc.c-torture/execute/arith-1.c
> @@ -7,9 +7,41 @@ sat_add (unsigned i)
>    return ret;
>  }
>
> +unsigned
> +sat_add2 (unsigned i)
> +{
> +  unsigned ret = i + 1;
> +  if (ret > i)
> +    return ret;
> +  return i;
> +}
> +
> +unsigned
> +sat_add3 (unsigned i)
> +{
> +  unsigned ret = i - 1;
> +  if (ret > i)
> +    ret = i;
> +  return ret;
> +}
> +
> +unsigned
> +sat_add4 (unsigned i)
> +{
> +  unsigned ret = i - 1;
> +  if (ret < i)
> +    return ret;
> +  return i;
> +}
>  main ()
>  {
>    if (sat_add (~0U) != ~0U)
>      abort ();
> +  if (sat_add2 (~0U) != ~0U)
> +    abort ();
> +  if (sat_add3 (0U) != 0U)
> +    abort ();
> +  if (sat_add4 (0U) != 0U)
> +    abort ();
>    exit (0);
>  }
> diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
> index d7d7a0d..b4b6d8a 100644
> --- a/gcc/tree-vrp.c
> +++ b/gcc/tree-vrp.c
> @@ -9550,8 +9550,9 @@ range_fits_type_p (value_range *vr, unsigned
> dest_precision, signop dest_sgn)
>     the original conditional.  */
>
>  static bool
> -simplify_cond_using_ranges (gcond *stmt)
> +simplify_cond_using_ranges (gimple_stmt_iterator *gsi)
>  {
> +  gcond *stmt = as_a <gcond *> (gsi_stmt (*gsi));
>    tree op0 = gimple_cond_lhs (stmt);
>    tree op1 = gimple_cond_rhs (stmt);
>    enum tree_code cond_code = gimple_cond_code (stmt);
> @@ -9720,6 +9721,14 @@ simplify_cond_using_ranges (gcond *stmt)
>         }
>      }
>
> +  /* Finally, just try match.pd simplification.  This can convert some
> +     overflow checks into simple equality checks (for example).  */
> +  if (fold_stmt (gsi, NULL))
> +    {
> +      update_stmt (gsi_stmt (*gsi));
> +      return true;
> +    }
> +
>    return false;
>  }
>
> @@ -10318,7 +10328,7 @@ simplify_stmt_using_ranges (gimple_stmt_iterator
> *gsi)
>         }
>      }
>    else if (gimple_code (stmt) == GIMPLE_COND)
> -    return simplify_cond_using_ranges (as_a <gcond *> (stmt));
> +    return simplify_cond_using_ranges (gsi);
>    else if (gimple_code (stmt) == GIMPLE_SWITCH)
>      return simplify_switch_using_ranges (as_a <gswitch *> (stmt));
>    else if (is_gimple_call (stmt)
>

Re: [PATCH][RFA][PR tree-optimization/79095] Improve overflow test optimization and avoid invalid warnings

Reply via email to