On Fri, 18 Oct 2024, Richard Sandiford wrote:

> I tried to look for places where we were handling TRUNC_DIV_EXPR
> more favourably than EXACT_DIV_EXPR.
> 
> Most of the places that I looked at but didn't change were handling
> div/mod pairs.  But there's bound to be others I missed...

OK, but I'd prefer trunc_or_exact_div_p to be explicit.

Thanks,
Richard.

> gcc/
>       * match.pd: Extend some rules to handle exact_div like trunc_div.
>       * tree.h (trunc_div_p): New function.
>       * tree-ssa-loop-niter.cc (is_rshift_by_1): Use it.
>       * tree-ssa-loop-ivopts.cc (force_expr_to_var_cost): Handle
>       EXACT_DIV_EXPR.
> ---
>  gcc/match.pd                | 60 +++++++++++++++++++------------------
>  gcc/tree-ssa-loop-ivopts.cc |  2 ++
>  gcc/tree-ssa-loop-niter.cc  |  2 +-
>  gcc/tree.h                  | 13 ++++++++
>  4 files changed, 47 insertions(+), 30 deletions(-)
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 12d81fcac0d..4aea028a866 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -492,27 +492,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>     of A starting from shift's type sign bit are zero, as
>     (unsigned long long) (1 << 31) is -2147483648ULL, not 2147483648ULL,
>     so it is valid only if A >> 31 is zero.  */
> -(simplify
> - (trunc_div (convert?@0 @3) (convert2? (lshift integer_onep@1 @2)))
> - (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0))
> -      && (!VECTOR_TYPE_P (type)
> -       || target_supports_op_p (type, RSHIFT_EXPR, optab_vector)
> -       || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar))
> -      && (useless_type_conversion_p (type, TREE_TYPE (@1))
> -       || (element_precision (type) >= element_precision (TREE_TYPE (@1))
> -           && (TYPE_UNSIGNED (TREE_TYPE (@1))
> -               || (element_precision (type)
> -                   == element_precision (TREE_TYPE (@1)))
> -               || (INTEGRAL_TYPE_P (type)
> -                   && (tree_nonzero_bits (@0)
> -                       & wi::mask (element_precision (TREE_TYPE (@1)) - 1,
> -                                   true,
> -                                   element_precision (type))) == 0)))))
> -   (if (!VECTOR_TYPE_P (type)
> -     && useless_type_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1))
> -     && element_precision (TREE_TYPE (@3)) < element_precision (type))
> -    (convert (rshift @3 @2))
> -    (rshift @0 @2))))
> +(for div (trunc_div exact_div)
> + (simplify
> +  (div (convert?@0 @3) (convert2? (lshift integer_onep@1 @2)))
> +  (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0))
> +       && (!VECTOR_TYPE_P (type)
> +        || target_supports_op_p (type, RSHIFT_EXPR, optab_vector)
> +        || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar))
> +       && (useless_type_conversion_p (type, TREE_TYPE (@1))
> +        || (element_precision (type) >= element_precision (TREE_TYPE (@1))
> +            && (TYPE_UNSIGNED (TREE_TYPE (@1))
> +                || (element_precision (type)
> +                    == element_precision (TREE_TYPE (@1)))
> +                || (INTEGRAL_TYPE_P (type)
> +                    && (tree_nonzero_bits (@0)
> +                        & wi::mask (element_precision (TREE_TYPE (@1)) - 1,
> +                                    true,
> +                                    element_precision (type))) == 0)))))
> +    (if (!VECTOR_TYPE_P (type)
> +      && useless_type_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1))
> +      && element_precision (TREE_TYPE (@3)) < element_precision (type))
> +     (convert (rshift @3 @2))
> +     (rshift @0 @2)))))
>  
>  /* Preserve explicit divisions by 0: the C++ front-end wants to detect
>     undefined behavior in constexpr evaluation, and assuming that the division
> @@ -947,13 +948,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>                               { build_one_cst (utype); })))))))
>  
>  /* Simplify (unsigned t * 2)/2 -> unsigned t & 0x7FFFFFFF.  */
> -(simplify
> - (trunc_div (mult @0 integer_pow2p@1) @1)
> - (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) && TYPE_UNSIGNED (TREE_TYPE (@0)))
> -  (bit_and @0 { wide_int_to_tree
> -             (type, wi::mask (TYPE_PRECISION (type)
> -                              - wi::exact_log2 (wi::to_wide (@1)),
> -                              false, TYPE_PRECISION (type))); })))
> +(for div (trunc_div exact_div)
> + (simplify
> +  (div (mult @0 integer_pow2p@1) @1)
> +  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) && TYPE_UNSIGNED (TREE_TYPE (@0)))
> +   (bit_and @0 { wide_int_to_tree
> +              (type, wi::mask (TYPE_PRECISION (type)
> +                               - wi::exact_log2 (wi::to_wide (@1)),
> +                               false, TYPE_PRECISION (type))); }))))
>  
>  /* Simplify (unsigned t / 2) * 2 -> unsigned t & ~1.  */
>  (simplify
> @@ -5715,7 +5717,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  
>  /* Sink binary operation to branches, but only if we can fold it.  */
>  (for op (tcc_comparison plus minus mult bit_and bit_ior bit_xor
> -      lshift rshift rdiv trunc_div ceil_div floor_div round_div
> +      lshift rshift rdiv trunc_div ceil_div floor_div round_div exact_div
>        trunc_mod ceil_mod floor_mod round_mod min max)
>  /* (c ? a : b) op (c ? d : e)  -->  c ? (a op d) : (b op e) */
>   (simplify
> diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc
> index 7441324aec2..68d091dc4a3 100644
> --- a/gcc/tree-ssa-loop-ivopts.cc
> +++ b/gcc/tree-ssa-loop-ivopts.cc
> @@ -4369,6 +4369,7 @@ force_expr_to_var_cost (tree expr, bool speed)
>      case PLUS_EXPR:
>      case MINUS_EXPR:
>      case MULT_EXPR:
> +    case EXACT_DIV_EXPR:
>      case TRUNC_DIV_EXPR:
>      case BIT_AND_EXPR:
>      case BIT_IOR_EXPR:
> @@ -4482,6 +4483,7 @@ force_expr_to_var_cost (tree expr, bool speed)
>       return comp_cost (target_spill_cost [speed], 0);
>        break;
>  
> +    case EXACT_DIV_EXPR:
>      case TRUNC_DIV_EXPR:
>        /* Division by power of two is usually cheap, so we allow it.  Forbid
>        anything else.  */
> diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
> index f87731ef892..38be41b06b3 100644
> --- a/gcc/tree-ssa-loop-niter.cc
> +++ b/gcc/tree-ssa-loop-niter.cc
> @@ -2328,7 +2328,7 @@ is_rshift_by_1 (gassign *stmt)
>    if (gimple_assign_rhs_code (stmt) == RSHIFT_EXPR
>        && integer_onep (gimple_assign_rhs2 (stmt)))
>      return true;
> -  if (gimple_assign_rhs_code (stmt) == TRUNC_DIV_EXPR
> +  if (trunc_div_p (gimple_assign_rhs_code (stmt))
>        && tree_fits_shwi_p (gimple_assign_rhs2 (stmt))
>        && tree_to_shwi (gimple_assign_rhs2 (stmt)) == 2)
>      return true;
> diff --git a/gcc/tree.h b/gcc/tree.h
> index d324a3f42a6..db91cd685dc 100644
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -5601,6 +5601,19 @@ struct_ptr_hash (const void *a)
>    return (intptr_t)*x >> 4;
>  }
>  
> +/* Return true if CODE can be treated as a truncating division.
> +
> +   EXACT_DIV_EXPR can be treated as a truncating division in which the
> +   remainder is known to be zero.  However, if trunc_div_p gates the
> +   generation of new IL, the conservative choice for that new IL is
> +   TRUNC_DIV_EXPR rather than CODE.  Using CODE (EXACT_DIV_EXPR) would
> +   only be correct if the transformation preserves exactness.  */
> +inline bool
> +trunc_div_p (tree_code code)
> +{
> +  return code == TRUNC_DIV_EXPR || code == EXACT_DIV_EXPR;
> +}
> +
>  /* Return nonzero if CODE is a tree code that represents a truth value.  */
>  inline bool
>  truth_value_p (enum tree_code code)
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to