On Fri, 18 Oct 2024, Richard Sandiford wrote: > I tried to look for places where we were handling TRUNC_DIV_EXPR > more favourably than EXACT_DIV_EXPR. > > Most of the places that I looked at but didn't change were handling > div/mod pairs. But there's bound to be others I missed...
OK, but I'd prefer trunc_or_exact_div_p to be explicit. Thanks, Richard. > gcc/ > * match.pd: Extend some rules to handle exact_div like trunc_div. > * tree.h (trunc_div_p): New function. > * tree-ssa-loop-niter.cc (is_rshift_by_1): Use it. > * tree-ssa-loop-ivopts.cc (force_expr_to_var_cost): Handle > EXACT_DIV_EXPR. > --- > gcc/match.pd | 60 +++++++++++++++++++------------------ > gcc/tree-ssa-loop-ivopts.cc | 2 ++ > gcc/tree-ssa-loop-niter.cc | 2 +- > gcc/tree.h | 13 ++++++++ > 4 files changed, 47 insertions(+), 30 deletions(-) > > diff --git a/gcc/match.pd b/gcc/match.pd > index 12d81fcac0d..4aea028a866 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -492,27 +492,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > of A starting from shift's type sign bit are zero, as > (unsigned long long) (1 << 31) is -2147483648ULL, not 2147483648ULL, > so it is valid only if A >> 31 is zero. */ > -(simplify > - (trunc_div (convert?@0 @3) (convert2? (lshift integer_onep@1 @2))) > - (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0)) > - && (!VECTOR_TYPE_P (type) > - || target_supports_op_p (type, RSHIFT_EXPR, optab_vector) > - || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar)) > - && (useless_type_conversion_p (type, TREE_TYPE (@1)) > - || (element_precision (type) >= element_precision (TREE_TYPE (@1)) > - && (TYPE_UNSIGNED (TREE_TYPE (@1)) > - || (element_precision (type) > - == element_precision (TREE_TYPE (@1))) > - || (INTEGRAL_TYPE_P (type) > - && (tree_nonzero_bits (@0) > - & wi::mask (element_precision (TREE_TYPE (@1)) - 1, > - true, > - element_precision (type))) == 0))))) > - (if (!VECTOR_TYPE_P (type) > - && useless_type_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)) > - && element_precision (TREE_TYPE (@3)) < element_precision (type)) > - (convert (rshift @3 @2)) > - (rshift @0 @2)))) > +(for div (trunc_div exact_div) > + (simplify > + (div (convert?@0 @3) (convert2? (lshift integer_onep@1 @2))) > + (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0)) > + && (!VECTOR_TYPE_P (type) > + || target_supports_op_p (type, RSHIFT_EXPR, optab_vector) > + || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar)) > + && (useless_type_conversion_p (type, TREE_TYPE (@1)) > + || (element_precision (type) >= element_precision (TREE_TYPE (@1)) > + && (TYPE_UNSIGNED (TREE_TYPE (@1)) > + || (element_precision (type) > + == element_precision (TREE_TYPE (@1))) > + || (INTEGRAL_TYPE_P (type) > + && (tree_nonzero_bits (@0) > + & wi::mask (element_precision (TREE_TYPE (@1)) - 1, > + true, > + element_precision (type))) == 0))))) > + (if (!VECTOR_TYPE_P (type) > + && useless_type_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)) > + && element_precision (TREE_TYPE (@3)) < element_precision (type)) > + (convert (rshift @3 @2)) > + (rshift @0 @2))))) > > /* Preserve explicit divisions by 0: the C++ front-end wants to detect > undefined behavior in constexpr evaluation, and assuming that the division > @@ -947,13 +948,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > { build_one_cst (utype); }))))))) > > /* Simplify (unsigned t * 2)/2 -> unsigned t & 0x7FFFFFFF. */ > -(simplify > - (trunc_div (mult @0 integer_pow2p@1) @1) > - (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) && TYPE_UNSIGNED (TREE_TYPE (@0))) > - (bit_and @0 { wide_int_to_tree > - (type, wi::mask (TYPE_PRECISION (type) > - - wi::exact_log2 (wi::to_wide (@1)), > - false, TYPE_PRECISION (type))); }))) > +(for div (trunc_div exact_div) > + (simplify > + (div (mult @0 integer_pow2p@1) @1) > + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) && TYPE_UNSIGNED (TREE_TYPE (@0))) > + (bit_and @0 { wide_int_to_tree > + (type, wi::mask (TYPE_PRECISION (type) > + - wi::exact_log2 (wi::to_wide (@1)), > + false, TYPE_PRECISION (type))); })))) > > /* Simplify (unsigned t / 2) * 2 -> unsigned t & ~1. */ > (simplify > @@ -5715,7 +5717,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > > /* Sink binary operation to branches, but only if we can fold it. */ > (for op (tcc_comparison plus minus mult bit_and bit_ior bit_xor > - lshift rshift rdiv trunc_div ceil_div floor_div round_div > + lshift rshift rdiv trunc_div ceil_div floor_div round_div exact_div > trunc_mod ceil_mod floor_mod round_mod min max) > /* (c ? a : b) op (c ? d : e) --> c ? (a op d) : (b op e) */ > (simplify > diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc > index 7441324aec2..68d091dc4a3 100644 > --- a/gcc/tree-ssa-loop-ivopts.cc > +++ b/gcc/tree-ssa-loop-ivopts.cc > @@ -4369,6 +4369,7 @@ force_expr_to_var_cost (tree expr, bool speed) > case PLUS_EXPR: > case MINUS_EXPR: > case MULT_EXPR: > + case EXACT_DIV_EXPR: > case TRUNC_DIV_EXPR: > case BIT_AND_EXPR: > case BIT_IOR_EXPR: > @@ -4482,6 +4483,7 @@ force_expr_to_var_cost (tree expr, bool speed) > return comp_cost (target_spill_cost [speed], 0); > break; > > + case EXACT_DIV_EXPR: > case TRUNC_DIV_EXPR: > /* Division by power of two is usually cheap, so we allow it. Forbid > anything else. */ > diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc > index f87731ef892..38be41b06b3 100644 > --- a/gcc/tree-ssa-loop-niter.cc > +++ b/gcc/tree-ssa-loop-niter.cc > @@ -2328,7 +2328,7 @@ is_rshift_by_1 (gassign *stmt) > if (gimple_assign_rhs_code (stmt) == RSHIFT_EXPR > && integer_onep (gimple_assign_rhs2 (stmt))) > return true; > - if (gimple_assign_rhs_code (stmt) == TRUNC_DIV_EXPR > + if (trunc_div_p (gimple_assign_rhs_code (stmt)) > && tree_fits_shwi_p (gimple_assign_rhs2 (stmt)) > && tree_to_shwi (gimple_assign_rhs2 (stmt)) == 2) > return true; > diff --git a/gcc/tree.h b/gcc/tree.h > index d324a3f42a6..db91cd685dc 100644 > --- a/gcc/tree.h > +++ b/gcc/tree.h > @@ -5601,6 +5601,19 @@ struct_ptr_hash (const void *a) > return (intptr_t)*x >> 4; > } > > +/* Return true if CODE can be treated as a truncating division. > + > + EXACT_DIV_EXPR can be treated as a truncating division in which the > + remainder is known to be zero. However, if trunc_div_p gates the > + generation of new IL, the conservative choice for that new IL is > + TRUNC_DIV_EXPR rather than CODE. Using CODE (EXACT_DIV_EXPR) would > + only be correct if the transformation preserves exactness. */ > +inline bool > +trunc_div_p (tree_code code) > +{ > + return code == TRUNC_DIV_EXPR || code == EXACT_DIV_EXPR; > +} > + > /* Return nonzero if CODE is a tree code that represents a truth value. */ > inline bool > truth_value_p (enum tree_code code) > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)