I tried to look for places where we were handling TRUNC_DIV_EXPR more favourably than EXACT_DIV_EXPR.
Most of the places that I looked at but didn't change were handling div/mod pairs. But there's bound to be others I missed... gcc/ * match.pd: Extend some rules to handle exact_div like trunc_div. * tree.h (trunc_div_p): New function. * tree-ssa-loop-niter.cc (is_rshift_by_1): Use it. * tree-ssa-loop-ivopts.cc (force_expr_to_var_cost): Handle EXACT_DIV_EXPR. --- gcc/match.pd | 60 +++++++++++++++++++------------------ gcc/tree-ssa-loop-ivopts.cc | 2 ++ gcc/tree-ssa-loop-niter.cc | 2 +- gcc/tree.h | 13 ++++++++ 4 files changed, 47 insertions(+), 30 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 12d81fcac0d..4aea028a866 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -492,27 +492,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) of A starting from shift's type sign bit are zero, as (unsigned long long) (1 << 31) is -2147483648ULL, not 2147483648ULL, so it is valid only if A >> 31 is zero. */ -(simplify - (trunc_div (convert?@0 @3) (convert2? (lshift integer_onep@1 @2))) - (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0)) - && (!VECTOR_TYPE_P (type) - || target_supports_op_p (type, RSHIFT_EXPR, optab_vector) - || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar)) - && (useless_type_conversion_p (type, TREE_TYPE (@1)) - || (element_precision (type) >= element_precision (TREE_TYPE (@1)) - && (TYPE_UNSIGNED (TREE_TYPE (@1)) - || (element_precision (type) - == element_precision (TREE_TYPE (@1))) - || (INTEGRAL_TYPE_P (type) - && (tree_nonzero_bits (@0) - & wi::mask (element_precision (TREE_TYPE (@1)) - 1, - true, - element_precision (type))) == 0))))) - (if (!VECTOR_TYPE_P (type) - && useless_type_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)) - && element_precision (TREE_TYPE (@3)) < element_precision (type)) - (convert (rshift @3 @2)) - (rshift @0 @2)))) +(for div (trunc_div exact_div) + (simplify + (div (convert?@0 @3) (convert2? (lshift integer_onep@1 @2))) + (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0)) + && (!VECTOR_TYPE_P (type) + || target_supports_op_p (type, RSHIFT_EXPR, optab_vector) + || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar)) + && (useless_type_conversion_p (type, TREE_TYPE (@1)) + || (element_precision (type) >= element_precision (TREE_TYPE (@1)) + && (TYPE_UNSIGNED (TREE_TYPE (@1)) + || (element_precision (type) + == element_precision (TREE_TYPE (@1))) + || (INTEGRAL_TYPE_P (type) + && (tree_nonzero_bits (@0) + & wi::mask (element_precision (TREE_TYPE (@1)) - 1, + true, + element_precision (type))) == 0))))) + (if (!VECTOR_TYPE_P (type) + && useless_type_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)) + && element_precision (TREE_TYPE (@3)) < element_precision (type)) + (convert (rshift @3 @2)) + (rshift @0 @2))))) /* Preserve explicit divisions by 0: the C++ front-end wants to detect undefined behavior in constexpr evaluation, and assuming that the division @@ -947,13 +948,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) { build_one_cst (utype); }))))))) /* Simplify (unsigned t * 2)/2 -> unsigned t & 0x7FFFFFFF. */ -(simplify - (trunc_div (mult @0 integer_pow2p@1) @1) - (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) && TYPE_UNSIGNED (TREE_TYPE (@0))) - (bit_and @0 { wide_int_to_tree - (type, wi::mask (TYPE_PRECISION (type) - - wi::exact_log2 (wi::to_wide (@1)), - false, TYPE_PRECISION (type))); }))) +(for div (trunc_div exact_div) + (simplify + (div (mult @0 integer_pow2p@1) @1) + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) && TYPE_UNSIGNED (TREE_TYPE (@0))) + (bit_and @0 { wide_int_to_tree + (type, wi::mask (TYPE_PRECISION (type) + - wi::exact_log2 (wi::to_wide (@1)), + false, TYPE_PRECISION (type))); })))) /* Simplify (unsigned t / 2) * 2 -> unsigned t & ~1. */ (simplify @@ -5715,7 +5717,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* Sink binary operation to branches, but only if we can fold it. */ (for op (tcc_comparison plus minus mult bit_and bit_ior bit_xor - lshift rshift rdiv trunc_div ceil_div floor_div round_div + lshift rshift rdiv trunc_div ceil_div floor_div round_div exact_div trunc_mod ceil_mod floor_mod round_mod min max) /* (c ? a : b) op (c ? d : e) --> c ? (a op d) : (b op e) */ (simplify diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc index 7441324aec2..68d091dc4a3 100644 --- a/gcc/tree-ssa-loop-ivopts.cc +++ b/gcc/tree-ssa-loop-ivopts.cc @@ -4369,6 +4369,7 @@ force_expr_to_var_cost (tree expr, bool speed) case PLUS_EXPR: case MINUS_EXPR: case MULT_EXPR: + case EXACT_DIV_EXPR: case TRUNC_DIV_EXPR: case BIT_AND_EXPR: case BIT_IOR_EXPR: @@ -4482,6 +4483,7 @@ force_expr_to_var_cost (tree expr, bool speed) return comp_cost (target_spill_cost [speed], 0); break; + case EXACT_DIV_EXPR: case TRUNC_DIV_EXPR: /* Division by power of two is usually cheap, so we allow it. Forbid anything else. */ diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc index f87731ef892..38be41b06b3 100644 --- a/gcc/tree-ssa-loop-niter.cc +++ b/gcc/tree-ssa-loop-niter.cc @@ -2328,7 +2328,7 @@ is_rshift_by_1 (gassign *stmt) if (gimple_assign_rhs_code (stmt) == RSHIFT_EXPR && integer_onep (gimple_assign_rhs2 (stmt))) return true; - if (gimple_assign_rhs_code (stmt) == TRUNC_DIV_EXPR + if (trunc_div_p (gimple_assign_rhs_code (stmt)) && tree_fits_shwi_p (gimple_assign_rhs2 (stmt)) && tree_to_shwi (gimple_assign_rhs2 (stmt)) == 2) return true; diff --git a/gcc/tree.h b/gcc/tree.h index d324a3f42a6..db91cd685dc 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -5601,6 +5601,19 @@ struct_ptr_hash (const void *a) return (intptr_t)*x >> 4; } +/* Return true if CODE can be treated as a truncating division. + + EXACT_DIV_EXPR can be treated as a truncating division in which the + remainder is known to be zero. However, if trunc_div_p gates the + generation of new IL, the conservative choice for that new IL is + TRUNC_DIV_EXPR rather than CODE. Using CODE (EXACT_DIV_EXPR) would + only be correct if the transformation preserves exactness. */ +inline bool +trunc_div_p (tree_code code) +{ + return code == TRUNC_DIV_EXPR || code == EXACT_DIV_EXPR; +} + /* Return nonzero if CODE is a tree code that represents a truth value. */ inline bool truth_value_p (enum tree_code code) -- 2.25.1