https://gcc.gnu.org/g:497e567c806af40f3ec3b135fae84de8d8b23818
commit 497e567c806af40f3ec3b135fae84de8d8b23818 Author: Alexandre Oliva <ol...@gnu.org> Date: Fri Nov 15 23:18:20 2024 -0300 drop expensive mergeable tests in favor of gimple_vuse compares Diff: --- gcc/gimple-fold.cc | 179 +++++++++------------------------- gcc/testsuite/gcc.dg/field-merge-10.c | 36 +++++++ gcc/tree-ssa-ifcombine.cc | 3 +- 3 files changed, 86 insertions(+), 132 deletions(-) diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index b7b4558ee2b4..6cb03ee09d19 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -7721,7 +7721,7 @@ build_split_load (tree /* out */ ln_arg[2], tree type = lang_hooks.types.type_for_size (bitsiz[i], 1); bitpos[i] = bit_pos; ln_arg[i] = make_bit_field_load (loc, inner, orig_inner, - type, bitsiz[i], + type, bitsiz[i], bit_pos, 1, reversep, point[i]); bit_pos += bitsiz[i]; } @@ -7781,110 +7781,58 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2], } } -/* Return nonzero if LSTMT and RSTMT, assumed to load from the same - words, can be safely merged into a single load. Return zero if - there are intervening stores, or if neither stmt dominates the - other. If they are mergeable, return -1 if the merged load should - be inserted before LSTMT to dominate both, otherwise return +1. */ - -static int -mergeable_loads_p (gimple *lstmt, gimple *rstmt) -{ - gimple *stmts[2] = { lstmt, rstmt }; - int ret; - - if (stmt_dominates_stmt_p (lstmt, rstmt)) - { - ret = -1; - stmts[0] = lstmt; - stmts[1] = rstmt; - } - else if (stmt_dominates_stmt_p (rstmt, lstmt)) - { - ret = 1; - stmts[1] = lstmt; - stmts[0] = rstmt; - } - else - return 0; - - basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) }; - if (bbs[0] == bbs[1]) - { - for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]); - gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi)) - if (gimple_vdef (gsi_stmt (gsi))) - return 0; - return ret; - } - - for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]); - !gsi_end_p (gsi); gsi_next (&gsi)) - if (gimple_vdef (gsi_stmt (gsi))) - return 0; +/* Find ways of folding logical expressions of LHS and RHS: - for (gphi_iterator gpi = gsi_start_phis (bbs[1]); - !gsi_end_p (gpi); gsi_next (&gpi)) - if (gimple_vdef (gsi_stmt (gpi))) - return 0; + Try to merge two comparisons to nearby fields. - for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]); - !gsi_end_p (gsi); gsi_prev (&gsi)) - if (gimple_vdef (gsi_stmt (gsi))) - return 0; - - for (basic_block bb = bbs[1]; bb != bbs[0]; ) - { - /* ??? For now, stop if any visited block has more than one - predecessor. */ - if (!single_pred_p (bb)) - return 0; + For example, if we have p->a == 2 && p->b == 4 and we can load both A and B + at once, we can do this with a comparison against the object ANDed with the + a mask. - bb = single_pred (bb); + If we have p->a == q->a && p->b == q->b, we may be able to use bit masking + operations to do this with one comparison, loading both fields from P at + once, and likewise from Q. - for (gphi_iterator gpi = gsi_start_phis (bbs[1]); - !gsi_end_p (gpi); gsi_next (&gpi)) - if (gimple_vdef (gsi_stmt (gpi))) - return 0; + Herein, loading at once means loading from within the same alignment + boundary for the enclosing object. If (packed) fields cross such alignment + boundaries, we may still recombine the compares, so that loads do not cross + the boundaries. - for (gimple_stmt_iterator gsi = gsi_start_bb (bb); - !gsi_end_p (gsi); gsi_next (&gsi)) - if (gimple_vdef (gsi_stmt (gsi))) - return 0; - } + CODE is the logical operation being done. It can be TRUTH_ANDIF_EXPR, + TRUTH_AND_EXPR, TRUTH_ORIF_EXPR, or TRUTH_OR_EXPR. - return ret; -} + TRUTH_TYPE is the type of the logical operand. -/* Find ways of folding logical expressions of LHS and RHS: - Try to merge two comparisons to the same innermost item. - Look for range tests like "ch >= '0' && ch <= '9'". - Look for combinations of simple terms on machines with expensive branches - and evaluate the RHS unconditionally. + LHS is denoted as LL_ARG LCODE LR_ARG. - For example, if we have p->a == 2 && p->b == 4 and we can make an - object large enough to span both A and B, we can do this with a comparison - against the object ANDed with the a mask. + RHS is denoted as RL_ARG RCODE RR_ARG. - If we have p->a == q->a && p->b == q->b, we may be able to use bit masking - operations to do this with one comparison. + LHS is assumed to dominate RHS. - We check for both normal comparisons and the BIT_AND_EXPRs made this by - function and the one above. + Combined loads are inserted next to preexisting loads, once we determine + that the combination is viable, and the combined condition references new + SSA_NAMEs that hold the loaded values. Since the original loads are + verified to have the same gimple_vuse, the insertion point doesn't matter + for correctness. ??? The loads may be a lot earlier than the compares, and + it's conceivable that one or two loads for RHS appear before those for LHS. + It could be advantageous to try to place the loads optimally, taking + advantage of knowing whether RHS is accessed before LHS, or that both are + accessed before both compares, but we don't do that (yet?). - CODE is the logical operation being done. It can be TRUTH_ANDIF_EXPR, - TRUTH_AND_EXPR, TRUTH_ORIF_EXPR, or TRUTH_OR_EXPR. + SEPARATEP should be NULL if the combined condition must be returned as a + single expression, even if it is a compound condition. This must only be + done if LHS and RHS are adjacent, without intervening conditions, and the + combined condition is to replace RHS, while LHS is dropped altogether. - TRUTH_TYPE is the type of the logical operand and LHS and RHS are its - two operands. + Otherwise, SEPARATEP must be a non-NULL pointer to a NULL_TREE, that may be + replaced by a part of the compound condition that could replace RHS, while + the returned expression replaces LHS. - SEPARATEP should be NULL if LHS and RHS are adjacent in - CODE-chained compares, and a non-NULL pointer to NULL_TREE - otherwise. If the "words" accessed by RHS are already accessed by - LHS, this won't matter, but if RHS accesses "words" that LHS - doesn't, then *SEPARATEP will be set to the compares that should - take RHS's place. By "words" we mean contiguous bits that do not - cross a an TYPE_ALIGN boundary of the accessed object's type. + If the "words" accessed by RHS are already accessed by LHS, this won't + matter, but if RHS accesses "words" that LHS doesn't, then *SEPARATEP will + be set to the compares that should take RHS's place. By "words" we mean + contiguous bits that do not cross a an TYPE_ALIGN boundary of the accessed + object's type. We return the simplified tree or 0 if no optimization is possible. */ @@ -7908,7 +7856,6 @@ fold_truth_andor_maybe_separate (location_t loc, enum tree_code wanted_code; tree ll_inner, lr_inner, rl_inner, rr_inner; gimple *ll_load, *lr_load, *rl_load, *rr_load; - int l_mergeable = 0, r_mergeable = 0; HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos; HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos; HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos; @@ -8015,7 +7962,7 @@ fold_truth_andor_maybe_separate (location_t loc, || ll_inner == 0 || rl_inner == 0 || ! operand_equal_p (ll_inner, rl_inner, 0) || (ll_load && rl_load - && ! (l_mergeable = mergeable_loads_p (ll_load, rl_load)))) + && gimple_vuse (ll_load) != gimple_vuse (rl_load))) return 0; if (TREE_CODE (lr_arg) == INTEGER_CST @@ -8028,7 +7975,7 @@ fold_truth_andor_maybe_separate (location_t loc, || lr_inner == 0 || rr_inner == 0 || ! operand_equal_p (lr_inner, rr_inner, 0) || (lr_load && rr_load - && ! (r_mergeable = mergeable_loads_p (lr_load, rr_load)))) + && gimple_vuse (lr_load) != gimple_vuse (rr_load))) return 0; if (lsignbit) @@ -8340,12 +8287,7 @@ fold_truth_andor_maybe_separate (location_t loc, ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg, rntype, rnbitsize, rnbitpos, lr_unsignedp || rr_unsignedp, - lr_reversep, - r_mergeable == 0 - ? NULL - : r_mergeable < 0 - ? lr_load - : rr_load); + lr_reversep, lr_load); } if (parts > 1) @@ -8353,18 +8295,8 @@ fold_truth_andor_maybe_separate (location_t loc, if (r_split_load) { gimple *point[2]; - if (r_mergeable == 0) - point[0] = point[1] = NULL; - else if (r_mergeable < 0) - { - point[0] = lr_load; - point[1] = rr_load; - } - else - { - point[0] = rr_load; - point[1] = lr_load; - } + point[0] = lr_load; + point[1] = rr_load; build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1], shifted[1], loc, lr_inner, lr_arg, rnmode, rnmode2, rnbitpos, lr_reversep, point); @@ -8430,12 +8362,7 @@ fold_truth_andor_maybe_separate (location_t loc, ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg, lntype, lnbitsize, lnbitpos, ll_unsignedp || rl_unsignedp, - ll_reversep, - l_mergeable == 0 - ? NULL - : l_mergeable < 0 - ? ll_load - : rl_load); + ll_reversep, ll_load); } if (parts > 1) @@ -8443,18 +8370,8 @@ fold_truth_andor_maybe_separate (location_t loc, if (l_split_load) { gimple *point[2]; - if (l_mergeable == 0) - point[0] = point[1] = NULL; - else if (l_mergeable < 0) - { - point[0] = ll_load; - point[1] = rl_load; - } - else - { - point[0] = rl_load; - point[1] = ll_load; - } + point[0] = ll_load; + point[1] = rl_load; build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0], shifted[0], loc, ll_inner, ll_arg, lnmode, lnmode2, lnbitpos, ll_reversep, point); diff --git a/gcc/testsuite/gcc.dg/field-merge-10.c b/gcc/testsuite/gcc.dg/field-merge-10.c new file mode 100644 index 000000000000..dca6febb7585 --- /dev/null +++ b/gcc/testsuite/gcc.dg/field-merge-10.c @@ -0,0 +1,36 @@ +/* { dg-do run } */ +/* { dg-options "-O" } */ + +/* Check that we don't move loads across stores. */ + +struct s { + short a; + short b; +} __attribute__ ((aligned (4))); + +struct s p[2] = { + { 42, 0xfe01 }, + { 42, 0xfe10 } +}; + +void f (void) { + short a0 = p[0].a; + short b0 = p[0].b; + short a1 = p[1].a; + short b1 = p[1].b; + __builtin_memset (p, 0, sizeof (p)); + asm ("" : "+m" (p)); + if (0 + || p[0].a != p[1].a + || p[0].b != p[1].b + ) + __builtin_abort (); + if (a0 == a1) + if (b0 == b1) + __builtin_abort (); +} + +int main () { + f (); + return 0; +} diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index d204209923a8..dff6e02faa31 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -952,7 +952,8 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, inner_cond_code, gimple_cond_lhs (inner_cond), gimple_cond_rhs (inner_cond), - &ts)))) + single_pred (inner_cond_bb) == outer_cond_bb + ? &ts : 0)))) { tree t1, t2; bool logical_op_non_short_circuit = LOGICAL_OP_NON_SHORT_CIRCUIT;