https://gcc.gnu.org/g:2beb301d622407e38dddf37b5d986f34de0cc715
commit 2beb301d622407e38dddf37b5d986f34de0cc715 Author: Alexandre Oliva <ol...@gnu.org> Date: Thu Nov 21 22:36:55 2024 -0300 drop expensive mergeable tests in favor of gimple_vuse compares Diff: --- gcc/gimple-fold.cc | 182 ++++++++++------------------------ gcc/testsuite/gcc.dg/field-merge-10.c | 36 +++++++ 2 files changed, 87 insertions(+), 131 deletions(-) diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index 7d7471e5ed76..fe7fcf471d43 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -7763,7 +7763,7 @@ build_split_load (tree /* out */ ln_arg[2], tree type = lang_hooks.types.type_for_size (bitsiz[i], 1); bitpos[i] = bit_pos; ln_arg[i] = make_bit_field_load (loc, inner, orig_inner, - type, bitsiz[i], + type, bitsiz[i], bit_pos, 1, reversep, point[i]); bit_pos += bitsiz[i]; } @@ -7825,110 +7825,61 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2], } } -/* Return nonzero if LSTMT and RSTMT, assumed to load from the same - words, can be safely merged into a single load. Return zero if - there are intervening stores, or if neither stmt dominates the - other. If they are mergeable, return -1 if the merged load should - be inserted before LSTMT to dominate both, otherwise return +1. */ - -static int -mergeable_loads_p (gimple *lstmt, gimple *rstmt) -{ - gimple *stmts[2] = { lstmt, rstmt }; - int ret; - - if (stmt_dominates_stmt_p (lstmt, rstmt)) - { - ret = -1; - stmts[0] = lstmt; - stmts[1] = rstmt; - } - else if (stmt_dominates_stmt_p (rstmt, lstmt)) - { - ret = 1; - stmts[1] = lstmt; - stmts[0] = rstmt; - } - else - return 0; - - basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) }; - if (bbs[0] == bbs[1]) - { - for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]); - gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi)) - if (gimple_vdef (gsi_stmt (gsi))) - return 0; - return ret; - } - - for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]); - !gsi_end_p (gsi); gsi_next (&gsi)) - if (gimple_vdef (gsi_stmt (gsi))) - return 0; +/* Find ways of folding logical expressions of LHS and RHS: - for (gphi_iterator gpi = gsi_start_phis (bbs[1]); - !gsi_end_p (gpi); gsi_next (&gpi)) - if (gimple_vdef (gsi_stmt (gpi))) - return 0; + Try to merge two comparisons to nearby fields. - for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]); - !gsi_end_p (gsi); gsi_prev (&gsi)) - if (gimple_vdef (gsi_stmt (gsi))) - return 0; - - for (basic_block bb = bbs[1]; bb != bbs[0]; ) - { - /* ??? For now, stop if any visited block has more than one - predecessor. */ - if (!single_pred_p (bb)) - return 0; + For example, if we have p->a == 2 && p->b == 4 and we can load both A and B + at once, we can do this with a comparison against the object ANDed with the + a mask. - bb = single_pred (bb); + If we have p->a == q->a && p->b == q->b, we may be able to use bit masking + operations to do this with one comparison, loading both fields from P at + once, and likewise from Q. - for (gphi_iterator gpi = gsi_start_phis (bbs[1]); - !gsi_end_p (gpi); gsi_next (&gpi)) - if (gimple_vdef (gsi_stmt (gpi))) - return 0; + Herein, loading at once means loading from within the same alignment + boundary for the enclosing object. If (packed) fields cross such alignment + boundaries, we may still recombine the compares, so that loads do not cross + the boundaries. - for (gimple_stmt_iterator gsi = gsi_start_bb (bb); - !gsi_end_p (gsi); gsi_next (&gsi)) - if (gimple_vdef (gsi_stmt (gsi))) - return 0; - } + CODE is the logical operation being done. It can be TRUTH_ANDIF_EXPR, + TRUTH_AND_EXPR, TRUTH_ORIF_EXPR, or TRUTH_OR_EXPR. - return ret; -} + TRUTH_TYPE is the type of the logical operand. -/* Find ways of folding logical expressions of LHS and RHS: - Try to merge two comparisons to the same innermost item. - Look for range tests like "ch >= '0' && ch <= '9'". - Look for combinations of simple terms on machines with expensive branches - and evaluate the RHS unconditionally. + LHS is denoted as LL_ARG LCODE LR_ARG. - For example, if we have p->a == 2 && p->b == 4 and we can make an - object large enough to span both A and B, we can do this with a comparison - against the object ANDed with the a mask. + RHS is denoted as RL_ARG RCODE RR_ARG. - If we have p->a == q->a && p->b == q->b, we may be able to use bit masking - operations to do this with one comparison. + LHS is assumed to dominate RHS. - We check for both normal comparisons and the BIT_AND_EXPRs made this by - function and the one above. + Combined loads are inserted next to preexisting loads, once we determine + that the combination is viable, and the combined condition references new + SSA_NAMEs that hold the loaded values. Since the original loads are + verified to have the same gimple_vuse, the insertion point doesn't matter + for correctness. ??? The loads may be a lot earlier than the compares, and + it's conceivable that one or two loads for RHS appear before those for LHS. + It could be advantageous to try to place the loads optimally, taking + advantage of knowing whether RHS is accessed before LHS, or that both are + accessed before both compares, but we don't do that (yet?). - CODE is the logical operation being done. It can be TRUTH_ANDIF_EXPR, - TRUTH_AND_EXPR, TRUTH_ORIF_EXPR, or TRUTH_OR_EXPR. + SEPARATEP should be NULL if the combined condition must be returned as a + single expression, even if it is a compound condition. This must only be + done if LHS and RHS are adjacent, without intervening conditions, and the + combined condition is to replace RHS, while LHS is dropped altogether. + ??? This possibility is curently unused. - TRUTH_TYPE is the type of the logical operand and LHS and RHS are its - two operands. + Otherwise, SEPARATEP must be a non-NULL pointer to a NULL_TREE, that may be + replaced by a part of the compound condition that could replace RHS, while + the returned expression replaces LHS. This works whether or not LHS and RHS + are adjacent, as long as there aren't VDEFs or other side effects between + them. - SEPARATEP should be NULL if LHS and RHS are adjacent in - CODE-chained compares, and a non-NULL pointer to NULL_TREE - otherwise. If the "words" accessed by RHS are already accessed by - LHS, this won't matter, but if RHS accesses "words" that LHS - doesn't, then *SEPARATEP will be set to the compares that should - take RHS's place. By "words" we mean contiguous bits that do not - cross a an TYPE_ALIGN boundary of the accessed object's type. + If the "words" accessed by RHS are already accessed by LHS, this won't + matter, but if RHS accesses "words" that LHS doesn't, then *SEPARATEP will + be set to the compares that should take RHS's place. By "words" we mean + contiguous bits that do not cross a an TYPE_ALIGN boundary of the accessed + object's type. We return the simplified tree or 0 if no optimization is possible. */ @@ -7952,7 +7903,6 @@ fold_truth_andor_maybe_separate (location_t loc, enum tree_code wanted_code; tree ll_inner, lr_inner, rl_inner, rr_inner; gimple *ll_load, *lr_load, *rl_load, *rr_load; - int l_mergeable = 0, r_mergeable = 0; HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos; HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos; HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos; @@ -8058,7 +8008,7 @@ fold_truth_andor_maybe_separate (location_t loc, || ll_inner == 0 || rl_inner == 0 || ! operand_equal_p (ll_inner, rl_inner, 0) || (ll_load && rl_load - && ! (l_mergeable = mergeable_loads_p (ll_load, rl_load)))) + && gimple_vuse (ll_load) != gimple_vuse (rl_load))) return 0; if (TREE_CODE (lr_arg) == INTEGER_CST @@ -8071,7 +8021,7 @@ fold_truth_andor_maybe_separate (location_t loc, || lr_inner == 0 || rr_inner == 0 || ! operand_equal_p (lr_inner, rr_inner, 0) || (lr_load && rr_load - && ! (r_mergeable = mergeable_loads_p (lr_load, rr_load)))) + && gimple_vuse (lr_load) != gimple_vuse (rr_load))) return 0; else l_const = r_const = 0; @@ -8383,12 +8333,7 @@ fold_truth_andor_maybe_separate (location_t loc, ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg, rntype, rnbitsize, rnbitpos, lr_unsignedp || rr_unsignedp, - lr_reversep, - r_mergeable == 0 - ? NULL - : r_mergeable < 0 - ? lr_load - : rr_load); + lr_reversep, lr_load); } if (parts > 1) @@ -8396,18 +8341,8 @@ fold_truth_andor_maybe_separate (location_t loc, if (r_split_load) { gimple *point[2]; - if (r_mergeable == 0) - point[0] = point[1] = NULL; - else if (r_mergeable < 0) - { - point[0] = lr_load; - point[1] = rr_load; - } - else - { - point[0] = rr_load; - point[1] = lr_load; - } + point[0] = lr_load; + point[1] = rr_load; build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1], shifted[1], loc, lr_inner, lr_arg, rnmode, rnmode2, rnbitpos, lr_reversep, point); @@ -8476,12 +8411,7 @@ fold_truth_andor_maybe_separate (location_t loc, ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg, lntype, lnbitsize, lnbitpos, ll_unsignedp || rl_unsignedp, - ll_reversep, - l_mergeable == 0 - ? NULL - : l_mergeable < 0 - ? ll_load - : rl_load); + ll_reversep, ll_load); } if (parts > 1) @@ -8489,18 +8419,8 @@ fold_truth_andor_maybe_separate (location_t loc, if (l_split_load) { gimple *point[2]; - if (l_mergeable == 0) - point[0] = point[1] = NULL; - else if (l_mergeable < 0) - { - point[0] = ll_load; - point[1] = rl_load; - } - else - { - point[0] = rl_load; - point[1] = ll_load; - } + point[0] = ll_load; + point[1] = rl_load; build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0], shifted[0], loc, ll_inner, ll_arg, lnmode, lnmode2, lnbitpos, ll_reversep, point); diff --git a/gcc/testsuite/gcc.dg/field-merge-10.c b/gcc/testsuite/gcc.dg/field-merge-10.c new file mode 100644 index 000000000000..dca6febb7585 --- /dev/null +++ b/gcc/testsuite/gcc.dg/field-merge-10.c @@ -0,0 +1,36 @@ +/* { dg-do run } */ +/* { dg-options "-O" } */ + +/* Check that we don't move loads across stores. */ + +struct s { + short a; + short b; +} __attribute__ ((aligned (4))); + +struct s p[2] = { + { 42, 0xfe01 }, + { 42, 0xfe10 } +}; + +void f (void) { + short a0 = p[0].a; + short b0 = p[0].b; + short a1 = p[1].a; + short b1 = p[1].b; + __builtin_memset (p, 0, sizeof (p)); + asm ("" : "+m" (p)); + if (0 + || p[0].a != p[1].a + || p[0].b != p[1].b + ) + __builtin_abort (); + if (a0 == a1) + if (b0 == b1) + __builtin_abort (); +} + +int main () { + f (); + return 0; +}