https://gcc.gnu.org/g:2beb301d622407e38dddf37b5d986f34de0cc715

commit 2beb301d622407e38dddf37b5d986f34de0cc715
Author: Alexandre Oliva <ol...@gnu.org>
Date:   Thu Nov 21 22:36:55 2024 -0300

    drop expensive mergeable tests in favor of gimple_vuse compares
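
    The old mergeable_loads_p walked statements and blocks between the
    two loads looking for intervening VDEFs, and gave up whenever a
    visited block had more than one predecessor.  Comparing the loads'
    gimple_vuse instead is a constant-time test for the same property:
    two loads with the same virtual use see the same memory state, so
    no store can intervene, and the insertion point of the combined
    load no longer matters for correctness.

    As an illustrative sketch (not part of the patch), this is the
    shape of condition the folding targets: with A and B adjacent in
    one aligned word, the two compares can become a single 32-bit load
    compared against a constant under a mask.

        struct s { short a; short b; } __attribute__ ((aligned (4)));

        int f (struct s *p)
        {
          /* Both loads have the same gimple_vuse when nothing stores
             to memory between them, so they are safe to combine into
             one load of the enclosing word.  */
          return p->a == 2 && p->b == 4;
        }

        int main ()
        {
          struct s x = { 2, 4 };
          return !f (&x);  /* Exits 0 when both fields match.  */
        }

    The new execution test, field-merge-10.c, checks the converse:
    loads must not be combined across a store.  It loads all four
    fields, clobbers them with memset, and verifies that the saved
    values still compare as they did before the store.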

Diff:
---
 gcc/gimple-fold.cc                    | 182 ++++++++++------------------------
 gcc/testsuite/gcc.dg/field-merge-10.c |  36 +++++++
 2 files changed, 87 insertions(+), 131 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 7d7471e5ed76..fe7fcf471d43 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -7763,7 +7763,7 @@ build_split_load (tree /* out */ ln_arg[2],
       tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
       bitpos[i] = bit_pos;
       ln_arg[i] = make_bit_field_load (loc, inner, orig_inner,
-                                     type, bitsiz[i],
+                                      type, bitsiz[i],
                                       bit_pos, 1, reversep, point[i]);
       bit_pos += bitsiz[i];
     }
@@ -7825,110 +7825,61 @@ reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
     }
 }
 
-/* Return nonzero if LSTMT and RSTMT, assumed to load from the same
-   words, can be safely merged into a single load.  Return zero if
-   there are intervening stores, or if neither stmt dominates the
-   other.  If they are mergeable, return -1 if the merged load should
-   be inserted before LSTMT to dominate both, otherwise return +1.  */
-
-static int
-mergeable_loads_p (gimple *lstmt, gimple *rstmt)
-{
-  gimple *stmts[2] = { lstmt, rstmt };
-  int ret;
-
-  if (stmt_dominates_stmt_p (lstmt, rstmt))
-    {
-      ret = -1;
-      stmts[0] = lstmt;
-      stmts[1] = rstmt;
-    }
-  else if (stmt_dominates_stmt_p (rstmt, lstmt))
-    {
-      ret = 1;
-      stmts[1] = lstmt;
-      stmts[0] = rstmt;
-    }
-  else
-    return 0;
-
-  basic_block bbs[2] = { gimple_bb (stmts[0]), gimple_bb (stmts[1]) };
-  if (bbs[0] == bbs[1])
-    {
-      for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
-          gsi_stmt (gsi) != stmts[1]; gsi_next (&gsi))
-       if (gimple_vdef (gsi_stmt (gsi)))
-         return 0;
-      return ret;
-    }
-
-  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[0]);
-       !gsi_end_p (gsi); gsi_next (&gsi))
-    if (gimple_vdef (gsi_stmt (gsi)))
-      return 0;
+/* Find ways of folding logical expressions of LHS and RHS:
 
-  for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
-       !gsi_end_p (gpi); gsi_next (&gpi))
-    if (gimple_vdef (gsi_stmt (gpi)))
-      return 0;
+   Try to merge two comparisons to nearby fields.
 
-  for (gimple_stmt_iterator gsi = gsi_for_stmt (stmts[1]);
-       !gsi_end_p (gsi); gsi_prev (&gsi))
-    if (gimple_vdef (gsi_stmt (gsi)))
-      return 0;
-
-  for (basic_block bb = bbs[1]; bb != bbs[0]; )
-    {
-      /* ??? For now, stop if any visited block has more than one
-        predecessor.  */
-      if (!single_pred_p (bb))
-       return 0;
+   For example, if we have p->a == 2 && p->b == 4 and we can load both A and B
+   at once, we can do this with a comparison against the object ANDed with
+   a mask.
 
-      bb = single_pred (bb);
+   If we have p->a == q->a && p->b == q->b, we may be able to use bit masking
+   operations to do this with one comparison, loading both fields from P at
+   once, and likewise from Q.
 
-      for (gphi_iterator gpi = gsi_start_phis (bbs[1]);
-          !gsi_end_p (gpi); gsi_next (&gpi))
-       if (gimple_vdef (gsi_stmt (gpi)))
-         return 0;
+   Herein, loading at once means loading from within the same alignment
+   boundary for the enclosing object.  If (packed) fields cross such alignment
+   boundaries, we may still recombine the compares, so that loads do not cross
+   the boundaries.
 
-      for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
-          !gsi_end_p (gsi); gsi_next (&gsi))
-       if (gimple_vdef (gsi_stmt (gsi)))
-         return 0;
-    }
+   CODE is the logical operation being done.  It can be TRUTH_ANDIF_EXPR,
+   TRUTH_AND_EXPR, TRUTH_ORIF_EXPR, or TRUTH_OR_EXPR.
 
-  return ret;
-}
+   TRUTH_TYPE is the type of the logical operand.
 
-/* Find ways of folding logical expressions of LHS and RHS:
-   Try to merge two comparisons to the same innermost item.
-   Look for range tests like "ch >= '0' && ch <= '9'".
-   Look for combinations of simple terms on machines with expensive branches
-   and evaluate the RHS unconditionally.
+   LHS is denoted as LL_ARG LCODE LR_ARG.
 
-   For example, if we have p->a == 2 && p->b == 4 and we can make an
-   object large enough to span both A and B, we can do this with a comparison
-   against the object ANDed with the a mask.
+   RHS is denoted as RL_ARG RCODE RR_ARG.
 
-   If we have p->a == q->a && p->b == q->b, we may be able to use bit masking
-   operations to do this with one comparison.
+   LHS is assumed to dominate RHS.
 
-   We check for both normal comparisons and the BIT_AND_EXPRs made this by
-   function and the one above.
+   Combined loads are inserted next to preexisting loads, once we determine
+   that the combination is viable, and the combined condition references new
+   SSA_NAMEs that hold the loaded values.  Since the original loads are
+   verified to have the same gimple_vuse, the insertion point doesn't matter
+   for correctness.  ??? The loads may be a lot earlier than the compares, and
+   it's conceivable that one or two loads for RHS appear before those for LHS.
+   It could be advantageous to try to place the loads optimally, taking
+   advantage of knowing whether RHS is accessed before LHS, or that both are
+   accessed before both compares, but we don't do that (yet?).
 
-   CODE is the logical operation being done.  It can be TRUTH_ANDIF_EXPR,
-   TRUTH_AND_EXPR, TRUTH_ORIF_EXPR, or TRUTH_OR_EXPR.
+   SEPARATEP should be NULL if the combined condition must be returned as a
+   single expression, even if it is a compound condition.  This must only be
+   done if LHS and RHS are adjacent, without intervening conditions, and the
+   combined condition is to replace RHS, while LHS is dropped altogether.
+   ??? This possibility is currently unused.
 
-   TRUTH_TYPE is the type of the logical operand and LHS and RHS are its
-   two operands.
+   Otherwise, SEPARATEP must be a non-NULL pointer to a NULL_TREE, that may be
+   replaced by a part of the compound condition that could replace RHS, while
+   the returned expression replaces LHS.  This works whether or not LHS and RHS
+   are adjacent, as long as there aren't VDEFs or other side effects between
+   them.
 
-   SEPARATEP should be NULL if LHS and RHS are adjacent in
-   CODE-chained compares, and a non-NULL pointer to NULL_TREE
-   otherwise.  If the "words" accessed by RHS are already accessed by
-   LHS, this won't matter, but if RHS accesses "words" that LHS
-   doesn't, then *SEPARATEP will be set to the compares that should
-   take RHS's place.  By "words" we mean contiguous bits that do not
-   cross a an TYPE_ALIGN boundary of the accessed object's type.
+   If the "words" accessed by RHS are already accessed by LHS, this won't
+   matter, but if RHS accesses "words" that LHS doesn't, then *SEPARATEP will
+   be set to the compares that should take RHS's place.  By "words" we mean
+   contiguous bits that do not cross a TYPE_ALIGN boundary of the accessed
+   object's type.
 
    We return the simplified tree or 0 if no optimization is possible.  */
 
@@ -7952,7 +7903,6 @@ fold_truth_andor_maybe_separate (location_t loc,
   enum tree_code wanted_code;
   tree ll_inner, lr_inner, rl_inner, rr_inner;
   gimple *ll_load, *lr_load, *rl_load, *rr_load;
-  int l_mergeable = 0, r_mergeable = 0;
   HOST_WIDE_INT ll_bitsize, ll_bitpos, lr_bitsize, lr_bitpos;
   HOST_WIDE_INT rl_bitsize, rl_bitpos, rr_bitsize, rr_bitpos;
   HOST_WIDE_INT xll_bitpos, xlr_bitpos, xrl_bitpos, xrr_bitpos;
@@ -8058,7 +8008,7 @@ fold_truth_andor_maybe_separate (location_t loc,
       || ll_inner == 0 || rl_inner == 0
       || ! operand_equal_p (ll_inner, rl_inner, 0)
       || (ll_load && rl_load
-         && ! (l_mergeable = mergeable_loads_p (ll_load, rl_load))))
+         && gimple_vuse (ll_load) != gimple_vuse (rl_load)))
     return 0;
 
   if (TREE_CODE (lr_arg) == INTEGER_CST
@@ -8071,7 +8021,7 @@ fold_truth_andor_maybe_separate (location_t loc,
           || lr_inner == 0 || rr_inner == 0
           || ! operand_equal_p (lr_inner, rr_inner, 0)
           || (lr_load && rr_load
-              && ! (r_mergeable = mergeable_loads_p (lr_load, rr_load))))
+              && gimple_vuse (lr_load) != gimple_vuse (rr_load)))
     return 0;
   else
     l_const = r_const = 0;
@@ -8383,12 +8333,7 @@ fold_truth_andor_maybe_separate (location_t loc,
          ld_arg[1][0] = make_bit_field_load (loc, lr_inner, lr_arg,
                                              rntype, rnbitsize, rnbitpos,
                                              lr_unsignedp || rr_unsignedp,
-                                             lr_reversep,
-                                             r_mergeable == 0
-                                             ? NULL
-                                             : r_mergeable < 0
-                                             ? lr_load
-                                             : rr_load);
+                                             lr_reversep, lr_load);
        }
 
       if (parts > 1)
@@ -8396,18 +8341,8 @@ fold_truth_andor_maybe_separate (location_t loc,
          if (r_split_load)
            {
              gimple *point[2];
-             if (r_mergeable == 0)
-               point[0] = point[1] = NULL;
-             else if (r_mergeable < 0)
-               {
-                 point[0] = lr_load;
-                 point[1] = rr_load;
-               }
-             else
-               {
-                 point[0] = rr_load;
-                 point[1] = lr_load;
-               }
+             point[0] = lr_load;
+             point[1] = rr_load;
              build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
                                shifted[1], loc, lr_inner, lr_arg,
                                rnmode, rnmode2, rnbitpos, lr_reversep, point);
@@ -8476,12 +8411,7 @@ fold_truth_andor_maybe_separate (location_t loc,
       ld_arg[0][0] = make_bit_field_load (loc, ll_inner, ll_arg,
                                          lntype, lnbitsize, lnbitpos,
                                          ll_unsignedp || rl_unsignedp,
-                                         ll_reversep,
-                                         l_mergeable == 0
-                                         ? NULL
-                                         : l_mergeable < 0
-                                         ? ll_load
-                                         : rl_load);
+                                         ll_reversep, ll_load);
     }
 
   if (parts > 1)
@@ -8489,18 +8419,8 @@ fold_truth_andor_maybe_separate (location_t loc,
       if (l_split_load)
            {
              gimple *point[2];
-             if (l_mergeable == 0)
-               point[0] = point[1] = NULL;
-             else if (l_mergeable < 0)
-               {
-                 point[0] = ll_load;
-                 point[1] = rl_load;
-               }
-             else
-               {
-                 point[0] = rl_load;
-                 point[1] = ll_load;
-               }
+             point[0] = ll_load;
+             point[1] = rl_load;
              build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
                                shifted[0], loc, ll_inner, ll_arg,
                                lnmode, lnmode2, lnbitpos, ll_reversep, point);
diff --git a/gcc/testsuite/gcc.dg/field-merge-10.c b/gcc/testsuite/gcc.dg/field-merge-10.c
new file mode 100644
index 000000000000..dca6febb7585
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-10.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-options "-O" } */
+
+/* Check that we don't move loads across stores.  */
+
+struct s {
+  short a;
+  short b;
+} __attribute__ ((aligned (4)));
+
+struct s p[2] = {
+  { 42, 0xfe01 },
+  { 42, 0xfe10 }
+};
+
+void f (void) {
+  short a0 = p[0].a;
+  short b0 = p[0].b;
+  short a1 = p[1].a;
+  short b1 = p[1].b;
+  __builtin_memset (p, 0, sizeof (p));
+  asm ("" : "+m" (p));
+  if (0
+      || p[0].a != p[1].a
+      || p[0].b != p[1].b
+      )
+    __builtin_abort ();
+  if (a0 == a1)
+    if (b0 == b1)
+      __builtin_abort ();
+}
+
+int main () {
+  f ();
+  return 0;
+}
