On Dec 11, 2024, Richard Biener <richard.guent...@gmail.com> wrote:

> I think these 0, 0 args are supposed to indicate the maximum extent of
> the resulting access.

Thanks, that looks much better indeed.
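
For reference, here's the get_best_mode interface (paraphrased from
gcc/stor-layout.cc; parameter names from memory, so double-check): the
two zeros we were passing are bitregion_start and bitregion_end, and
zero for both means the access is unrestricted:

  bool
  get_best_mode (int bitsize, HOST_WIDE_INT bitpos,
                 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
                 unsigned int align,
                 unsigned HOST_WIDE_INT largest_mode_bitsize,
                 bool volatilep, scalar_int_mode *best_mode);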


A bootstrap on aarch64-linux-gnu revealed that sometimes (for example,
when building shorten_branches in final.cc) we find such things as
MEM <unsigned int>, where unsigned int happens to be a variant of the
original unsigned int type, but with 64-bit alignment.  This unusual
alignment caused (i) get_inner_reference not to look inside the MEM,
and (ii) get_best_mode to choose DImode instead of SImode to access
the object, so we built a BIT_FIELD_REF that attempted to select all
64 bits of a 32-bit object, and that failed gimple verification
("position plus size exceeds size of referenced object") because
there aren't that many bits in the unsigned int object.
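
Roughly, in a hand-written sketch rather than an actual dump, the
verifier was rejecting something like:

  _2 = BIT_FIELD_REF <MEM <unsigned int> [(unsigned int *)_1], 64, 0>;

that is, a 64-bit (DImode) selection out of a 32-bit object.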

This patch avoids that failure mode by limiting the bit region passed
to get_best_mode to the size of the inner object, when that size is a
known constant.
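
For context, here's a hypothetical C example of the kind of input this
folding targets (the actual trigger was shorten_branches in final.cc):

  struct s { unsigned int x : 4, y : 4; };

  int f (struct s *p)
  {
    /* Candidates to be combined into a single load, mask and compare.  */
    return p->x == 1 && p->y == 2;
  }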

With the bit region thus limited, we avoid creating the BIT_FIELD_REF
and reuse the load expr instead, but we would still introduce a
second, identical load.  That load would presumably get optimized out
later, but it is easy enough to avoid in the first place by reusing
the SSA_NAME the value was originally loaded into, so I implemented
that in make_bit_field_load.

Regstrapped on x86_64-linux-gnu; verified that this fixes the known
issue on aarch64-linux-gnu, where a regstrap is still in progress.
Ok to install?


for  gcc/ChangeLog

        * gimple-fold.cc (fold_truth_andor_for_ifcombine): Limit the
        size of the bitregion in get_best_mode calls by the inner
        object's type size, if known.
        (make_bit_field_load): Reuse SSA_NAME if we're attempting to
        issue an identical load.
---
 gcc/gimple-fold.cc |   52 ++++++++++++++++++++++++++++++++++------------------
 1 file changed, 34 insertions(+), 18 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index a31fc283d51b0..9179010c9eaf1 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -7751,6 +7751,15 @@ make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
   if (!point)
     return ref;
 
+  /* If we're remaking the same load, reuse the SSA_NAME it is already loaded
+     into.  */
+  if (gimple_assign_load_p (point)
+      && operand_equal_p (ref, gimple_assign_rhs1 (point)))
+    {
+      gcc_checking_assert (TREE_CODE (gimple_assign_lhs (point)) == SSA_NAME);
+      return gimple_assign_lhs (point);
+    }
+
   gimple_seq stmts = NULL;
   tree ret = force_gimple_operand (ref, &stmts, true, NULL_TREE);
 
@@ -8204,24 +8213,27 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
      to be relative to a field of that size.  */
   first_bit = MIN (ll_bitpos, rl_bitpos);
   end_bit = MAX (ll_bitpos + ll_bitsize, rl_bitpos + rl_bitsize);
-  if (get_best_mode (end_bit - first_bit, first_bit, 0, 0,
-                    TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
-                    volatilep, &lnmode))
+  HOST_WIDE_INT ll_align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+  poly_uint64 ll_end_region = 0;
+  if (TYPE_SIZE (TREE_TYPE (ll_inner))
+      && uniform_integer_cst_p (TYPE_SIZE (TREE_TYPE (ll_inner))))
+    ll_end_region = tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (ll_inner)));
+  if (get_best_mode (end_bit - first_bit, first_bit, 0, ll_end_region,
+                    ll_align, BITS_PER_WORD, volatilep, &lnmode))
     l_split_load = false;
   else
     {
       /* Consider the possibility of recombining loads if any of the
         fields straddles across an alignment boundary, so that either
         part can be loaded along with the other field.  */
-      HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
       HOST_WIDE_INT boundary = compute_split_boundary_from_align
-       (align, ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize);
+       (ll_align, ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize);
 
       if (boundary < 0
-         || !get_best_mode (boundary - first_bit, first_bit, 0, 0,
-                            align, BITS_PER_WORD, volatilep, &lnmode)
-         || !get_best_mode (end_bit - boundary, boundary, 0, 0,
-                            align, BITS_PER_WORD, volatilep, &lnmode2))
+         || !get_best_mode (boundary - first_bit, first_bit, 0, ll_end_region,
+                            ll_align, BITS_PER_WORD, volatilep, &lnmode)
+         || !get_best_mode (end_bit - boundary, boundary, 0, ll_end_region,
+                            ll_align, BITS_PER_WORD, volatilep, &lnmode2))
        return 0;
 
       /* If we can't have a single load, but can with two, figure out whether
@@ -8368,16 +8380,19 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
         and then we use two separate compares.  */
       first_bit = MIN (lr_bitpos, rr_bitpos);
       end_bit = MAX (lr_bitpos + lr_bitsize, rr_bitpos + rr_bitsize);
-      if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
-                         TYPE_ALIGN (TREE_TYPE (lr_inner)), BITS_PER_WORD,
-                         volatilep, &rnmode))
+      HOST_WIDE_INT lr_align = TYPE_ALIGN (TREE_TYPE (lr_inner));
+      poly_uint64 lr_end_region = 0;
+      if (TYPE_SIZE (TREE_TYPE (lr_inner))
+         && uniform_integer_cst_p (TYPE_SIZE (TREE_TYPE (lr_inner))))
+       lr_end_region = tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (lr_inner)));
+      if (!get_best_mode (end_bit - first_bit, first_bit, 0, lr_end_region,
+                         lr_align, BITS_PER_WORD, volatilep, &rnmode))
        {
          /* Consider the possibility of recombining loads if any of the
             fields straddles across an alignment boundary, so that either
             part can be loaded along with the other field.  */
-         HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (lr_inner));
          HOST_WIDE_INT boundary = compute_split_boundary_from_align
-           (align, lr_bitpos, lr_bitsize, rr_bitpos, rr_bitsize);
+           (lr_align, lr_bitpos, lr_bitsize, rr_bitpos, rr_bitsize);
 
          if (boundary < 0
              /* If we're to split both, make sure the split point is
@@ -8385,10 +8400,11 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
              || (l_split_load
                  && (boundary - lr_bitpos
                      != (lnbitpos + GET_MODE_BITSIZE (lnmode)) - ll_bitpos))
-             || !get_best_mode (boundary - first_bit, first_bit, 0, 0,
-                                align, BITS_PER_WORD, volatilep, &rnmode)
-             || !get_best_mode (end_bit - boundary, boundary, 0, 0,
-                                align, BITS_PER_WORD, volatilep, &rnmode2))
+             || !get_best_mode (boundary - first_bit, first_bit,
+                                0, lr_end_region,
+                                lr_align, BITS_PER_WORD, volatilep, &rnmode)
+             || !get_best_mode (end_bit - boundary, boundary, 0, lr_end_region,
+                                lr_align, BITS_PER_WORD, volatilep, &rnmode2))
            return 0;
 
          r_split_load = true;


-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist                   GNU Toolchain Engineer
More tolerance and less prejudice are key for inclusion and diversity
Excluding neuro-others for not behaving "normal" is *not* inclusive
