Hi,

Currently, address legitimization (by calling memory_address_addr_space) is carried out twice during the RTL expansion of ARRAY_REF, COMPONENT_REF, etc. when their OFFSET is not NULL: once for the BASE and once more for the summed address in offset_address. This may force part, if not all, of the generated BASE RTL into registers, preventing the RTL generator from carrying out effective re-association across BASE and OFFSET (via simplify_gen_binary).

For example, given the following test case:

typedef int arr_2[20][20];
void foo (arr_2 a2, int i, int j)
{
  a2[i+10][j] = 1;
}
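
For reference, with a 4-byte int a row of a2 is 80 bytes, so &a2[i+10][j] == (char *) a2 + (i+10)*80 + j*4 == (char *) a2 + i*80 + j*4 + 800; the second, distributed form is the one the deferred legitimization lets the expander keep. The stand-alone sketch below (an illustration only, assuming sizeof (int) == 4; it is not part of the patch or its testing) spells out that equivalence:

#include <assert.h>

typedef int arr_2[20][20];

/* Illustration only, assuming sizeof (int) == 4: the grouped address
   computation (BASE legitimized first, as done today) and the flattened
   one (legitimization deferred) denote the same byte address.  */
void check_addr_equiv (arr_2 a2, int i, int j)
{
  /* BASE first: a pseudo (reg 122 in the dumps below) ends up holding
     a2 + i*80 + 800, and j*4 is then added on top of it.  */
  char *grouped = ((char *) a2 + i * 80 + 800) + j * 4;

  /* Deferred: one flat sum that the expander can re-associate and that
     later passes can still see through.  */
  char *flat = (char *) a2 + (i * 80 + j * 4) + 800;

  assert (grouped == flat);
  assert (grouped == (char *) &a2[i + 10][j]);
}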

With this test case, the RTL code for the BASE (i.e. a2[i+10]) on arm (-mcpu=cortex-a15, -mthumb) is:

* before the legitimization of BASE:

(plus:SI (plus:SI (mult:SI (reg/v:SI 115 [ i ])
            (const_int 80 [0x50]))
        (reg/v/f:SI 114 [ a2 ]))
    (const_int 800 [0x320]))
(reg/f:SI 122)

* after the legitimization of BASE:

(reg/f:SI 122)

"Thanks to" the initial legitimization, the RTL for the final address is turned into:

(plus:SI (mult:SI (reg/v:SI 116 [ j ])
        (const_int 4 [0x4]))
    (reg/f:SI 122))

while with the legitimization deferred, the RTL for the final address could be:

(plus:SI (plus:SI (plus:SI (mult:SI (reg/v:SI 115 [ i ])
                (const_int 80 [0x50]))
            (mult:SI (reg/v:SI 116 [ j ])
                (const_int 4 [0x4])))
        (reg/v/f:SI 114 [ a2 ]))
    (const_int 800 [0x320]))

which keeps more complete information in the RTL and is in a much more canonical form; later on it can open up more opportunities for CSE.
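
As a purely hypothetical illustration of the CSE point (this variant is not taken from the patch or its testing), consider two stores into the same row:

typedef int arr_2[20][20];

/* Both store addresses share the a2 + i*80 + 800 part.  With the flatter,
   re-associated address RTL, that common part stays visible as a
   subexpression which later CSE could be able to share.  */
void bar (arr_2 a2, int i, int j, int k)
{
  a2[i + 10][j] = 1;
  a2[i + 10][k] = 2;
}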

The effect of this duplicated legitimization varies across targets, as it is strongly tied to the addressing modes a target provides. On RISC machines, which generally have fewer (and simpler) addressing modes, the RTL code quality can be affected more adversely.

The patch passes bootstrapping on arm and x86_64 and regtest on arm-none-eabi, aarch64-none-elf and x86_64. There is no regression in SPEC2000 on arm or x86_64.

OK for the mainline?

Thanks,
Yufeng


gcc/

        * cfgexpand.c (expand_call_stmt): Update the call to
        expand_expr_real_1.
        * expr.c (expand_assignment): Add new local variable validate_p and
        set it; call expand_expr or expand_expr_nv depending on validate_p
        for to_rtx; call adjust_address_1 instead of adjust_address.
        (store_expr): Update the call to expand_expr_real.
        (expand_expr_real): Add new parameter 'validate_p'; update the call
        to expand_expr_real_1.
        (expand_expr_real_1): Add new parameter 'validate_p'; update the call
        to expand_expr_real; depending on validate_p, call
        memory_address_addr_space or convert_memory_address_addr_space;
        likewise for expand_expr or expand_expr_nv; call adjust_address_1
        instead of adjust_address.
        * expr.h (expand_expr_real): Update.
        (expand_expr_real_1): Update.
        (expand_expr): Update.
        (expand_expr_nv): New function.
        (expand_normal): Update.
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index c312c37..e01c7ff 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -2189,7 +2189,7 @@ expand_call_stmt (gimple stmt)
   if (lhs)
     expand_assignment (lhs, exp, false);
   else
-    expand_expr_real_1 (exp, const0_rtx, VOIDmode, EXPAND_NORMAL, NULL);
+    expand_expr_real_1 (exp, const0_rtx, VOIDmode, EXPAND_NORMAL, NULL, true);
 
   mark_transaction_restart_calls (stmt);
 }
diff --git a/gcc/expr.c b/gcc/expr.c
index 89e3979..90a2405 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -4714,6 +4714,7 @@ expand_assignment (tree to, tree from, bool nontemporal)
       int unsignedp;
       int volatilep = 0;
       tree tem;
+      bool validate_p;
 
       push_temp_slots ();
       tem = get_inner_reference (to, &bitsize, &bitpos, &offset, &mode1,
@@ -4723,7 +4724,12 @@ expand_assignment (tree to, tree from, bool nontemporal)
          && DECL_BIT_FIELD_TYPE (TREE_OPERAND (to, 1)))
        get_bit_range (&bitregion_start, &bitregion_end, to, &bitpos, &offset);
 
-      to_rtx = expand_expr (tem, NULL_RTX, VOIDmode, EXPAND_WRITE);
+      /* If OFFSET is not NULL, we defer the address legitimization until
+        the moment that the base and offset have both been expanded and
+        summed up.  */
+      validate_p = offset == NULL;
+      to_rtx = ((validate_p ? expand_expr : expand_expr_nv)
+               (tem, NULL_RTX, VOIDmode, EXPAND_WRITE));
 
       /* If the bitfield is volatile, we want to access it in the
         field's mode, not the computed mode.
@@ -4734,9 +4740,9 @@ expand_assignment (tree to, tree from, bool nontemporal)
          if (volatilep && flag_strict_volatile_bitfields > 0)
            to_rtx = adjust_address (to_rtx, mode1, 0);
          else if (GET_MODE (to_rtx) == VOIDmode)
-           to_rtx = adjust_address (to_rtx, BLKmode, 0);
+           to_rtx = adjust_address_1 (to_rtx, BLKmode, 0, validate_p, 1, 0, 0);
        }
- 
+
       if (offset != 0)
        {
          enum machine_mode address_mode;
@@ -4766,7 +4772,8 @@ expand_assignment (tree to, tree from, bool nontemporal)
              && (bitsize % GET_MODE_ALIGNMENT (mode1)) == 0
              && MEM_ALIGN (to_rtx) == GET_MODE_ALIGNMENT (mode1))
            {
-             to_rtx = adjust_address (to_rtx, mode1, bitpos / BITS_PER_UNIT);
+             to_rtx = adjust_address_1 (to_rtx, mode1, bitpos / BITS_PER_UNIT,
+                                        validate_p, 1, 0, 0);
              bitpos = 0;
            }
 
@@ -5209,7 +5216,7 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
       temp = expand_expr_real (exp, tmp_target, GET_MODE (target),
                               (call_param_p
                                ? EXPAND_STACK_PARM : EXPAND_NORMAL),
-                              &alt_rtl);
+                              &alt_rtl, true);
     }
 
   /* If TEMP is a VOIDmode constant and the mode of the type of EXP is not
@@ -7826,11 +7833,18 @@ expand_constructor (tree exp, rtx target, enum expand_modifier modifier,
    address, and ALT_RTL is non-NULL, then *ALT_RTL is set to the
    DECL_RTL of the VAR_DECL.  *ALT_RTL is also set if EXP is a
    COMPOUND_EXPR whose second argument is such a VAR_DECL, and so on
-   recursively.  */
+   recursively.
+
+   If VALIDATE_P is false, the address legitimization will not be applied
+   to the generated addressing code for MEM_REF.  This is to help
+   defer the address legitimization for inner-referencing expressions, e.g.
+   COMPONENT_REF, ARRAY_REF, BIT_FIELD_REF, etc., until their base and offset
+   have both been expanded and summed up.  */
 
 rtx
 expand_expr_real (tree exp, rtx target, enum machine_mode tmode,
-                 enum expand_modifier modifier, rtx *alt_rtl)
+                 enum expand_modifier modifier, rtx *alt_rtl,
+                 bool validate_p)
 {
   rtx ret;
 
@@ -7842,7 +7856,7 @@ expand_expr_real (tree exp, rtx target, enum machine_mode tmode,
       return ret ? ret : const0_rtx;
     }
 
-  ret = expand_expr_real_1 (exp, target, tmode, modifier, alt_rtl);
+  ret = expand_expr_real_1 (exp, target, tmode, modifier, alt_rtl, validate_p);
   return ret;
 }
 
@@ -9153,7 +9167,8 @@ stmt_is_replaceable_p (gimple stmt)
 
 rtx
 expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
-                   enum expand_modifier modifier, rtx *alt_rtl)
+                   enum expand_modifier modifier, rtx *alt_rtl,
+                   bool validate_p)
 {
   rtx op0, op1, temp, decl_rtl;
   tree type;
@@ -9299,7 +9314,7 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
 
          set_curr_insn_location (gimple_location (g));
          r = expand_expr_real (gimple_assign_rhs_to_tree (g), target,
-                               tmode, modifier, NULL);
+                               tmode, modifier, NULL, true);
          set_curr_insn_location (saved_loc);
          if (REG_P (r) && !REG_EXPR (r))
            set_reg_attrs_for_decl_rtl (SSA_NAME_VAR (exp), r);
@@ -9520,7 +9535,8 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
     case SAVE_EXPR:
       {
        tree val = treeop0;
-       rtx ret = expand_expr_real_1 (val, target, tmode, modifier, alt_rtl);
+       rtx ret = expand_expr_real_1 (val, target, tmode, modifier, alt_rtl,
+                                     true);
 
        if (!SAVE_EXPR_RESOLVED_P (exp))
          {
@@ -9643,13 +9659,19 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
          }
        align = get_object_alignment (exp);
        op0 = expand_expr (base, NULL_RTX, VOIDmode, EXPAND_SUM);
-       op0 = memory_address_addr_space (mode, op0, as);
+       if (validate_p)
+         op0 = memory_address_addr_space (mode, op0, as);
+       else
+         op0 = convert_memory_address_addr_space (address_mode, op0, as);
        if (!integer_zerop (TREE_OPERAND (exp, 1)))
          {
            rtx off
              = immed_double_int_const (mem_ref_offset (exp), address_mode);
            op0 = simplify_gen_binary (PLUS, address_mode, op0, off);
-           op0 = memory_address_addr_space (mode, op0, as);
+           if (validate_p)
+             op0 = memory_address_addr_space (mode, op0, as);
+           else
+             op0 = convert_memory_address_addr_space (address_mode, op0, as);
          }
        temp = gen_rtx_MEM (mode, op0);
        set_mem_attributes (temp, exp, 0);
@@ -9874,6 +9896,11 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
        rtx orig_op0, memloc;
        bool mem_attrs_from_type = false;
 
+       /* If OFFSET is not NULL, we defer the address legitimization until
+          the moment that the base and offset have both been expanded and
+          summed up.  */
+       validate_p = offset == NULL;
+
        /* If we got back the original object, something is wrong.  Perhaps
           we are evaluating an expression too early.  In any event, don't
           infinitely recurse.  */
@@ -9883,15 +9910,16 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
           computation, since it will need a temporary and TARGET is known
           to have to do.  This occurs in unchecked conversion in Ada.  */
        orig_op0 = op0
-         = expand_expr (tem,
-                        (TREE_CODE (TREE_TYPE (tem)) == UNION_TYPE
-                         && COMPLETE_TYPE_P (TREE_TYPE (tem))
-                         && (TREE_CODE (TYPE_SIZE (TREE_TYPE (tem)))
-                             != INTEGER_CST)
-                         && modifier != EXPAND_STACK_PARM
-                         ? target : NULL_RTX),
-                        VOIDmode,
-                        modifier == EXPAND_SUM ? EXPAND_NORMAL : modifier);
+         = ((validate_p ? expand_expr : expand_expr_nv)
+            (tem,
+             (TREE_CODE (TREE_TYPE (tem)) == UNION_TYPE
+              && COMPLETE_TYPE_P (TREE_TYPE (tem))
+              && (TREE_CODE (TYPE_SIZE (TREE_TYPE (tem)))
+                  != INTEGER_CST)
+              && modifier != EXPAND_STACK_PARM
+              ? target : NULL_RTX),
+             VOIDmode,
+             modifier == EXPAND_SUM ? EXPAND_NORMAL : modifier));
 
        /* If the bitfield is volatile, we want to access it in the
           field's mode, not the computed mode.
@@ -9902,7 +9930,7 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
            if (volatilep && flag_strict_volatile_bitfields > 0)
              op0 = adjust_address (op0, mode1, 0);
            else if (GET_MODE (op0) == VOIDmode)
-             op0 = adjust_address (op0, BLKmode, 0);
+             op0 = adjust_address_1 (op0, BLKmode, 0, validate_p, 1, 0, 0);
          }
 
        mode2
@@ -9994,7 +10022,8 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
                && (bitsize % GET_MODE_ALIGNMENT (mode1)) == 0
                && MEM_ALIGN (op0) == GET_MODE_ALIGNMENT (mode1))
              {
-               op0 = adjust_address (op0, mode1, bitpos / BITS_PER_UNIT);
+               op0 = adjust_address_1 (op0, mode1, bitpos / BITS_PER_UNIT,
+                                       validate_p, 1, 0, 0);
                bitpos = 0;
              }
 
@@ -10487,7 +10516,7 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
       /* WITH_SIZE_EXPR expands to its first argument.  The caller should
         have pulled out the size to use in whatever context it needed.  */
       return expand_expr_real (treeop0, original_target, tmode,
-                              modifier, alt_rtl);
+                              modifier, alt_rtl, true);
 
     default:
       return expand_expr_real_2 (&ops, target, tmode, modifier);
diff --git a/gcc/expr.h b/gcc/expr.h
index 56f504a..7b5da10 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -428,9 +428,9 @@ extern rtx force_operand (rtx, rtx);
 
 /* Work horses for expand_expr.  */
 extern rtx expand_expr_real (tree, rtx, enum machine_mode,
-                            enum expand_modifier, rtx *);
+                            enum expand_modifier, rtx *, bool);
 extern rtx expand_expr_real_1 (tree, rtx, enum machine_mode,
-                              enum expand_modifier, rtx *);
+                              enum expand_modifier, rtx *, bool);
 extern rtx expand_expr_real_2 (sepops, rtx, enum machine_mode,
                               enum expand_modifier);
 
@@ -441,13 +441,20 @@ static inline rtx
 expand_expr (tree exp, rtx target, enum machine_mode mode,
             enum expand_modifier modifier)
 {
-  return expand_expr_real (exp, target, mode, modifier, NULL);
+  return expand_expr_real (exp, target, mode, modifier, NULL, true);
+}
+
+static inline rtx
+expand_expr_nv (tree exp, rtx target, enum machine_mode mode,
+               enum expand_modifier modifier)
+{
+  return expand_expr_real (exp, target, mode, modifier, NULL, false);
 }
 
 static inline rtx
 expand_normal (tree exp)
 {
-  return expand_expr_real (exp, NULL_RTX, VOIDmode, EXPAND_NORMAL, NULL);
+  return expand_expr_real (exp, NULL_RTX, VOIDmode, EXPAND_NORMAL, NULL, true);
 }
 
 /* At the start of a function, record that we have no previously-pushed
