Hi,
Currently the address legitimization (by calling
memory_address_addr_space) is carried out twice during the RTL expansion
of ARRAY_REF, COMPONENT_REF, etc. when their OFFSET is not NULL. It is
done once for the BASE and once for the summed address in
offset_address. This may cause part, if not all, of the generated BASE
RTL to be forced into reg(s), preventing the RTL generator from carrying
out effective re-association across BASE and OFFSET (via
simplify_gen_binary).
For example, given the following test case:
typedef int arr_2[20][20];
void foo (arr_2 a2, int i, int j)
{
a2[i+10][j] = 1;
}
the RTL code for the BASE (i.e. a2[i+10]) on arm (-mcpu=cortex-a15,
-mthumb) is
* before the legitimization of BASE:
(plus:SI (plus:SI (mult:SI (reg/v:SI 115 [ i ])
(const_int 80 [0x50]))
(reg/v/f:SI 114 [ a2 ]))
(const_int 800 [0x320]))
(reg/f:SI 122)
* after the legitimization of BASE:
(reg/f:SI 122)
"Thanks to" the initial legitimization, the RTL for the final address is
turned into:
(plus:SI (mult:SI (reg/v:SI 116 [ j ])
(const_int 4 [0x4]))
(reg/f:SI 122))
while with the legitimization deferred, the RTL for the final address
could be:
(plus:SI (plus:SI (plus:SI (mult:SI (reg/v:SI 115 [ i ])
(const_int 80 [0x50]))
(mult:SI (reg/v:SI 116 [ j ])
(const_int 4 [0x4])))
(reg/v/f:SI 114 [ a2 ]))
(const_int 800 [0x320]))
which has more complete info in the RTL and is much more canonicalized;
later on it could open up more opportunities for CSE.
The effect of this duplicated legitimization effort varies across
different targets, as it is strongly related to the available addressing
modes on a target. On RISC machines where in general there are fewer
addressing modes (which are in general less complicated as well), the
RTL code quality can be affected more adversely.
The patch passes bootstrapping on arm and x86_64 and regtest on
arm-none-eabi, aarch64-none-elf and x86_64. There is no regression in
spec2000 on arm or x86_64.
OK for the mainline?
Thanks,
Yufeng
gcc/
* cfgexpand.c (expand_call_stmt): Update the call to
expand_expr_real_1.
* expr.c (expand_assignment): Add new local variable validate_p
and set
it; call expand_expr or expand_expr_nv depending on validate_p for
to_rtx; call adjust_address_1 instead of adjust_address.
(store_expr): Update the call to expand_expr_real.
(expand_expr_real): Add new parameter 'validate_p'; update the
call to
expand_expr_real_1.
(expand_expr_real_1): Add new parameter 'validate_p'; update
the call
to expand_expr_real; depending on validate_p, call
memory_address_addr_space or convert_memory_address_addr_space;
likewise for expand_expr or expand_expr_nv; call adjust_address_1
instead of adjust_address.
* expr.h (expand_expr_real): Update.
(expand_expr_real_1): Update.
(expand_expr): Update.
(expand_expr_nv): New function.
(expand_normal): Update.
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index c312c37..e01c7ff 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -2189,7 +2189,7 @@ expand_call_stmt (gimple stmt)
if (lhs)
expand_assignment (lhs, exp, false);
else
- expand_expr_real_1 (exp, const0_rtx, VOIDmode, EXPAND_NORMAL, NULL);
+ expand_expr_real_1 (exp, const0_rtx, VOIDmode, EXPAND_NORMAL, NULL, true);
mark_transaction_restart_calls (stmt);
}
diff --git a/gcc/expr.c b/gcc/expr.c
index 89e3979..90a2405 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -4714,6 +4714,7 @@ expand_assignment (tree to, tree from, bool nontemporal)
int unsignedp;
int volatilep = 0;
tree tem;
+ bool validate_p;
push_temp_slots ();
tem = get_inner_reference (to, &bitsize, &bitpos, &offset, &mode1,
@@ -4723,7 +4724,12 @@ expand_assignment (tree to, tree from, bool nontemporal)
&& DECL_BIT_FIELD_TYPE (TREE_OPERAND (to, 1)))
get_bit_range (&bitregion_start, &bitregion_end, to, &bitpos, &offset);
- to_rtx = expand_expr (tem, NULL_RTX, VOIDmode, EXPAND_WRITE);
+ /* If OFFSET is not NULL, we defer the address legitimization til
+ the moment that the base and offset have been both expanded and
+ summed up. */
+ validate_p = offset == NULL;
+ to_rtx = ((validate_p ? expand_expr : expand_expr_nv)
+ (tem, NULL_RTX, VOIDmode, EXPAND_WRITE));
/* If the bitfield is volatile, we want to access it in the
field's mode, not the computed mode.
@@ -4734,9 +4740,9 @@ expand_assignment (tree to, tree from, bool nontemporal)
if (volatilep && flag_strict_volatile_bitfields > 0)
to_rtx = adjust_address (to_rtx, mode1, 0);
else if (GET_MODE (to_rtx) == VOIDmode)
- to_rtx = adjust_address (to_rtx, BLKmode, 0);
+ to_rtx = adjust_address_1 (to_rtx, BLKmode, 0, validate_p, 1, 0, 0);
}
-
+
if (offset != 0)
{
enum machine_mode address_mode;
@@ -4766,7 +4772,8 @@ expand_assignment (tree to, tree from, bool nontemporal)
&& (bitsize % GET_MODE_ALIGNMENT (mode1)) == 0
&& MEM_ALIGN (to_rtx) == GET_MODE_ALIGNMENT (mode1))
{
- to_rtx = adjust_address (to_rtx, mode1, bitpos / BITS_PER_UNIT);
+ to_rtx = adjust_address_1 (to_rtx, mode1, bitpos / BITS_PER_UNIT,
+ validate_p, 1, 0, 0);
bitpos = 0;
}
@@ -5209,7 +5216,7 @@ store_expr (tree exp, rtx target, int call_param_p, bool
nontemporal)
temp = expand_expr_real (exp, tmp_target, GET_MODE (target),
(call_param_p
? EXPAND_STACK_PARM : EXPAND_NORMAL),
- &alt_rtl);
+ &alt_rtl, true);
}
/* If TEMP is a VOIDmode constant and the mode of the type of EXP is not
@@ -7826,11 +7833,18 @@ expand_constructor (tree exp, rtx target, enum
expand_modifier modifier,
address, and ALT_RTL is non-NULL, then *ALT_RTL is set to the
DECL_RTL of the VAR_DECL. *ALT_RTL is also set if EXP is a
COMPOUND_EXPR whose second argument is such a VAR_DECL, and so on
- recursively. */
+ recursively.
+
+ If VALIDATE_P is false, the address legitimization will not be applied
+ to the generated addressing code for MEM_REF. This is to help
+ defer the address legitimization for inner-referencing expressions, e.g.
+ COMPONENT_REF, ARRAY_REF, BIT_FIELD_REF, etc., til their base and offset
+ have been both expanded and summed up. */
rtx
expand_expr_real (tree exp, rtx target, enum machine_mode tmode,
- enum expand_modifier modifier, rtx *alt_rtl)
+ enum expand_modifier modifier, rtx *alt_rtl,
+ bool validate_p)
{
rtx ret;
@@ -7842,7 +7856,7 @@ expand_expr_real (tree exp, rtx target, enum machine_mode
tmode,
return ret ? ret : const0_rtx;
}
- ret = expand_expr_real_1 (exp, target, tmode, modifier, alt_rtl);
+ ret = expand_expr_real_1 (exp, target, tmode, modifier, alt_rtl, validate_p);
return ret;
}
@@ -9153,7 +9167,8 @@ stmt_is_replaceable_p (gimple stmt)
rtx
expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
- enum expand_modifier modifier, rtx *alt_rtl)
+ enum expand_modifier modifier, rtx *alt_rtl,
+ bool validate_p)
{
rtx op0, op1, temp, decl_rtl;
tree type;
@@ -9299,7 +9314,7 @@ expand_expr_real_1 (tree exp, rtx target, enum
machine_mode tmode,
set_curr_insn_location (gimple_location (g));
r = expand_expr_real (gimple_assign_rhs_to_tree (g), target,
- tmode, modifier, NULL);
+ tmode, modifier, NULL, true);
set_curr_insn_location (saved_loc);
if (REG_P (r) && !REG_EXPR (r))
set_reg_attrs_for_decl_rtl (SSA_NAME_VAR (exp), r);
@@ -9520,7 +9535,8 @@ expand_expr_real_1 (tree exp, rtx target, enum
machine_mode tmode,
case SAVE_EXPR:
{
tree val = treeop0;
- rtx ret = expand_expr_real_1 (val, target, tmode, modifier, alt_rtl);
+ rtx ret = expand_expr_real_1 (val, target, tmode, modifier, alt_rtl,
+ true);
if (!SAVE_EXPR_RESOLVED_P (exp))
{
@@ -9643,13 +9659,19 @@ expand_expr_real_1 (tree exp, rtx target, enum
machine_mode tmode,
}
align = get_object_alignment (exp);
op0 = expand_expr (base, NULL_RTX, VOIDmode, EXPAND_SUM);
- op0 = memory_address_addr_space (mode, op0, as);
+ if (validate_p)
+ op0 = memory_address_addr_space (mode, op0, as);
+ else
+ op0 = convert_memory_address_addr_space (address_mode, op0, as);
if (!integer_zerop (TREE_OPERAND (exp, 1)))
{
rtx off
= immed_double_int_const (mem_ref_offset (exp), address_mode);
op0 = simplify_gen_binary (PLUS, address_mode, op0, off);
- op0 = memory_address_addr_space (mode, op0, as);
+ if (validate_p)
+ op0 = memory_address_addr_space (mode, op0, as);
+ else
+ op0 = convert_memory_address_addr_space (address_mode, op0, as);
}
temp = gen_rtx_MEM (mode, op0);
set_mem_attributes (temp, exp, 0);
@@ -9874,6 +9896,11 @@ expand_expr_real_1 (tree exp, rtx target, enum
machine_mode tmode,
rtx orig_op0, memloc;
bool mem_attrs_from_type = false;
+ /* If OFFSET is not NULL, we defer the address legitimization til
+ the moment that the base and offset have been both expanded and
+ summed up. */
+ validate_p = offset == NULL;
+
/* If we got back the original object, something is wrong. Perhaps
we are evaluating an expression too early. In any event, don't
infinitely recurse. */
@@ -9883,15 +9910,16 @@ expand_expr_real_1 (tree exp, rtx target, enum
machine_mode tmode,
computation, since it will need a temporary and TARGET is known
to have to do. This occurs in unchecked conversion in Ada. */
orig_op0 = op0
- = expand_expr (tem,
- (TREE_CODE (TREE_TYPE (tem)) == UNION_TYPE
- && COMPLETE_TYPE_P (TREE_TYPE (tem))
- && (TREE_CODE (TYPE_SIZE (TREE_TYPE (tem)))
- != INTEGER_CST)
- && modifier != EXPAND_STACK_PARM
- ? target : NULL_RTX),
- VOIDmode,
- modifier == EXPAND_SUM ? EXPAND_NORMAL : modifier);
+ = ((validate_p ? expand_expr : expand_expr_nv)
+ (tem,
+ (TREE_CODE (TREE_TYPE (tem)) == UNION_TYPE
+ && COMPLETE_TYPE_P (TREE_TYPE (tem))
+ && (TREE_CODE (TYPE_SIZE (TREE_TYPE (tem)))
+ != INTEGER_CST)
+ && modifier != EXPAND_STACK_PARM
+ ? target : NULL_RTX),
+ VOIDmode,
+ modifier == EXPAND_SUM ? EXPAND_NORMAL : modifier));
/* If the bitfield is volatile, we want to access it in the
field's mode, not the computed mode.
@@ -9902,7 +9930,7 @@ expand_expr_real_1 (tree exp, rtx target, enum
machine_mode tmode,
if (volatilep && flag_strict_volatile_bitfields > 0)
op0 = adjust_address (op0, mode1, 0);
else if (GET_MODE (op0) == VOIDmode)
- op0 = adjust_address (op0, BLKmode, 0);
+ op0 = adjust_address_1 (op0, BLKmode, 0, validate_p, 1, 0, 0);
}
mode2
@@ -9994,7 +10022,8 @@ expand_expr_real_1 (tree exp, rtx target, enum
machine_mode tmode,
&& (bitsize % GET_MODE_ALIGNMENT (mode1)) == 0
&& MEM_ALIGN (op0) == GET_MODE_ALIGNMENT (mode1))
{
- op0 = adjust_address (op0, mode1, bitpos / BITS_PER_UNIT);
+ op0 = adjust_address_1 (op0, mode1, bitpos / BITS_PER_UNIT,
+ validate_p, 1, 0, 0);
bitpos = 0;
}
@@ -10487,7 +10516,7 @@ expand_expr_real_1 (tree exp, rtx target, enum
machine_mode tmode,
/* WITH_SIZE_EXPR expands to its first argument. The caller should
have pulled out the size to use in whatever context it needed. */
return expand_expr_real (treeop0, original_target, tmode,
- modifier, alt_rtl);
+ modifier, alt_rtl, true);
default:
return expand_expr_real_2 (&ops, target, tmode, modifier);
diff --git a/gcc/expr.h b/gcc/expr.h
index 56f504a..7b5da10 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -428,9 +428,9 @@ extern rtx force_operand (rtx, rtx);
/* Work horses for expand_expr. */
extern rtx expand_expr_real (tree, rtx, enum machine_mode,
- enum expand_modifier, rtx *);
+ enum expand_modifier, rtx *, bool);
extern rtx expand_expr_real_1 (tree, rtx, enum machine_mode,
- enum expand_modifier, rtx *);
+ enum expand_modifier, rtx *, bool);
extern rtx expand_expr_real_2 (sepops, rtx, enum machine_mode,
enum expand_modifier);
@@ -441,13 +441,20 @@ static inline rtx
expand_expr (tree exp, rtx target, enum machine_mode mode,
enum expand_modifier modifier)
{
- return expand_expr_real (exp, target, mode, modifier, NULL);
+ return expand_expr_real (exp, target, mode, modifier, NULL, true);
+}
+
+static inline rtx
+expand_expr_nv (tree exp, rtx target, enum machine_mode mode,
+ enum expand_modifier modifier)
+{
+ return expand_expr_real (exp, target, mode, modifier, NULL, false);
}
static inline rtx
expand_normal (tree exp)
{
- return expand_expr_real (exp, NULL_RTX, VOIDmode, EXPAND_NORMAL, NULL);
+ return expand_expr_real (exp, NULL_RTX, VOIDmode, EXPAND_NORMAL, NULL, true);
}
/* At the start of a function, record that we have no previously-pushed