Hi,
IVOPT in GCC has a problem: it does not use the cost of the auto-increment
address expression in its accounting. Instead it falls back to the cost of
the plain address expression, as if the auto-increment addressing mode were
unavailable.
For example, on ARM target:
1) the cost of "[reg]" (which is 6) is used for address expression "[reg],
#off";
2) the cost of "[reg+off]" (which is 2) is used for address expression
"[reg, #off]!";
This causes:
1) the cost of a non-auto-increment address expression is used for an
auto-increment address expression;
2) different address costs are used for pre/post increment address
expressions.
This patch fixes the problem by computing, caching and using the cost of
auto-increment address expressions.
Bootstrapped and tested on x86/arm. Is it OK?
2013-11-01 Bin Cheng <[email protected]>
* tree-ssa-loop-ivopts.c (enum ainc_type): New.
(address_cost_data): New field.
(get_address_cost): Compute auto-increment rtx cost in ainc_costs.
Use ainc_costs for auto-increment rtx patterns.
Cleanup TWS.
Index: gcc/tree-ssa-loop-ivopts.c
===================================================================
--- gcc/tree-ssa-loop-ivopts.c (revision 204117)
+++ gcc/tree-ssa-loop-ivopts.c (working copy)
@@ -3185,10 +3185,21 @@ multiplier_allowed_in_address_p (HOST_WIDE_INT rat
TODO -- there must be some better way. This all is quite crude. */
+enum ainc_type
+{
+ AINC_PRE_INC, /* Pre increment. */
+ AINC_PRE_DEC, /* Pre decrement. */
+ AINC_POST_INC, /* Post increment. */
+ AINC_POST_DEC, /* Post decrement. */
+ AINC_NUM, /* Number of auto increment types. */
+ AINC_NONE
+};
+
typedef struct address_cost_data_s
{
HOST_WIDE_INT min_offset, max_offset;
unsigned costs[2][2][2][2];
+ unsigned ainc_costs[AINC_NUM];
} *address_cost_data;
@@ -3206,6 +3217,7 @@ get_address_cost (bool symbol_present, bool var_pr
static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
unsigned cost, acost, complexity;
+ enum ainc_type autoinc_type;
bool offset_p, ratio_p, autoinc;
HOST_WIDE_INT s_offset, autoinc_offset, msize;
unsigned HOST_WIDE_INT mask;
@@ -3277,33 +3289,49 @@ get_address_cost (bool symbol_present, bool var_pr
reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
- if (USE_LOAD_PRE_DECREMENT (mem_mode)
+ if (USE_LOAD_PRE_DECREMENT (mem_mode)
|| USE_STORE_PRE_DECREMENT (mem_mode))
{
addr = gen_rtx_PRE_DEC (address_mode, reg0);
has_predec[mem_mode]
= memory_address_addr_space_p (mem_mode, addr, as);
+
+ if (has_predec[mem_mode])
+ data->ainc_costs[AINC_PRE_DEC]
+ = address_cost (addr, mem_mode, as, speed);
}
- if (USE_LOAD_POST_DECREMENT (mem_mode)
+ if (USE_LOAD_POST_DECREMENT (mem_mode)
|| USE_STORE_POST_DECREMENT (mem_mode))
{
addr = gen_rtx_POST_DEC (address_mode, reg0);
has_postdec[mem_mode]
= memory_address_addr_space_p (mem_mode, addr, as);
+
+ if (has_postdec[mem_mode])
+ data->ainc_costs[AINC_POST_DEC]
+ = address_cost (addr, mem_mode, as, speed);
}
- if (USE_LOAD_PRE_INCREMENT (mem_mode)
+ if (USE_LOAD_PRE_INCREMENT (mem_mode)
|| USE_STORE_PRE_DECREMENT (mem_mode))
{
addr = gen_rtx_PRE_INC (address_mode, reg0);
has_preinc[mem_mode]
= memory_address_addr_space_p (mem_mode, addr, as);
+
+ if (has_preinc[mem_mode])
+ data->ainc_costs[AINC_PRE_INC]
+ = address_cost (addr, mem_mode, as, speed);
}
- if (USE_LOAD_POST_INCREMENT (mem_mode)
+ if (USE_LOAD_POST_INCREMENT (mem_mode)
|| USE_STORE_POST_INCREMENT (mem_mode))
{
addr = gen_rtx_POST_INC (address_mode, reg0);
has_postinc[mem_mode]
= memory_address_addr_space_p (mem_mode, addr, as);
+
+ if (has_postinc[mem_mode])
+ data->ainc_costs[AINC_POST_INC]
+ = address_cost (addr, mem_mode, as, speed);
}
for (i = 0; i < 16; i++)
{
@@ -3429,22 +3457,32 @@ get_address_cost (bool symbol_present, bool var_pr
s_offset = offset;
autoinc = false;
+ autoinc_type = AINC_NONE;
msize = GET_MODE_SIZE (mem_mode);
autoinc_offset = offset;
if (stmt_after_inc)
autoinc_offset += ratio * cstep;
if (symbol_present || var_present || ratio != 1)
autoinc = false;
- else if ((has_postinc[mem_mode] && autoinc_offset == 0
+ else
+ {
+ if (has_postinc[mem_mode] && autoinc_offset == 0
+ && msize == cstep)
+ autoinc_type = AINC_POST_INC;
+ else if (has_postdec[mem_mode] && autoinc_offset == 0
+ && msize == -cstep)
+ autoinc_type = AINC_POST_DEC;
+ else if (has_preinc[mem_mode] && autoinc_offset == msize
&& msize == cstep)
- || (has_postdec[mem_mode] && autoinc_offset == 0
+ autoinc_type = AINC_PRE_INC;
+ else if (has_predec[mem_mode] && autoinc_offset == -msize
&& msize == -cstep)
- || (has_preinc[mem_mode] && autoinc_offset == msize
- && msize == cstep)
- || (has_predec[mem_mode] && autoinc_offset == -msize
- && msize == -cstep))
- autoinc = true;
+ autoinc_type = AINC_PRE_DEC;
+ if (autoinc_type != AINC_NONE)
+ autoinc = true;
+ }
+
cost = 0;
offset_p = (s_offset != 0
&& data->min_offset <= s_offset
@@ -3460,7 +3498,10 @@ get_address_cost (bool symbol_present, bool var_pr
if (may_autoinc)
*may_autoinc = autoinc;
- acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
+ if (autoinc)
+ acost = data->ainc_costs[autoinc_type];
+ else
+ acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
return new_cost (cost + acost, complexity);
}