Hi, The IVOPT pass in GCC has a problem: it does not use the cost of the auto-increment address expression in its cost accounting, but instead uses the cost of the plain address expression that it would fall back to if the auto-increment addressing mode were unavailable. For example, on the ARM target: 1) the cost of "[reg]" (which is 6) is used for the address expression "[reg], #off"; 2) the cost of "[reg+off]" (which is 2) is used for the address expression "[reg, #off]!".
This causes two issues: 1) the cost of a non-auto-increment address expression is used for an auto-increment address expression; 2) different address costs are used for pre- and post-increment address expressions. This patch fixes the problem by computing, caching and using the cost of auto-increment address expressions. Bootstrapped and tested on x86/arm. Is it OK? 2013-11-01 Bin Cheng <bin.ch...@arm.com> * tree-ssa-loop-ivopts.c (enum ainc_type): New. (address_cost_data): New field. (get_address_cost): Compute auto-increment rtx cost in ainc_costs. Use ainc_costs for auto-increment rtx patterns. Clean up TWS.
Index: gcc/tree-ssa-loop-ivopts.c =================================================================== --- gcc/tree-ssa-loop-ivopts.c (revision 204117) +++ gcc/tree-ssa-loop-ivopts.c (working copy) @@ -3185,10 +3185,21 @@ multiplier_allowed_in_address_p (HOST_WIDE_INT rat TODO -- there must be some better way. This all is quite crude. */ +enum ainc_type +{ + AINC_PRE_INC, /* Pre increment. */ + AINC_PRE_DEC, /* Pre decrement. */ + AINC_POST_INC, /* Post increment. */ + AINC_POST_DEC, /* Post decrement. */ + AINC_NUM, /* Number of auto increment types. */ + AINC_NONE +}; + typedef struct address_cost_data_s { HOST_WIDE_INT min_offset, max_offset; unsigned costs[2][2][2][2]; + unsigned ainc_costs[AINC_NUM]; } *address_cost_data; @@ -3206,6 +3217,7 @@ get_address_cost (bool symbol_present, bool var_pr static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE]; static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE]; unsigned cost, acost, complexity; + enum ainc_type autoinc_type; bool offset_p, ratio_p, autoinc; HOST_WIDE_INT s_offset, autoinc_offset, msize; unsigned HOST_WIDE_INT mask; @@ -3277,33 +3289,49 @@ get_address_cost (bool symbol_present, bool var_pr reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1); reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2); - if (USE_LOAD_PRE_DECREMENT (mem_mode) + if (USE_LOAD_PRE_DECREMENT (mem_mode) || USE_STORE_PRE_DECREMENT (mem_mode)) { addr = gen_rtx_PRE_DEC (address_mode, reg0); has_predec[mem_mode] = memory_address_addr_space_p (mem_mode, addr, as); + + if (has_predec[mem_mode]) + data->ainc_costs[AINC_PRE_DEC] + = address_cost (addr, mem_mode, as, speed); } - if (USE_LOAD_POST_DECREMENT (mem_mode) + if (USE_LOAD_POST_DECREMENT (mem_mode) || USE_STORE_POST_DECREMENT (mem_mode)) { addr = gen_rtx_POST_DEC (address_mode, reg0); has_postdec[mem_mode] = memory_address_addr_space_p (mem_mode, addr, as); + + if (has_postdec[mem_mode]) + data->ainc_costs[AINC_POST_DEC] + = 
address_cost (addr, mem_mode, as, speed); } - if (USE_LOAD_PRE_INCREMENT (mem_mode) + if (USE_LOAD_PRE_INCREMENT (mem_mode) || USE_STORE_PRE_DECREMENT (mem_mode)) { addr = gen_rtx_PRE_INC (address_mode, reg0); has_preinc[mem_mode] = memory_address_addr_space_p (mem_mode, addr, as); + + if (has_preinc[mem_mode]) + data->ainc_costs[AINC_PRE_INC] + = address_cost (addr, mem_mode, as, speed); } - if (USE_LOAD_POST_INCREMENT (mem_mode) + if (USE_LOAD_POST_INCREMENT (mem_mode) || USE_STORE_POST_INCREMENT (mem_mode)) { addr = gen_rtx_POST_INC (address_mode, reg0); has_postinc[mem_mode] = memory_address_addr_space_p (mem_mode, addr, as); + + if (has_postinc[mem_mode]) + data->ainc_costs[AINC_POST_INC] + = address_cost (addr, mem_mode, as, speed); } for (i = 0; i < 16; i++) { @@ -3429,22 +3457,32 @@ get_address_cost (bool symbol_present, bool var_pr s_offset = offset; autoinc = false; + autoinc_type = AINC_NONE; msize = GET_MODE_SIZE (mem_mode); autoinc_offset = offset; if (stmt_after_inc) autoinc_offset += ratio * cstep; if (symbol_present || var_present || ratio != 1) autoinc = false; - else if ((has_postinc[mem_mode] && autoinc_offset == 0 + else + { + if (has_postinc[mem_mode] && autoinc_offset == 0 + && msize == cstep) + autoinc_type = AINC_POST_INC; + else if (has_postdec[mem_mode] && autoinc_offset == 0 + && msize == -cstep) + autoinc_type = AINC_POST_DEC; + else if (has_preinc[mem_mode] && autoinc_offset == msize && msize == cstep) - || (has_postdec[mem_mode] && autoinc_offset == 0 + autoinc_type = AINC_PRE_INC; + else if (has_predec[mem_mode] && autoinc_offset == -msize && msize == -cstep) - || (has_preinc[mem_mode] && autoinc_offset == msize - && msize == cstep) - || (has_predec[mem_mode] && autoinc_offset == -msize - && msize == -cstep)) - autoinc = true; + autoinc_type = AINC_PRE_DEC; + if (autoinc_type != AINC_NONE) + autoinc = true; + } + cost = 0; offset_p = (s_offset != 0 && data->min_offset <= s_offset @@ -3460,7 +3498,10 @@ get_address_cost (bool 
symbol_present, bool var_pr if (may_autoinc) *may_autoinc = autoinc; - acost = data->costs[symbol_present][var_present][offset_p][ratio_p]; + if (autoinc) + acost = data->ainc_costs[autoinc_type]; + else + acost = data->costs[symbol_present][var_present][offset_p][ratio_p]; complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p; return new_cost (cost + acost, complexity); }