Hi,

This patch cleans up the cost model for noce_try_cmove_arith.  We lose the
"??? FIXME: Magic number 5" comment, and gain a more realistic cost model
for if-converting memory accesses.
This is the patch that will cause the largest behavioural change for most
targets: the current heuristic does not take into consideration the cost of
a conditional move, and once we add that, the cost of the converted sequence
often looks much higher than (BRANCH_COST * COSTS_N_INSNS (1)).  I think
that leaving the cost of the conditional move out of these sequences is a
mistake, and that the cost model should rely on the target giving back good
information.  A target that finds tests failing after this patch should
consider either reducing the cost it reports for a conditional move
sequence, or increasing TARGET_RTX_BRANCH_COST.

OK?

Thanks,
James

---
gcc/

2016-06-02  James Greenhalgh  <james.greenha...@arm.com>

	* ifcvt.c (noce_code_is_comparison_p): New.
	(noce_cmove_estimate_cost): Likewise.
	(noce_try_cmove_arith): Use it.
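For reference, the conditional-move cost that noce_cmove_estimate_cost sees
comes straight from the target's rtx cost hooks, so the first tuning knob
mentioned above lives in the port's TARGET_RTX_COSTS implementation.  The
fragment below is only an illustrative sketch (the function name, the cost
numbers and the decision to stop recursion are invented, not part of this
patch): it shows a port reporting an IF_THEN_ELSE select as a single cheap
instruction so that the estimated conditional-move sequence stays
competitive with the branch.

  /* Hypothetical fragment of a port's TARGET_RTX_COSTS hook; the numbers
     are for illustration only.  */

  static bool
  example_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
                     int outer_code ATTRIBUTE_UNUSED,
                     int opno ATTRIBUTE_UNUSED, int *total, bool speed_p)
  {
    switch (GET_CODE (x))
      {
      case IF_THEN_ELSE:
        /* Pretend a conditional select costs one instruction when
           optimizing for speed, two when optimizing for size.  */
        *total = speed_p ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
        return true;   /* Cost is final, don't recurse into operands.  */

      default:
        return false;  /* Defer to the generic cost model.  */
      }
  }

  #undef TARGET_RTX_COSTS
  #define TARGET_RTX_COSTS example_rtx_costs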
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index b192c85..bd3f55d 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -830,6 +830,78 @@ static int noce_try_minmax (struct noce_if_info *);
 static int noce_try_abs (struct noce_if_info *);
 static int noce_try_sign_mask (struct noce_if_info *);
 
+/* Return TRUE if CODE is an RTX comparison operator.  */
+
+static bool
+noce_code_is_comparison_p (rtx_code code)
+{
+  switch (code)
+    {
+    case NE:
+    case EQ:
+    case GE:
+    case GT:
+    case LE:
+    case LT:
+    case GEU:
+    case GTU:
+    case LEU:
+    case LTU:
+    case UNORDERED:
+    case ORDERED:
+    case UNEQ:
+    case UNGE:
+    case UNGT:
+    case UNLE:
+    case UNLT:
+    case LTGT:
+      return true;
+    default:
+      return false;
+    }
+}
+
+/* Return the estimated cost of a single conditional move, where the
+   condition is calculated using the COMPARISON operator in mode CMODE,
+   and the store is in mode SMODE, depending on whether we are compiling
+   for SPEED_P.  */
+
+static unsigned int
+noce_cmove_estimate_cost (machine_mode cmode, machine_mode smode,
+			  rtx_code comparison, bool speed_p)
+{
+  unsigned int cost = 0;
+
+  gcc_checking_assert (noce_code_is_comparison_p (comparison));
+
+  start_sequence ();
+
+  /* We're only estimating, so we don't need to be too cautious about
+     getting the operands correct, but we would like an estimate.  We
+     do need at least two registers, to avoid the comparison being
+     folded.  */
+  rtx creg = gen_reg_rtx (cmode);
+  rtx creg2 = gen_reg_rtx (cmode);
+  rtx sreg = gen_reg_rtx (smode);
+  rtx sreg2 = gen_reg_rtx (smode);
+  rtx dest = emit_conditional_move (sreg, comparison, creg, creg2,
+				    cmode, sreg, sreg2, smode, false);
+  if (!dest)
+    {
+      /* Set something suitably high in here, as our best guess
+	 is that the if-conversion will fail.  */
+      cost = COSTS_N_INSNS (32);
+    }
+  else
+    {
+      rtx_insn *seq = get_insns ();
+      cost = seq_cost (seq, speed_p);
+    }
+  end_sequence ();
+
+  return cost;
+}
+
 /* This function is always called when we would expand a number of
    "cheap" instructions.  Multiply NINSNS by COSTS_N_INSNS (1) to
    approximate the RTX cost of those cheap instructions.  */
@@ -2040,7 +2112,8 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
   rtx a = if_info->a;
   rtx b = if_info->b;
   rtx x = if_info->x;
-  rtx orig_a, orig_b;
+  rtx orig_a = a;
+  rtx orig_b = b;
   rtx_insn *insn_a, *insn_b;
   bool a_simple = if_info->then_simple;
   bool b_simple = if_info->else_simple;
@@ -2050,16 +2123,15 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
   int is_mem = 0;
   enum rtx_code code;
   rtx_insn *ifcvt_seq;
+  bool speed_p = optimize_bb_for_speed_p (if_info->test_bb);
 
   /* A conditional move from two memory sources is equivalent to a
      conditional on their addresses followed by a load.  Don't do this
      early because it'll screw alias analysis.  Note that we've
      already checked for no side effects.  */
-  /* ??? FIXME: Magic number 5.  */
   if (cse_not_expected
       && MEM_P (a) && MEM_P (b)
-      && MEM_ADDR_SPACE (a) == MEM_ADDR_SPACE (b)
-      && noce_estimate_conversion_profitable_p (if_info, 5))
+      && MEM_ADDR_SPACE (a) == MEM_ADDR_SPACE (b))
     {
       machine_mode address_mode = get_address_mode (a);
 
@@ -2087,6 +2159,7 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
   insn_b = if_info->insn_b;
 
   machine_mode x_mode = GET_MODE (x);
+  machine_mode cmode = GET_MODE (XEXP (if_info->cond, 0));
 
   if (!can_conditionally_move_p (x_mode))
     return FALSE;
@@ -2103,12 +2176,32 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
   else
     else_cost = 0;
 
-  /* We're going to execute one of the basic blocks anyway, so
-     bail out if the most expensive of the two blocks is unacceptable.  */
+  if (!is_mem)
+    {
+      /* If convert in the case that:
+
+	   then_cost + else_cost + noce_cmove_estimate_cost (x_mode)
+	     <= MIN (then_cost, else_cost) + if_info->rtx_edge_cost
 
-  /* TODO: Revisit cost model.  */
-  if (MAX (then_cost, else_cost) > if_info->rtx_edge_cost)
-    return FALSE;
+	 Which we rearrange using the rule
+	   (a + b - MIN (a, b) == MAX (a, b))
+	 to get...  */
+      if ((MAX (then_cost, else_cost)
+	   + noce_cmove_estimate_cost (cmode, x_mode, code, speed_p))
+	  > if_info->rtx_edge_cost)
+	{
+	  return FALSE;
+	}
+    }
+  else
+    {
+      /* We're trying to convert a branch and a conditional load in to an
+	 unconditional load and a conditional move.  Cost those directly.  */
+      if ((then_cost
+	   + noce_cmove_estimate_cost (cmode, x_mode, code, speed_p))
+	  > if_info->rtx_edge_cost + MIN (then_cost, else_cost))
+	return FALSE;
+    }
 
   /* Possibly rearrange operands to make things come out more natural.  */
   if (reversed_comparison_code (if_info->cond, if_info->jump) != UNKNOWN)
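To make the new acceptance test concrete, here is the arithmetic for the
!is_mem case with some invented numbers (purely for illustration): say
then_cost = 8, else_cost = 4, noce_cmove_estimate_cost returns 4, and
if_info->rtx_edge_cost is 12.  The condition described in the comment is

  then_cost + else_cost + cmove_cost <= MIN (then_cost, else_cost) + rtx_edge_cost
  8 + 4 + 4 = 16                     <= 4 + 12 = 16

and the rearranged form the code actually tests (using
a + b - MIN (a, b) == MAX (a, b)) is

  MAX (then_cost, else_cost) + cmove_cost > rtx_edge_cost
  MAX (8, 4) + 4 = 12                     > 12

which is false, so the conversion goes ahead.  Raise the conditional-move
estimate to 5 (or drop rtx_edge_cost to 11) and the check trips,
noce_try_cmove_arith returns FALSE, and the branch is kept.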