Hi, This patch clears up the cost model for noce_try_cmove_arith. We lose the "??? FIXME: Magic number 5" comment, and gain a more realistic cost model for if-converting memory accesses.
This is the patch that has the chance to cause the largest behavioural changes for most targets - the current heuristic does not take in to consideration the cost of a conditional move - once we add that the cost of the converted sequence often looks higher than we allowed before. I think that missing the cost of the conditional move from these sequences is not a good idea, and that the cost model should rely on the target giving back good information. A target that finds tests failing after this patch should consider either reducing the cost of a conditional move sequence, or increasing TARGET_MAX_NOCE_IFCVT_SEQ_COST. As this ups the cost of if-convert dramatically, I've used the new parameters to ensure that the tests in the testsuite continue to pass on all targets. Bootstrapped in series on aarch64 and x86-64. OK? Thanks, James --- gcc/ 2016-06-21 James Greenhalgh <james.greenha...@arm.com> * ifcvt.c (noce_try_cmove_arith): Check costs after constructing new sequence. gcc/testsuite/ 2016-06-21 James Greenhalgh <james.greenha...@arm.com> * gcc.dg/ifcvt-2.c: Use parameter to guide if-conversion heuristics. * gcc.dg/ifcvt-3.c: Use parameter to guide if-conversion heuristics. * gcc.dg/pr68435.c: Use parameter to guide if-conversion heuristics.
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c index f4ad037..78906d3 100644 --- a/gcc/ifcvt.c +++ b/gcc/ifcvt.c @@ -2092,7 +2092,8 @@ noce_try_cmove_arith (struct noce_if_info *if_info) rtx a = if_info->a; rtx b = if_info->b; rtx x = if_info->x; - rtx orig_a, orig_b; + rtx orig_a = a; + rtx orig_b = b; rtx_insn *insn_a, *insn_b; bool a_simple = if_info->then_simple; bool b_simple = if_info->else_simple; @@ -2102,16 +2103,15 @@ noce_try_cmove_arith (struct noce_if_info *if_info) int is_mem = 0; enum rtx_code code; rtx_insn *ifcvt_seq; + bool speed_p = optimize_bb_for_speed_p (if_info->test_bb); /* A conditional move from two memory sources is equivalent to a conditional on their addresses followed by a load. Don't do this early because it'll screw alias analysis. Note that we've already checked for no side effects. */ - /* ??? FIXME: Magic number 5. */ if (cse_not_expected && MEM_P (a) && MEM_P (b) - && MEM_ADDR_SPACE (a) == MEM_ADDR_SPACE (b) - && noce_estimate_conversion_profitable_p (if_info, 5)) + && MEM_ADDR_SPACE (a) == MEM_ADDR_SPACE (b)) { machine_mode address_mode = get_address_mode (a); @@ -2143,25 +2143,6 @@ noce_try_cmove_arith (struct noce_if_info *if_info) if (!can_conditionally_move_p (x_mode)) return FALSE; - unsigned int then_cost; - unsigned int else_cost; - if (insn_a) - then_cost = if_info->then_cost; - else - then_cost = 0; - - if (insn_b) - else_cost = if_info->else_cost; - else - else_cost = 0; - - /* We're going to execute one of the basic blocks anyway, so - bail out if the most expensive of the two blocks is unacceptable. */ - - /* TODO: Revisit cost model. */ - if (MAX (then_cost, else_cost) > if_info->max_seq_cost) - return FALSE; - /* Possibly rearrange operands to make things come out more natural. */ if (reversed_comparison_code (if_info->cond, if_info->jump) != UNKNOWN) { @@ -2353,6 +2334,12 @@ noce_try_cmove_arith (struct noce_if_info *if_info) if (!ifcvt_seq) return FALSE; + /* Check that our cost model will allow the transform. */ + + if (seq_cost (ifcvt_seq, speed_p) > if_info->max_seq_cost) + /* Just return false, the sequence has already been finalized. */ + return FALSE; + emit_insn_before_setloc (ifcvt_seq, if_info->jump, INSN_LOCATION (if_info->insn_a)); if_info->transform_name = "noce_try_cmove_arith"; diff --git a/gcc/testsuite/gcc.dg/ifcvt-2.c b/gcc/testsuite/gcc.dg/ifcvt-2.c index e0e1728..73e0dcc 100644 --- a/gcc/testsuite/gcc.dg/ifcvt-2.c +++ b/gcc/testsuite/gcc.dg/ifcvt-2.c @@ -1,5 +1,5 @@ /* { dg-do compile { target aarch64*-*-* x86_64-*-* } } */ -/* { dg-options "-fdump-rtl-ce1 -O2" } */ +/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-unpredictable-cost=100" } */ typedef unsigned char uint8_t; diff --git a/gcc/testsuite/gcc.dg/ifcvt-3.c b/gcc/testsuite/gcc.dg/ifcvt-3.c index 44233d4..b250bc1 100644 --- a/gcc/testsuite/gcc.dg/ifcvt-3.c +++ b/gcc/testsuite/gcc.dg/ifcvt-3.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { { aarch64*-*-* i?86-*-* x86_64-*-* } && lp64 } } } */ -/* { dg-options "-fdump-rtl-ce1 -O2" } */ +/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-unpredictable-cost=100" } */ typedef long long s64; diff --git a/gcc/testsuite/gcc.dg/pr68435.c b/gcc/testsuite/gcc.dg/pr68435.c index 765699a..f86b7f8 100644 --- a/gcc/testsuite/gcc.dg/pr68435.c +++ b/gcc/testsuite/gcc.dg/pr68435.c @@ -1,5 +1,5 @@ /* { dg-do compile { target aarch64*-*-* x86_64-*-* } } */ -/* { dg-options "-fdump-rtl-ce1 -O2 -w" } */ +/* { dg-options "-fdump-rtl-ce1 -O2 -w --param max-rtl-if-conversion-unpredictable-cost=100" } */ typedef struct cpp_reader cpp_reader; enum cpp_ttype