Rigt now, mem_cost for separate mem alternative is 1 * frequency which is pretty small and caused the unnecessary SSE spill in the PR, I've tried to rework backend cost model, but RA still not happy with that(regress somewhere else). I think the root cause of this is cost for separate 'm' alternative cost is too small, especially considering that the mov cost of gpr are 2(default for REGISTER_MOVE_COST). So this patch increase mem_cost to 2*frequency, also increase 1 for reg_class cost when m alternative.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Ok for trunk? gcc/ChangeLog: PR target/105513 * ira-costs.cc (record_reg_classes): Increase both mem_cost and reg class cost by 1 for separate mem alternative when REG_P (op). gcc/testsuite/ChangeLog: * gcc.target/i386/pr105513-1.c: New test. --- gcc/ira-costs.cc | 26 +++++++++++++--------- gcc/testsuite/gcc.target/i386/pr105513-1.c | 16 +++++++++++++ 2 files changed, 31 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr105513-1.c diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc index 964c94a06ef..f7b8325e195 100644 --- a/gcc/ira-costs.cc +++ b/gcc/ira-costs.cc @@ -625,7 +625,8 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, for (k = cost_classes_ptr->num - 1; k >= 0; k--) { rclass = cost_classes[k]; - pp_costs[k] = mem_cost[rclass][0] * frequency; + pp_costs[k] = (mem_cost[rclass][0] + + 1) * frequency; } } else @@ -648,7 +649,8 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, for (k = cost_classes_ptr->num - 1; k >= 0; k--) { rclass = cost_classes[k]; - pp_costs[k] = mem_cost[rclass][1] * frequency; + pp_costs[k] = (mem_cost[rclass][1] + + 1) * frequency; } } else @@ -670,9 +672,9 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, for (k = cost_classes_ptr->num - 1; k >= 0; k--) { rclass = cost_classes[k]; - pp_costs[k] = ((mem_cost[rclass][0] - + mem_cost[rclass][1]) - * frequency); + pp_costs[k] = (mem_cost[rclass][0] + + mem_cost[rclass][1] + + 2) * frequency; } } else @@ -861,7 +863,8 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, for (k = cost_classes_ptr->num - 1; k >= 0; k--) { rclass = cost_classes[k]; - pp_costs[k] = mem_cost[rclass][0] * frequency; + pp_costs[k] = (mem_cost[rclass][0] + + 1) * frequency; } } else @@ -884,7 +887,8 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, for (k = cost_classes_ptr->num - 1; k >= 0; k--) { rclass = cost_classes[k]; - pp_costs[k] = mem_cost[rclass][1] * frequency; + pp_costs[k] = (mem_cost[rclass][1] + + 1) * frequency; } } else @@ -906,9 +910,9 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, for (k = cost_classes_ptr->num - 1; k >= 0; k--) { rclass = cost_classes[k]; - pp_costs[k] = ((mem_cost[rclass][0] - + mem_cost[rclass][1]) - * frequency); + pp_costs[k] = (mem_cost[rclass][0] + + mem_cost[rclass][1] + + 2) * frequency; } } else @@ -929,7 +933,7 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, /* Although we don't need insn to reload from memory, still accessing memory is usually more expensive than a register. */ - pp->mem_cost = frequency; + pp->mem_cost = 2 * frequency; else /* If the alternative actually allows memory, make things a bit cheaper since we won't need an diff --git a/gcc/testsuite/gcc.target/i386/pr105513-1.c b/gcc/testsuite/gcc.target/i386/pr105513-1.c new file mode 100644 index 00000000000..530f5292252 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr105513-1.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -msse2 -mtune=skylake -mfpmath=sse" } */ +/* { dg-final { scan-assembler-not "\\(%rsp\\)" } } */ + +static int as_int(float x) +{ + return (union{float x; int i;}){x}.i; +} + +float f(double y, float x) +{ + int i = as_int(x); + if (__builtin_expect(i > 99, 0)) return 0; + if (i*2u < 77) if (i==2) return 0; + return y*x; +} -- 2.18.1