This patch stops the scheduler from reordering RTX_FRAME_RELATED_P insns relative to each other, to fix PR78029. I was originally a bit worried that this would degrade code quality, but it seems to improve it instead: more unrelated insns end up scheduled between the prologue insns.
The problem in PR78029: We have two insns, in this order: (insn/f 300 299 267 8 (set (reg:DI 65 lr) (reg:DI 0 0)) 579 {*movdi_internal64} (expr_list:REG_DEAD (reg:DI 0 0) (expr_list:REG_CFA_RESTORE (reg:DI 65 lr) (nil)))) ... (insn/f 310 268 134 8 (set (mem/c:DI (plus:DI (reg/f:DI 1 1) (const_int 144 [0x90])) [6 S8 A8]) (reg:DI 0 0)) 579 {*movdi_internal64} (expr_list:REG_DEAD (reg:DI 0 0) (expr_list:REG_CFA_OFFSET (set (mem/c:DI (plus:DI (reg/f:DI 1 1) (const_int 144 [0x90])) [6 S8 A8]) (reg:DI 65 lr)) (nil)))) and sched swaps them (when compiling for power6, it tries to put memory stores together, so insn 310 is moved up past 300 to go together with some other store). But the REG_CFA_RESTORE and REG_CFA_OFFSET cannot be swapped (they both say where the orig value of LR now lives). Tested on powerpc64-linux {-m32,-m64}, no regressions. Is this okay for trunk? Segher 2016-10-25 Segher Boessenkool <seg...@kernel.crashing.org> PR rtl-optimization/78029 * sched-deps.c (sched_analyze_insn): Mark any RTX_FRAME_RELATED_P insn as depending on the previous one. (init_deps): Initialize last_frame_related. * sched-int.h (struct deps_desc): Add last_frame_related field. --- gcc/sched-deps.c | 13 +++++++++++++ gcc/sched-int.h | 3 +++ 2 files changed, 16 insertions(+) diff --git a/gcc/sched-deps.c b/gcc/sched-deps.c index 6cd8332..b6341b5 100644 --- a/gcc/sched-deps.c +++ b/gcc/sched-deps.c @@ -3502,6 +3502,18 @@ sched_analyze_insn (struct deps_desc *deps, rtx x, rtx_insn *insn) if (!deps->readonly) deps->last_args_size = insn; } + + /* Prologue and epilogue insns need to stay in the order we have emitted + them in. Any attached CFI notes would also be reordered, which can + be fatal. This also encourages unrelated insns to be scheduled in + between the *logue insns. 
*/ + if (RTX_FRAME_RELATED_P (insn)) + { + if (deps->last_frame_related) + add_dependence (insn, deps->last_frame_related, REG_DEP_OUTPUT); + if (!deps->readonly) + deps->last_frame_related = insn; + } } /* Return TRUE if INSN might not always return normally (e.g. call exit, @@ -3907,6 +3919,7 @@ init_deps (struct deps_desc *deps, bool lazy_reg_last) deps->in_post_call_group_p = not_post_call; deps->last_debug_insn = 0; deps->last_args_size = 0; + deps->last_frame_related = 0; deps->last_reg_pending_barrier = NOT_A_BARRIER; deps->readonly = 0; } diff --git a/gcc/sched-int.h b/gcc/sched-int.h index b4a7f92..dd641f2 100644 --- a/gcc/sched-int.h +++ b/gcc/sched-int.h @@ -537,6 +537,9 @@ struct deps_desc /* The last insn bearing REG_ARGS_SIZE that we've seen. */ rtx_insn *last_args_size; + /* The last FRAME_RELATED insn that we have seen. */ + rtx_insn *last_frame_related; + /* The maximum register number for the following arrays. Before reload this is max_reg_num; after reload it is FIRST_PSEUDO_REGISTER. */ int max_reg; -- 1.9.3