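This implements the target hooks for separate shrink-wrapping on rs6000. It handles the GPRs and LR. Each GPR gets the concern number equal to its register number; LR gets concern number 0 (GPR0 is only used as a scratch register by the LR save/restore code and is never one of the saved GPRs, so its number is free).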
This improves SPECint by 0.9%, SPECfp by 0.8%, and some individual benchmarks by much more (on POWER8). It improves the hot path in various interpreters and, for example, in glibc's malloc. 2016-06-07 Segher Boessenkool <seg...@kernel.crashing.org> * config/rs6000/rs6000.c (machine_function): Add new fields gpr_is_wrapped_separately and lr_is_wrapped_separately. (TARGET_SHRINK_WRAP_GET_SEPARATE_CONCERNS, TARGET_SHRINK_WRAP_CONCERNS_FOR_BB, TARGET_SHRINK_WRAP_DISQUALIFY_CONCERNS, TARGET_SHRINK_WRAP_EMIT_PROLOGUE_CONCERNS, TARGET_SHRINK_WRAP_EMIT_EPILOGUE_CONCERNS, TARGET_SHRINK_WRAP_SET_HANDLED_CONCERNS): Define. (rs6000_get_separate_concerns): New function. (rs6000_concerns_for_bb): New function. (rs6000_disqualify_concerns): New function. (rs6000_emit_prologue_concerns): New function. (rs6000_emit_epilogue_concerns): New function. (rs6000_set_handled_concerns): New function. (rs6000_emit_prologue): Don't emit LR save if lr_is_wrapped_separately. Don't emit GPR saves if gpr_is_wrapped_separately for that register. (restore_saved_lr): Don't restore LR if lr_is_wrapped_separately. (rs6000_emit_epilogue): Don't emit GPR restores if gpr_is_wrapped_separately for that register. Don't make a REG_CFA_RESTORE note for registers we did not restore, either. --- gcc/config/rs6000/rs6000.c | 257 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 242 insertions(+), 15 deletions(-) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index c6b2b6a..af56d8e 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -152,6 +152,10 @@ typedef struct GTY(()) machine_function bool split_stack_argp_used; /* Flag if r2 setup is needed with ELFv2 ABI. */ bool r2_setup_needed; + /* The concerns already handled by separate shrink-wrapping, which should + not be considered by the prologue and epilogue. */ + bool gpr_is_wrapped_separately[32]; + bool lr_is_wrapped_separately; } machine_function; /* Support targetm.vectorize.builtin_mask_for_load. 
*/ @@ -1511,6 +1515,19 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_SET_UP_BY_PROLOGUE #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue +#undef TARGET_SHRINK_WRAP_GET_SEPARATE_CONCERNS +#define TARGET_SHRINK_WRAP_GET_SEPARATE_CONCERNS rs6000_get_separate_concerns +#undef TARGET_SHRINK_WRAP_CONCERNS_FOR_BB +#define TARGET_SHRINK_WRAP_CONCERNS_FOR_BB rs6000_concerns_for_bb +#undef TARGET_SHRINK_WRAP_DISQUALIFY_CONCERNS +#define TARGET_SHRINK_WRAP_DISQUALIFY_CONCERNS rs6000_disqualify_concerns +#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_CONCERNS +#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_CONCERNS rs6000_emit_prologue_concerns +#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_CONCERNS +#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_CONCERNS rs6000_emit_epilogue_concerns +#undef TARGET_SHRINK_WRAP_SET_HANDLED_CONCERNS +#define TARGET_SHRINK_WRAP_SET_HANDLED_CONCERNS rs6000_set_handled_concerns + #undef TARGET_EXTRA_LIVE_ON_ENTRY #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry @@ -26111,6 +26128,201 @@ rs6000_global_entry_point_needed_p (void) return cfun->machine->r2_setup_needed; } +/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_CONCERNS. */ +static sbitmap +rs6000_get_separate_concerns (void) +{ + rs6000_stack_t *info = rs6000_stack_info (); + + if (!(info->savres_strategy & SAVE_INLINE_GPRS) + || !(info->savres_strategy & REST_INLINE_GPRS) + || WORLD_SAVE_P (info)) + return NULL; + + sbitmap concerns = sbitmap_alloc (32); + bitmap_clear (concerns); + + /* The GPRs we need saved to the frame. */ + int reg_size = TARGET_32BIT ? 4 : 8; + int offset = info->gp_save_offset; + if (info->push_p) + offset += info->total_size; + + for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++) + { + if (IN_RANGE (offset, -0x8000, 0x7fff) + && rs6000_reg_live_or_pic_offset_p (regno)) + bitmap_set_bit (concerns, regno); + + offset += reg_size; + } + + /* Don't mess with the hard frame pointer. 
*/ + if (frame_pointer_needed) + bitmap_clear_bit (concerns, HARD_FRAME_POINTER_REGNUM); + + /* Don't mess with the fixed TOC register. */ + if ((TARGET_TOC && TARGET_MINIMAL_TOC) + || (flag_pic == 1 && DEFAULT_ABI == ABI_V4) + || (flag_pic && DEFAULT_ABI == ABI_DARWIN)) + bitmap_clear_bit (concerns, RS6000_PIC_OFFSET_TABLE_REGNUM); + + /* Optimize LR save and restore if we can. This is concern 0. */ + if (info->lr_save_p + && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))) + { + offset = info->lr_save_offset; + if (info->push_p) + offset += info->total_size; + if (IN_RANGE (offset, -0x8000, 0x7fff)) + bitmap_set_bit (concerns, 0); + } + + return concerns; +} + +/* Implement TARGET_SHRINK_WRAP_CONCERNS_FOR_BB. */ +static sbitmap +rs6000_concerns_for_bb (basic_block bb) +{ + rs6000_stack_t *info = rs6000_stack_info (); + + bitmap in = DF_LIVE_IN (bb); + bitmap gen = &DF_LIVE_BB_INFO (bb)->gen; + bitmap kill = &DF_LIVE_BB_INFO (bb)->kill; + + sbitmap concerns = sbitmap_alloc (32); + bitmap_clear (concerns); + + /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */ + for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++) + if (bitmap_bit_p (in, regno) + || bitmap_bit_p (gen, regno) + || bitmap_bit_p (kill, regno)) + bitmap_set_bit (concerns, regno); + + /* LR needs to be saved around a bb if it is killed in that bb. */ + if (bitmap_bit_p (kill, LR_REGNO)) + bitmap_set_bit (concerns, 0); + + return concerns; +} + +/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_CONCERNS. */ +static void +rs6000_disqualify_concerns (sbitmap concerns, edge e, sbitmap edge_concerns, + bool /*is_prologue*/) +{ + /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be + live where we want to place that code. 
*/ + if (bitmap_bit_p (edge_concerns, 0) + && bitmap_bit_p (DF_LIVE_IN (e->dest), 0)) + { + if (dump_file) + fprintf (dump_file, "Disqualifying LR because GPR0 is live " + "on entry to bb %d\n", e->dest->index); + bitmap_clear_bit (concerns, 0); + } +} + +/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_CONCERNS. */ +static void +rs6000_emit_prologue_concerns (sbitmap concerns) +{ + rs6000_stack_t *info = rs6000_stack_info (); + rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); + int reg_size = TARGET_32BIT ? 4 : 8; + + /* Prologue for LR. */ + if (bitmap_bit_p (concerns, 0)) + { + rtx reg = gen_rtx_REG (Pmode, 0); + emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO)); + RTX_FRAME_RELATED_P (get_last_insn ()) = 1; + + int offset = info->lr_save_offset; + if (info->push_p) + offset += info->total_size; + + emit_insn (gen_frame_store (reg, sp_reg_rtx, offset)); + RTX_FRAME_RELATED_P (get_last_insn ()) = 1; + } + + /* Prologue for the GPRs. */ + int offset = info->gp_save_offset; + if (info->push_p) + offset += info->total_size; + + for (int i = info->first_gp_reg_save; i < 32; i++) + { + if (bitmap_bit_p (concerns, i)) + { + rtx reg = gen_rtx_REG (Pmode, i); + emit_insn (gen_frame_store (reg, sp_reg_rtx, offset)); + RTX_FRAME_RELATED_P (get_last_insn ()) = 1; + } + + offset += reg_size; + } +} + +/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_CONCERNS. */ +static void +rs6000_emit_epilogue_concerns (sbitmap concerns) +{ + rs6000_stack_t *info = rs6000_stack_info (); + rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); + int reg_size = TARGET_32BIT ? 4 : 8; + + /* Epilogue for the GPRs. 
*/ + int offset = info->gp_save_offset; + if (info->push_p) + offset += info->total_size; + + for (int i = info->first_gp_reg_save; i < 32; i++) + { + if (bitmap_bit_p (concerns, i)) + { + rtx reg = gen_rtx_REG (Pmode, i); + emit_insn (gen_frame_load (reg, sp_reg_rtx, offset)); + RTX_FRAME_RELATED_P (get_last_insn ()) = 1; + add_reg_note (get_last_insn (), REG_CFA_RESTORE, reg); + } + + offset += reg_size; + } + + /* Epilogue for LR. */ + if (bitmap_bit_p (concerns, 0)) + { + int offset = info->lr_save_offset; + if (info->push_p) + offset += info->total_size; + + rtx reg = gen_rtx_REG (Pmode, 0); + emit_insn (gen_frame_load (reg, sp_reg_rtx, offset)); + + rtx lr = gen_rtx_REG (Pmode, LR_REGNO); + emit_move_insn (lr, reg); + RTX_FRAME_RELATED_P (get_last_insn ()) = 1; + add_reg_note (get_last_insn (), REG_CFA_RESTORE, lr); + } +} + +/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_CONCERNS. */ +static void +rs6000_set_handled_concerns (sbitmap concerns) +{ + rs6000_stack_t *info = rs6000_stack_info (); + + for (int i = info->first_gp_reg_save; i < 32; i++) + if (bitmap_bit_p (concerns, i)) + cfun->machine->gpr_is_wrapped_separately[i] = true; + + if (bitmap_bit_p (concerns, 0)) + cfun->machine->lr_is_wrapped_separately = true; +} + /* Emit function prologue as insns. */ void @@ -26368,7 +26580,8 @@ rs6000_emit_prologue (void) } /* If we use the link register, get it into r0. 
*/ - if (!WORLD_SAVE_P (info) && info->lr_save_p) + if (!WORLD_SAVE_P (info) && info->lr_save_p + && !cfun->machine->lr_is_wrapped_separately) { rtx addr, reg, mem; @@ -26596,13 +26809,16 @@ rs6000_emit_prologue (void) } else if (!WORLD_SAVE_P (info)) { - int i; - for (i = 0; i < 32 - info->first_gp_reg_save; i++) - if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i)) - emit_frame_save (frame_reg_rtx, reg_mode, - info->first_gp_reg_save + i, - info->gp_save_offset + frame_off + reg_size * i, - sp_off - frame_off); + int offset = info->gp_save_offset + frame_off; + for (int i = info->first_gp_reg_save; i < 32; i++) + { + if (rs6000_reg_live_or_pic_offset_p (i) + && !cfun->machine->gpr_is_wrapped_separately[i]) + emit_frame_save (frame_reg_rtx, reg_mode, i, offset, + sp_off - frame_off); + + offset += reg_size; + } } if (crtl->calls_eh_return) @@ -27407,6 +27623,9 @@ load_lr_save (int regno, rtx frame_reg_rtx, int offset) static void restore_saved_lr (int regno, bool exit_func) { + if (cfun->machine->lr_is_wrapped_separately) + return; + rtx reg = gen_rtx_REG (Pmode, regno); rtx lr = gen_rtx_REG (Pmode, LR_REGNO); rtx_insn *insn = emit_move_insn (lr, reg); @@ -28164,12 +28383,18 @@ rs6000_emit_epilogue (int sibcall) } else { - for (i = 0; i < 32 - info->first_gp_reg_save; i++) - if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i)) - emit_insn (gen_frame_load - (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i), - frame_reg_rtx, - info->gp_save_offset + frame_off + reg_size * i)); + int offset = info->gp_save_offset + frame_off; + for (i = info->first_gp_reg_save; i < 32; i++) + { + if (rs6000_reg_live_or_pic_offset_p (i) + && !cfun->machine->gpr_is_wrapped_separately[i]) + { + rtx reg = gen_rtx_REG (reg_mode, i); + emit_insn (gen_frame_load (reg, frame_reg_rtx, offset)); + } + + offset += reg_size; + } } if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap) @@ -28208,8 +28433,10 @@ rs6000_emit_epilogue (int sibcall) || using_load_multiple || 
rs6000_reg_live_or_pic_offset_p (i)) { - rtx reg = gen_rtx_REG (reg_mode, i); + if (cfun->machine->gpr_is_wrapped_separately[i]) + continue; + rtx reg = gen_rtx_REG (reg_mode, i); cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); } } -- 1.9.3