Hello, this patch adds another ELFv2 ABI feature: explicit tracking of CR fields in DWARF CFI.
In the current ABI, DWARF CFI contains only a single record describing the save location of the whole CR register. It is implicit that all (or at least all call-clobbered) fields are present at that location. Now, if you use the instructions that save and restore the whole CR at once, this approach might seem reasonable. Unfortunately, with current POWER processors, those instructions tend to be significantly slower than those that access only single CR fields. In particular, in routines where only one or two CR fields are actually clobbered and need to be saved, we could improve performance of prolog and epilog code by saving/restoring only selected CR fields. However, this is not possible in the current ABI since there is no way to describe this fact in the CFI. With the ELFv2 ABI, every CR field gets its own CFI record (using the register numbers 68 .. 75 to stand for CR0 .. CR7). Now, those fields will still usually be saved in the same 4-byte field on the stack. The semantics of a CFI record for field CRx is that the memory location holds 4 bytes, and the 4-bit nibble corresponding to CRx within those 4 bytes holds the CRx value to be restored. The one problem with this scheme is the way uw_install_context tries to modify saved values when unwinding the stack: it will simply copy over the whole field into the save slot of the unwinder routine (that calls __builtin_eh_return). This clearly does not work if multiple CR fields need to be restored independently. To fix this, the prolog/epilog code for unwinder routines will use *multiple* stack slots, one for each call-saved CR field, and save and restore those fields to and from their own slot. This will allow uw_install_context to install values for multiple fields. (Note that there is already precedent for unwinder routines being treated specially in the rs6000.c prologue/epilogue code ...) 
Bye, Ulrich gcc/ChangeLog: 2013-11-11 Ulrich Weigand <ulrich.weig...@de.ibm.com> * config/rs6000/rs6000.c (struct rs6000_stack): New member ehcr_offset. (rs6000_stack_info): For ABI_ELFv2, allocate space for separate CR field save areas if the function calls __builtin_eh_return. (rs6000_emit_move_from_cr): New function. (rs6000_emit_prologue): Use it. For ABI_ELFv2, generate separate CFI records for each saved CR field. For functions that call __builtin_eh_return, save all CR fields into separate slots. (restore_saved_cr): For ABI_ELFv2, generate separate CFA_RESTORE entries for each saved CR field. (add_crlr_cfa_restore): Likewise. (rs6000_emit_epilogue): For ABI_ELFv2, if the function calls __builtin_eh_return, restore each CR field from its own slot. libgcc/ChangeLog: 2013-11-11 Ulrich Weigand <ulrich.weig...@de.ibm.com> * config/rs6000/linux-unwind.h (R_CR3, R_CR4): New macros. (ppc_fallback_frame_state) [_CALL_ELF == 2]: Create CFI entry for CR3 and CR4. Index: gcc/gcc/config/rs6000/rs6000.c =================================================================== --- gcc.orig/gcc/config/rs6000/rs6000.c +++ gcc/gcc/config/rs6000/rs6000.c @@ -97,6 +97,7 @@ typedef struct rs6000_stack { int spe_gp_save_offset; /* offset to save spe 64-bit gprs */ int varargs_save_offset; /* offset to save the varargs registers */ int ehrd_offset; /* offset to EH return data */ + int ehcr_offset; /* offset to EH CR field data */ int reg_size; /* register size (4 or 8) */ HOST_WIDE_INT vars_size; /* variable save area size */ int parm_size; /* outgoing parameter size */ @@ -19847,6 +19848,7 @@ rs6000_stack_info (void) rs6000_stack_t *info_ptr = &stack_info; int reg_size = TARGET_32BIT ? 4 : 8; int ehrd_size; + int ehcr_size; int save_align; int first_gp; HOST_WIDE_INT non_fixed_size; @@ -19940,6 +19942,18 @@ rs6000_stack_info (void) else ehrd_size = 0; + /* In the ELFv2 ABI, we also need to allocate space for separate + CR field save areas if the function calls __builtin_eh_return. 
*/ + if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return) + { + /* This hard-codes that we have three call-saved CR fields. */ + ehcr_size = 3 * reg_size; + /* We do *not* use the regular CR save mechanism. */ + info_ptr->cr_save_p = 0; + } + else + ehcr_size = 0; + /* Determine various sizes. */ info_ptr->reg_size = reg_size; info_ptr->fixed_size = RS6000_SAVE_AREA; @@ -20009,6 +20023,8 @@ rs6000_stack_info (void) } else info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size; + + info_ptr->ehcr_offset = info_ptr->ehrd_offset - ehcr_size; info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */ info_ptr->lr_save_offset = 2*reg_size; break; @@ -20071,6 +20087,7 @@ rs6000_stack_info (void) + info_ptr->spe_gp_size + info_ptr->spe_padding_size + ehrd_size + + ehcr_size + info_ptr->cr_size + info_ptr->vrsave_size, save_align); @@ -21522,6 +21539,43 @@ rs6000_emit_savres_rtx (rs6000_stack_t * return insn; } +/* Emit code to store CR fields that need to be saved into REG. */ + +static void +rs6000_emit_move_from_cr (rtx reg) +{ + /* Only the ELFv2 ABI allows storing only selected fields. */ + if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF) + { + int i, cr_reg[8], count = 0; + + /* Collect CR fields that must be saved. */ + for (i = 0; i < 8; i++) + if (save_reg_p (CR0_REGNO + i)) + cr_reg[count++] = i; + + /* If it's just a single one, use mfcrf. */ + if (count == 1) + { + rtvec p = rtvec_alloc (1); + rtvec r = rtvec_alloc (2); + RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]); + RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0])); + RTVEC_ELT (p, 0) + = gen_rtx_SET (VOIDmode, reg, + gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR)); + + emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); + return; + } + + /* ??? It might be better to handle count == 2 / 3 cases here + as well, using logical operations to combine the values. */ + } + + emit_insn (gen_movesi_from_cr (reg)); +} + /* Determine whether the gp REG is really used. 
*/ static bool @@ -21826,7 +21880,7 @@ rs6000_emit_prologue (void) { cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno); START_USE (cr_save_regno); - emit_insn (gen_movesi_from_cr (cr_save_rtx)); + rs6000_emit_move_from_cr (cr_save_rtx); } /* Do any required saving of fpr's. If only one or two to save, do @@ -22143,7 +22197,7 @@ rs6000_emit_prologue (void) { START_USE (0); cr_save_rtx = gen_rtx_REG (SImode, 0); - emit_insn (gen_movesi_from_cr (cr_save_rtx)); + rs6000_emit_move_from_cr (cr_save_rtx); } /* Saving CR requires a two-instruction sequence: one instruction @@ -22187,12 +22241,71 @@ rs6000_emit_prologue (void) GEN_INT (info->cr_save_offset + sp_off)); mem = gen_frame_mem (SImode, addr); - /* We still cannot express that multiple CR fields are saved in the - CR save slot. By convention, we use a single CR regnum to represent - the fact that all call-saved CR fields are saved. We use CR2_REGNO - to be compatible with gcc-2.95 on Linux. */ - rtx set = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, CR2_REGNO)); - add_reg_note (insn, REG_FRAME_RELATED_EXPR, set); + if (DEFAULT_ABI == ABI_ELFv2) + { + /* In the ELFv2 ABI we generate separate CFI records for each + CR field that was actually saved. They all point to the + same 32-bit stack slot. */ + rtx crframe[8]; + int n_crframe = 0; + + for (i = 0; i < 8; i++) + if (save_reg_p (CR0_REGNO + i)) + { + crframe[n_crframe] + = gen_rtx_SET (VOIDmode, mem, + gen_rtx_REG (SImode, CR0_REGNO + i)); + + RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1; + n_crframe++; + } + + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_PARALLEL (VOIDmode, + gen_rtvec_v (n_crframe, crframe))); + } + else + { + /* In other ABIs, by convention, we use a single CR regnum to + represent the fact that all call-saved CR fields are saved. + We use CR2_REGNO to be compatible with gcc-2.95 on Linux. 
*/ + rtx set = gen_rtx_SET (VOIDmode, mem, + gen_rtx_REG (SImode, CR2_REGNO)); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, set); + } + } + + /* In the ELFv2 ABI we need to save all call-saved CR fields into + *separate* slots if the routine calls __builtin_eh_return, so + that they can be independently restored by the unwinder. */ + if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return) + { + int i, cr_off = info->ehcr_offset; + rtx crsave; + + /* ??? We might get better performance by using multiple mfocrf + instructions. */ + crsave = gen_rtx_REG (SImode, 0); + emit_insn (gen_movesi_from_cr (crsave)); + + for (i = 0; i < 8; i++) + if (!call_used_regs[CR0_REGNO + i]) + { + rtvec p = rtvec_alloc (2); + RTVEC_ELT (p, 0) + = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off); + RTVEC_ELT (p, 1) + = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i)); + + insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); + + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i), + sp_reg_rtx, cr_off + sp_off)); + + cr_off += reg_size; + } } /* Update stack and set back pointer unless this is V.4, @@ -22565,6 +22678,7 @@ restore_saved_cr (rtx reg, int using_mfc if (using_mfcr_multiple && count > 1) { + rtx insn; rtvec p; int ndx; @@ -22582,16 +22696,43 @@ restore_saved_cr (rtx reg, int using_mfc gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR)); ndx++; } - emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); + insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); gcc_assert (ndx == count); + + /* For the ELFv2 ABI we generate a CFA_RESTORE for each + CR field separately. 
*/ + if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap) + { + for (i = 0; i < 8; i++) + if (save_reg_p (CR0_REGNO + i)) + add_reg_note (insn, REG_CFA_RESTORE, + gen_rtx_REG (SImode, CR0_REGNO + i)); + + RTX_FRAME_RELATED_P (insn) = 1; + } } else for (i = 0; i < 8; i++) if (save_reg_p (CR0_REGNO + i)) - emit_insn (gen_movsi_to_cr_one (gen_rtx_REG (CCmode, CR0_REGNO + i), - reg)); + { + rtx insn = emit_insn (gen_movsi_to_cr_one + (gen_rtx_REG (CCmode, CR0_REGNO + i), reg)); - if (!exit_func && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)) + /* For the ELFv2 ABI we generate a CFA_RESTORE for each + CR field separately, attached to the insn that in fact + restores this particular CR field. */ + if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap) + { + add_reg_note (insn, REG_CFA_RESTORE, + gen_rtx_REG (SImode, CR0_REGNO + i)); + + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */ + if (!exit_func && DEFAULT_ABI != ABI_ELFv2 + && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)) { rtx insn = get_last_insn (); rtx cr = gen_rtx_REG (SImode, CR2_REGNO); @@ -22632,10 +22773,22 @@ restore_saved_lr (int regno, bool exit_f static rtx add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores) { - if (info->cr_save_p) + if (DEFAULT_ABI == ABI_ELFv2) + { + int i; + for (i = 0; i < 8; i++) + if (save_reg_p (CR0_REGNO + i)) + { + rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr, + cfa_restores); + } + } + else if (info->cr_save_p) cfa_restores = alloc_reg_note (REG_CFA_RESTORE, gen_rtx_REG (SImode, CR2_REGNO), cfa_restores); + if (info->lr_save_p) cfa_restores = alloc_reg_note (REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNO), @@ -23133,6 +23286,35 @@ rs6000_emit_epilogue (int sibcall) || (!restoring_GPRs_inline && info->first_fp_reg_save == 64)); + /* In the ELFv2 ABI we need to restore all call-saved CR fields from + *separate* slots if the 
routine calls __builtin_eh_return, so + that they can be independently restored by the unwinder. */ + if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return) + { + int i, cr_off = info->ehcr_offset; + + for (i = 0; i < 8; i++) + if (!call_used_regs[CR0_REGNO + i]) + { + rtx reg = gen_rtx_REG (SImode, 0); + emit_insn (gen_frame_load (reg, frame_reg_rtx, + cr_off + frame_off)); + + insn = emit_insn (gen_movsi_to_cr_one + (gen_rtx_REG (CCmode, CR0_REGNO + i), reg)); + + if (!exit_func && flag_shrink_wrap) + { + add_reg_note (insn, REG_CFA_RESTORE, + gen_rtx_REG (SImode, CR0_REGNO + i)); + + RTX_FRAME_RELATED_P (insn) = 1; + } + + cr_off += reg_size; + } + } + /* Get the old lr if we saved it. If we are restoring registers out-of-line, then the out-of-line routines can do this for us. */ if (restore_lr && restoring_GPRs_inline) Index: gcc/libgcc/config/rs6000/linux-unwind.h =================================================================== --- gcc.orig/libgcc/config/rs6000/linux-unwind.h +++ gcc/libgcc/config/rs6000/linux-unwind.h @@ -24,6 +24,8 @@ #define R_LR 65 #define R_CR2 70 +#define R_CR3 71 +#define R_CR4 72 #define R_VR0 77 #define R_VRSAVE 109 @@ -215,8 +217,16 @@ ppc_fallback_frame_state (struct _Unwind #ifndef __LITTLE_ENDIAN__ cr_offset += sizeof (long) - 4; #endif + /* In the ELFv1 ABI, CR2 stands in for the whole CR. */ fs->regs.reg[R_CR2].how = REG_SAVED_OFFSET; fs->regs.reg[R_CR2].loc.offset = cr_offset; +#if _CALL_ELF == 2 + /* In the ELFv2 ABI, every CR field has a separate CFI entry. */ + fs->regs.reg[R_CR3].how = REG_SAVED_OFFSET; + fs->regs.reg[R_CR3].loc.offset = cr_offset; + fs->regs.reg[R_CR4].how = REG_SAVED_OFFSET; + fs->regs.reg[R_CR4].loc.offset = cr_offset; +#endif fs->regs.reg[R_LR].how = REG_SAVED_OFFSET; fs->regs.reg[R_LR].loc.offset = (long) &regs->link - new_cfa; -- Dr. Ulrich Weigand GNU/Linux compilers and toolchain ulrich.weig...@de.ibm.com