This stage adds the fields sp_realigned and sp_realigned_offset to struct machine_frame_state and introduces the concept of the stack pointer being re-aligned rather than invalid. The inline functions sp_valid_at and fp_valid_at are added to test whether a given location relative to the CFA can be accessed with the stack pointer or the frame pointer, respectively.
Stack allocation prior to re-alignment is modified so that we allocate only what is needed: no extra space is allocated in the event that no SSE registers are saved but frame.sse_reg_save_offset has nevertheless been increased for alignment. As this change only alters how SSE registers are saved, moving the re-alignment AND instruction should not hinder parallelization of integer register saves. --- gcc/config/i386/i386.c | 69 ++++++++++++++++++++++++++++++++++++-------------- gcc/config/i386/i386.h | 12 +++++++++ 2 files changed, 62 insertions(+), 19 deletions(-) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 7f7389cbe31..b5f9f36094f 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12604,6 +12604,24 @@ choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset) return len; } +/* Determine if the stack pointer is valid for accessing the cfa_offset. */ + +static inline bool sp_valid_at (HOST_WIDE_INT cfa_offset) +{ + const struct machine_frame_state &fs = cfun->machine->fs; + return fs.sp_valid && !(fs.sp_realigned + && cfa_offset < fs.sp_realigned_offset); +} + +/* Determine if the frame pointer is valid for accessing the cfa_offset. */ + +static inline bool fp_valid_at (HOST_WIDE_INT cfa_offset) +{ + const struct machine_frame_state &fs = cfun->machine->fs; + return fs.fp_valid && !(fs.sp_valid && fs.sp_realigned + && cfa_offset >= fs.sp_realigned_offset); +} + /* Return an RTX that points to CFA_OFFSET within the stack frame. The valid base registers are taken from CFUN->MACHINE->FS.
*/ @@ -12902,15 +12920,18 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, { HOST_WIDE_INT ooffset = m->fs.sp_offset; bool valid = m->fs.sp_valid; + bool realigned = m->fs.sp_realigned; if (src == hard_frame_pointer_rtx) { valid = m->fs.fp_valid; + realigned = false; ooffset = m->fs.fp_offset; } else if (src == crtl->drap_reg) { valid = m->fs.drap_valid; + realigned = false; ooffset = 0; } else @@ -12924,6 +12945,7 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, m->fs.sp_offset = ooffset - INTVAL (offset); m->fs.sp_valid = valid; + m->fs.sp_realigned = realigned; } } @@ -13673,6 +13695,7 @@ ix86_expand_prologue (void) this is fudged; we're interested to offsets within the local frame. */ m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; m->fs.sp_valid = true; + m->fs.sp_realigned = false; ix86_compute_frame_layout (&frame); @@ -13889,11 +13912,10 @@ ix86_expand_prologue (void) that we must allocate the size of the register save area before performing the actual alignment. Otherwise we cannot guarantee that there's enough storage above the realignment point. */ - if (m->fs.sp_offset != frame.sse_reg_save_offset) + allocate = frame.stack_realign_allocate_offset - m->fs.sp_offset; + if (allocate) pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, - GEN_INT (m->fs.sp_offset - - frame.sse_reg_save_offset), - -1, false); + GEN_INT (-allocate), -1, false); /* Align the stack. */ insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx, @@ -13901,11 +13923,14 @@ ix86_expand_prologue (void) GEN_INT (-align_bytes))); /* For the purposes of register save area addressing, the stack - pointer is no longer valid. As for the value of sp_offset, - see ix86_compute_frame_layout, which we need to match in order - to pass verification of stack_pointer_offset at the end. */ + pointer can no longer be used to access anything in the frame + below m->fs.sp_realigned_offset and the frame pointer cannot be + used for anything at or above. 
*/ + gcc_assert (m->fs.sp_offset == frame.stack_realign_allocate_offset); m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes); - m->fs.sp_valid = false; + m->fs.sp_realigned = true; + m->fs.sp_realigned_offset = m->fs.sp_offset - frame.nsseregs * 16; + gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset); } allocate = frame.stack_pointer_offset - m->fs.sp_offset; @@ -14244,6 +14269,7 @@ ix86_emit_leave (void) gcc_assert (m->fs.fp_valid); m->fs.sp_valid = true; + m->fs.sp_realigned = false; m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD; m->fs.fp_valid = false; @@ -14344,9 +14370,10 @@ ix86_expand_epilogue (int style) ix86_finalize_stack_realign_flags (); ix86_compute_frame_layout (&frame); - m->fs.sp_valid = (!frame_pointer_needed - || (crtl->sp_is_unchanging - && !stack_realign_fp)); + m->fs.sp_realigned = stack_realign_fp; + m->fs.sp_valid = stack_realign_fp + || !frame_pointer_needed + || crtl->sp_is_unchanging; gcc_assert (!m->fs.sp_valid || m->fs.sp_offset == frame.stack_pointer_offset); @@ -14396,10 +14423,10 @@ ix86_expand_epilogue (int style) /* SEH requires the use of pops to identify the epilogue. */ else if (TARGET_SEH) restore_regs_via_mov = false; - /* If we're only restoring one register and sp is not valid then + /* If we're only restoring one register and sp cannot be used then using a move instruction to restore the register since it's less work than reloading sp and popping the register. */ - else if (!m->fs.sp_valid && frame.nregs <= 1) + else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1) restore_regs_via_mov = true; else if (TARGET_EPILOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue @@ -14424,7 +14451,7 @@ ix86_expand_epilogue (int style) the stack pointer, if we will restore via sp. 
*/ if (TARGET_64BIT && m->fs.sp_offset > 0x7fffffff - && !(m->fs.fp_valid || m->fs.drap_valid) + && !(fp_valid_at (frame.stack_realign_offset) || m->fs.drap_valid) && (frame.nsseregs + frame.nregs) != 0) { pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, @@ -14510,6 +14537,7 @@ ix86_expand_epilogue (int style) } m->fs.sp_offset = UNITS_PER_WORD; m->fs.sp_valid = true; + m->fs.sp_realigned = false; } } else @@ -14531,10 +14559,11 @@ ix86_expand_epilogue (int style) } /* First step is to deallocate the stack frame so that we can - pop the registers. Also do it on SEH target for very large - frame as the emitted instructions aren't allowed by the ABI in - epilogues. */ - if (!m->fs.sp_valid + pop the registers. If the stack pointer was realigned, it needs + to be restored now. Also do it on SEH target for very large + frame as the emitted instructions aren't allowed by the ABI + in epilogues. */ + if (!m->fs.sp_valid || m->fs.sp_realigned || (TARGET_SEH && (m->fs.sp_offset - frame.reg_save_offset >= SEH_MAX_FRAME_SIZE))) @@ -14562,7 +14591,8 @@ ix86_expand_epilogue (int style) { /* If the stack pointer is valid and pointing at the frame pointer store address, then we only need a pop. */ - if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset) + if (sp_valid_at (frame.hfp_save_offset) + && m->fs.sp_offset == frame.hfp_save_offset) ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); /* Leave results in shorter dependency chains on CPUs that are able to grok it fast. */ @@ -14616,6 +14646,7 @@ ix86_expand_epilogue (int style) be possible to merge the local stack deallocation with the deallocation forced by ix86_static_chain_on_stack. 
*/ gcc_assert (m->fs.sp_valid); + gcc_assert (!m->fs.sp_realigned); gcc_assert (!m->fs.fp_valid); gcc_assert (!m->fs.realigned); if (m->fs.sp_offset != UNITS_PER_WORD) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 5f5368da96d..72b0d89e22c 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2498,6 +2498,18 @@ struct GTY(()) machine_frame_state set, the SP/FP offsets above are relative to the aligned frame and not the CFA. */ BOOL_BITFIELD realigned : 1; + + /* Indicates that the stack pointer has been realigned and sp_offset + rounded up to the nearest alignment boundary. Unlike `realigned` + above, this does not realign the hard frame pointer and is not + treated like a new local stack frame. */ + BOOL_BITFIELD sp_realigned : 1; + + /* The offset (from the CFA) the stack pointer was realigned to. When + sp_realigned is true, the stack pointer may be used to address + memory at or above this offset, but may not be used to address memory + below it. */ + HOST_WIDE_INT sp_realigned_offset; }; /* Private to winnt.c. */ -- 2.11.0