These changes affect how the stack frame is calculated from the region starting at frame.reg_save_offset until frame.frame_pointer_offset, which includes either the stub save area or the (inline) SSE register save area and the va_args register save area.
The calculation used when not realigning the stack pointer is the same, but when realigning we calculate the 16-byte aligned space needed in reverse so that the stack realignment boundary at frame.stack_realign_offset may not necessarily be a multiple of stack_alignment_needed, but the value of frame.frame_pointer_offset will be. This results in a properly aligned stack for the function body and avoids wasting stack space. Signed-off-by: Daniel Santos <daniel.san...@pobox.com> --- gcc/config/i386/i386.c | 116 +++++++++++++++++++++++++++++++++---------------- gcc/config/i386/i386.h | 2 +- 2 files changed, 80 insertions(+), 38 deletions(-) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e2e9546a27c..e92f322de0c 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12874,6 +12874,14 @@ ix86_compute_frame_layout (void) gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); gcc_assert (preferred_alignment <= stack_alignment_needed); + /* The only ABI saving SSE regs should be 64-bit ms_abi. */ + gcc_assert (TARGET_64BIT || !frame->nsseregs); + if (TARGET_64BIT && m->call_ms2sysv) + { + gcc_assert (stack_alignment_needed >= 16); + gcc_assert (!frame->nsseregs); + } + /* For SEH we have to limit the amount of code movement into the prologue. At present we do this via a BLOCKAGE, at which point there's very little scheduling that can be done, which means that there's very little point @@ -12936,54 +12944,88 @@ ix86_compute_frame_layout (void) if (TARGET_SEH) frame->hard_frame_pointer_offset = offset; - /* When re-aligning the stack frame, but not saving SSE registers, this - is the offset we want adjust the stack pointer to. */ - frame->stack_realign_allocate_offset = offset; + /* Calculate the size of the va-arg area (not including padding, if any). */ + frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; - /* The re-aligned stack starts here. 
Values before this point are not - directly comparable with values below this point. Use sp_valid_at - to determine if the stack pointer is valid for a given offset and - fp_valid_at for the frame pointer. */ if (stack_realign_fp) - offset = ROUND_UP (offset, stack_alignment_needed); - frame->stack_realign_offset = offset; - - if (TARGET_64BIT && m->call_ms2sysv) { - gcc_assert (stack_alignment_needed >= 16); - gcc_assert (!frame->nsseregs); + /* We may need a 16-byte aligned stack for the remainder of the + register save area, but the stack frame for the local function + may require a greater alignment if using AVX/2/512. In order + to avoid wasting space, we first calculate the space needed for + the rest of the register saves, add that to the stack pointer, + and then realign the stack to the boundary of the start of the + frame for the local function. */ + HOST_WIDE_INT space_needed = 0; + HOST_WIDE_INT sse_reg_space_needed = 0; - m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD); - offset += xlogue_layout::get_instance ().get_stack_space_used (); - } + if (TARGET_64BIT) + { + if (m->call_ms2sysv) + { + m->call_ms2sysv_pad_in = 0; + space_needed = xlogue_layout::get_instance ().get_stack_space_used (); + } - /* Align and set SSE register save area. */ - else if (frame->nsseregs) - { - /* The only ABI that has saved SSE registers (Win64) also has a - 16-byte aligned default stack. However, many programs violate - the ABI, and Wine64 forces stack realignment to compensate. + else if (frame->nsseregs) + /* The only ABI that has saved SSE registers (Win64) also has a + 16-byte aligned default stack. However, many programs violate + the ABI, and Wine64 forces stack realignment to compensate. */ + space_needed = frame->nsseregs * 16; + + sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16); + + /* 64-bit frame->va_arg_size should always be a multiple of 16, but + rounding to be pedantic. 
*/ + space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16); + } + else + space_needed = frame->va_arg_size; + + /* Record the allocation size required prior to the realignment AND. */ + frame->stack_realign_allocate = space_needed; + + /* The re-aligned stack starts at frame->stack_realign_offset. Values + before this point are not directly comparable with values below + this point. Use sp_valid_at to determine if the stack pointer is + valid for a given offset, fp_valid_at for the frame pointer, or + choose_baseaddr to have a base register chosen for you. - If the incoming stack boundary is at least 16 bytes, or DRAP is - required and the DRAP re-alignment boundary is at least 16 bytes, - then we want the SSE register save area properly aligned. */ - if (ix86_incoming_stack_boundary >= 128 - || (stack_realign_drap && stack_alignment_needed >= 16)) - offset = ROUND_UP (offset, 16); - offset += frame->nsseregs * 16; - frame->stack_realign_allocate_offset = offset; + Note that the result of (frame->stack_realign_offset + & (stack_alignment_needed - 1)) may not equal zero. */ + offset = ROUND_UP (offset + space_needed, stack_alignment_needed); + frame->stack_realign_offset = offset - space_needed; + frame->sse_reg_save_offset = frame->stack_realign_offset + + sse_reg_space_needed; } + else + { + frame->stack_realign_offset = offset; - frame->sse_reg_save_offset = offset; + if (TARGET_64BIT && m->call_ms2sysv) + { + m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD); + offset += xlogue_layout::get_instance ().get_stack_space_used (); + } - /* Va-arg area */ - frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; - offset += frame->va_arg_size; + /* Align and set SSE register save area. */ + else if (frame->nsseregs) + { + /* If the incoming stack boundary is at least 16 bytes, or DRAP is + required and the DRAP re-alignment boundary is at least 16 bytes, + then we want the SSE register save area properly aligned. 
*/ + if (ix86_incoming_stack_boundary >= 128 + || (stack_realign_drap && stack_alignment_needed >= 16)) + offset = ROUND_UP (offset, 16); + offset += frame->nsseregs * 16; + } + frame->sse_reg_save_offset = offset; + offset += frame->va_arg_size; + } /* Align start of frame for local function. */ - if (stack_realign_fp - || m->call_ms2sysv - || offset != frame->sse_reg_save_offset + if (m->call_ms2sysv + || frame->va_arg_size != 0 || size != 0 || !crtl->is_leaf || cfun->calls_alloca diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index b08e45f68d4..e58882baee8 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2501,7 +2501,7 @@ struct GTY(()) ix86_frame HOST_WIDE_INT stack_pointer_offset; HOST_WIDE_INT hfp_save_offset; HOST_WIDE_INT reg_save_offset; - HOST_WIDE_INT stack_realign_allocate_offset; + HOST_WIDE_INT stack_realign_allocate; HOST_WIDE_INT stack_realign_offset; HOST_WIDE_INT sse_reg_save_offset; -- 2.13.3