On Wed, May 22, 2019 at 5:10 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > On Wed, May 22, 2019 at 12:31 AM Uros Bizjak <ubiz...@gmail.com> wrote: > > > > On Tue, May 21, 2019 at 5:01 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > > > get_frame_size () returns used stack slots during compilation, which > > > may be optimized out later. This patch does the followings: > > > > > > 1. Add no_stack_frame to machine_function to indicate that the function > > > doesn't need a stack frame. > > > > Can you please add "stack_frame_required" to machine_function with > > inverted meaning instead? Every single usage of no_stack_frame is > > inverted, and it is hard to follow this negative logic through the > > code. > > Fixed. > > > > 2. Change ix86_find_max_used_stack_alignment to set no_stack_frame. > > > 3. Always call ix86_find_max_used_stack_alignment to check if stack > > > frame is needed. > > > > > > Tested on i686 and x86-64 with > > > > > > --with-arch=native --with-cpu=native > > > > > > Tested on AVX512 machine configured with > > > > > > --with-arch=native --with-cpu=native > > > > > > gcc/ > > > > > > PR target/88483 > > > * config/i386/i386.c (ix86_get_frame_size): New function. > > > (ix86_frame_pointer_required): Replace get_frame_size with > > > ix86_get_frame_size. > > > (ix86_compute_frame_layout): Likewise. > > > (ix86_find_max_used_stack_alignment): Changed to void. Set > > > no_stack_frame. > > > (ix86_finalize_stack_frame_flags): Always call > > > ix86_find_max_used_stack_alignment. Replace get_frame_size with > > > ix86_get_frame_size. > > > * config/i386/i386.h (machine_function): Add no_stack_frame. > > > > > > gcc/testsuite/ > > > > > > PR target/88483 > > > * gcc.target/i386/stackalign/pr88483-1.c: New test. > > > * gcc.target/i386/stackalign/pr88483-2.c: Likewise. > > > --- > > > gcc/config/i386/i386.c | 53 ++++++++++++------- > > > gcc/config/i386/i386.h | 3 ++ > > > .../gcc.target/i386/stackalign/pr88483-1.c | 18 +++++++ > > > .../gcc.target/i386/stackalign/pr88483-2.c | 18 +++++++ > > > 4 files changed, 74 insertions(+), 18 deletions(-) > > > create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c > > > > > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > > > index 54607748b0b..d0b2a4f8b70 100644 > > > --- a/gcc/config/i386/i386.c > > > +++ b/gcc/config/i386/i386.c > > > @@ -5012,6 +5012,19 @@ ix86_can_use_return_insn_p (void) > > > && (frame.nregs + frame.nsseregs) == 0); > > > } > > > > > > +/* Return stack frame size. get_frame_size () returns used stack slots > > > + during compilation, which may be optimized out later. no_stack_frame > > > + is set to true if stack frame isn't needed. */ > > > + > > > +static HOST_WIDE_INT > > > +ix86_get_frame_size (void) > > > +{ > > > + if (cfun->machine->no_stack_frame) > > > + return 0; > > > + else > > > + return get_frame_size (); > > > +} > > > + > > > /* Value should be nonzero if functions must have frame pointers. > > > Zero means the frame pointer need not be set up (and parms may > > > be accessed via the stack pointer) in functions that seem suitable. > > > */ > > > @@ -5035,7 +5048,7 @@ ix86_frame_pointer_required (void) > > > > > > /* Win64 SEH, very large frames need a frame-pointer as maximum stack > > > allocation is 4GB. */ > > > - if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE) > > > + if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE) > > > return true; > > > > > > /* SSE saves require frame-pointer when stack is misaligned. */ > > > @@ -5842,7 +5855,7 @@ ix86_compute_frame_layout (void) > > > unsigned HOST_WIDE_INT stack_alignment_needed; > > > HOST_WIDE_INT offset; > > > unsigned HOST_WIDE_INT preferred_alignment; > > > - HOST_WIDE_INT size = get_frame_size (); > > > + HOST_WIDE_INT size = ix86_get_frame_size (); > > > HOST_WIDE_INT to_allocate; > > > > > > /* m->call_ms2sysv is initially enabled in ix86_expand_call for all > > > 64-bit > > > @@ -7436,11 +7449,11 @@ output_probe_stack_range (rtx reg, rtx end) > > > return ""; > > > } > > > > > > -/* Return true if stack frame is required. Update STACK_ALIGNMENT > > > - to the largest alignment, in bits, of stack slot used if stack > > > - frame is required and CHECK_STACK_SLOT is true. */ > > > +/* Set no_stack_frame to true if stack frame isn't required. Update > > > + STACK_ALIGNMENT to the largest alignment, in bits, of stack slot > > > + used if stack frame is required and CHECK_STACK_SLOT is true. */ > > > > From the above comment, you can see the confusion caused by using > > negative logic for no_stack_frame. > > > > > -static bool > > > +static void > > > ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, > > > bool check_stack_slot) > > > { > > > @@ -7489,7 +7502,7 @@ ix86_find_max_used_stack_alignment (unsigned int > > > &stack_alignment, > > > } > > > } > > > > > > - return require_stack_frame; > > > + cfun->machine->no_stack_frame = !require_stack_frame; > > > } > > > > > > /* Finalize stack_realign_needed and frame_pointer_needed flags, which > > > @@ -7519,6 +7532,14 @@ ix86_finalize_stack_frame_flags (void) > > > return; > > > } > > > > > > + /* It is always safe to compute max_used_stack_alignment. We > > > + compute it only if 128-bit aligned load/store may be generated > > > + on misaligned stack slot which will lead to segfault. */ > > > + bool check_stack_slot > > > + = (stack_realign || crtl->max_used_stack_slot_alignment >= 128); > > > + ix86_find_max_used_stack_alignment (stack_alignment, > > > + check_stack_slot); > > > + > > > /* If the only reason for frame_pointer_needed is that we > > > conservatively > > > assumed stack realignment might be needed or -fno-omit-frame-pointer > > > is used, but in the end nothing that needed the stack alignment had > > > @@ -7538,12 +7559,11 @@ ix86_finalize_stack_frame_flags (void) > > > && flag_exceptions > > > && cfun->can_throw_non_call_exceptions) > > > && !ix86_frame_pointer_required () > > > - && get_frame_size () == 0 > > > + && ix86_get_frame_size () == 0 > > > && ix86_nsaved_sseregs () == 0 > > > && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0) > > > { > > > - if (ix86_find_max_used_stack_alignment (stack_alignment, > > > - stack_realign)) > > > + if (!cfun->machine->no_stack_frame) > > > { > > > /* Stack frame is required. If stack alignment needed is less > > > than incoming stack boundary, don't realign stack. */ > > > @@ -7631,17 +7651,14 @@ ix86_finalize_stack_frame_flags (void) > > > recompute_frame_layout_p = true; > > > } > > > } > > > - else if (crtl->max_used_stack_slot_alignment >= 128) > > > + else if (crtl->max_used_stack_slot_alignment >= 128 > > > + && !cfun->machine->no_stack_frame) > > > { > > > /* We don't need to realign stack. max_used_stack_alignment is > > > used to decide how stack frame should be aligned. This is > > > - independent of any psABIs nor 32-bit vs 64-bit. It is always > > > - safe to compute max_used_stack_alignment. We compute it only > > > - if 128-bit aligned load/store may be generated on misaligned > > > - stack slot which will lead to segfault. */ > > > - if (ix86_find_max_used_stack_alignment (stack_alignment, true)) > > > - cfun->machine->max_used_stack_alignment > > > - = stack_alignment / BITS_PER_UNIT; > > > + independent of any psABIs nor 32-bit vs 64-bit. */ > > > + cfun->machine->max_used_stack_alignment > > > + = stack_alignment / BITS_PER_UNIT;; > > > > Double semicolon. > > > > Fixed. > > Here is the updated patch. OK for trunk?
LGTM. Thanks, Uros.