On Wed, May 22, 2019 at 5:10 PM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> On Wed, May 22, 2019 at 12:31 AM Uros Bizjak <ubiz...@gmail.com> wrote:
> >
> > On Tue, May 21, 2019 at 5:01 PM H.J. Lu <hjl.to...@gmail.com> wrote:
> > >
> > > get_frame_size () returns used stack slots during compilation, which
> > > may be optimized out later.  This patch does the followings:
> > >
> > > 1. Add no_stack_frame to machine_function to indicate that the function
> > > doesn't need a stack frame.
> >
> > Can you please add "stack_frame_required" to machine_function with
> > inverted meaning instead? Every single usage of no_stack_frame is
> > inverted, and it is hard to follow this negative logic through the
> > code.
>
> Fixed.
>
> > > 2. Change ix86_find_max_used_stack_alignment to set no_stack_frame.
> > > 3. Always call ix86_find_max_used_stack_alignment to check if stack
> > > frame is needed.
> > >
> > > Tested on i686 and x86-64 with
> > >
> > > --with-arch=native --with-cpu=native
> > >
> > > Tested on AVX512 machine configured with
> > >
> > > --with-arch=native --with-cpu=native
> > >
> > > gcc/
> > >
> > >         PR target/88483
> > >         * config/i386/i386.c (ix86_get_frame_size): New function.
> > >         (ix86_frame_pointer_required): Replace get_frame_size with
> > >         ix86_get_frame_size.
> > >         (ix86_compute_frame_layout): Likewise.
> > >         (ix86_find_max_used_stack_alignment): Changed to void.  Set
> > >         no_stack_frame.
> > >         (ix86_finalize_stack_frame_flags): Always call
> > >         ix86_find_max_used_stack_alignment.  Replace get_frame_size with
> > >         ix86_get_frame_size.
> > >         * config/i386/i386.h (machine_function): Add no_stack_frame.
> > >
> > > gcc/testsuite/
> > >
> > >         PR target/88483
> > >         * gcc.target/i386/stackalign/pr88483-1.c: New test.
> > >         * gcc.target/i386/stackalign/pr88483-2.c: Likewise.
> > > ---
> > >  gcc/config/i386/i386.c                        | 53 ++++++++++++-------
> > >  gcc/config/i386/i386.h                        |  3 ++
> > >  .../gcc.target/i386/stackalign/pr88483-1.c    | 18 +++++++
> > >  .../gcc.target/i386/stackalign/pr88483-2.c    | 18 +++++++
> > >  4 files changed, 74 insertions(+), 18 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c
> > >
> > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > > index 54607748b0b..d0b2a4f8b70 100644
> > > --- a/gcc/config/i386/i386.c
> > > +++ b/gcc/config/i386/i386.c
> > > @@ -5012,6 +5012,19 @@ ix86_can_use_return_insn_p (void)
> > >           && (frame.nregs + frame.nsseregs) == 0);
> > >  }
> > >
> > > +/* Return stack frame size.  get_frame_size () returns used stack slots
> > > +   during compilation, which may be optimized out later.  no_stack_frame
> > > +   is set to true if stack frame isn't needed.  */
> > > +
> > > +static HOST_WIDE_INT
> > > +ix86_get_frame_size (void)
> > > +{
> > > +  if (cfun->machine->no_stack_frame)
> > > +    return 0;
> > > +  else
> > > +    return get_frame_size ();
> > > +}
> > > +
> > >  /* Value should be nonzero if functions must have frame pointers.
> > >     Zero means the frame pointer need not be set up (and parms may
> > >     be accessed via the stack pointer) in functions that seem suitable.  
> > > */
> > > @@ -5035,7 +5048,7 @@ ix86_frame_pointer_required (void)
> > >
> > >    /* Win64 SEH, very large frames need a frame-pointer as maximum stack
> > >       allocation is 4GB.  */
> > > -  if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
> > > +  if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
> > >      return true;
> > >
> > >    /* SSE saves require frame-pointer when stack is misaligned.  */
> > > @@ -5842,7 +5855,7 @@ ix86_compute_frame_layout (void)
> > >    unsigned HOST_WIDE_INT stack_alignment_needed;
> > >    HOST_WIDE_INT offset;
> > >    unsigned HOST_WIDE_INT preferred_alignment;
> > > -  HOST_WIDE_INT size = get_frame_size ();
> > > +  HOST_WIDE_INT size = ix86_get_frame_size ();
> > >    HOST_WIDE_INT to_allocate;
> > >
> > >    /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 
> > > 64-bit
> > > @@ -7436,11 +7449,11 @@ output_probe_stack_range (rtx reg, rtx end)
> > >    return "";
> > >  }
> > >
> > > -/* Return true if stack frame is required.  Update STACK_ALIGNMENT
> > > -   to the largest alignment, in bits, of stack slot used if stack
> > > -   frame is required and CHECK_STACK_SLOT is true.  */
> > > +/* Set no_stack_frame to true if stack frame isn't required.  Update
> > > +   STACK_ALIGNMENT to the largest alignment, in bits, of stack slot
> > > +   used if stack frame is required and CHECK_STACK_SLOT is true.  */
> >
> > From the above comment, you can see the confusion caused by using
> > negative logic for no_stack_frame.
> >
> > > -static bool
> > > +static void
> > >  ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
> > >                                     bool check_stack_slot)
> > >  {
> > > @@ -7489,7 +7502,7 @@ ix86_find_max_used_stack_alignment (unsigned int 
> > > &stack_alignment,
> > >           }
> > >      }
> > >
> > > -  return require_stack_frame;
> > > +  cfun->machine->no_stack_frame = !require_stack_frame;
> > >  }
> > >
> > >  /* Finalize stack_realign_needed and frame_pointer_needed flags, which
> > > @@ -7519,6 +7532,14 @@ ix86_finalize_stack_frame_flags (void)
> > >        return;
> > >      }
> > >
> > > +  /* It is always safe to compute max_used_stack_alignment.  We
> > > +     compute it only if 128-bit aligned load/store may be generated
> > > +     on misaligned stack slot which will lead to segfault. */
> > > +  bool check_stack_slot
> > > +    = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
> > > +  ix86_find_max_used_stack_alignment (stack_alignment,
> > > +                                     check_stack_slot);
> > > +
> > >    /* If the only reason for frame_pointer_needed is that we 
> > > conservatively
> > >       assumed stack realignment might be needed or -fno-omit-frame-pointer
> > >       is used, but in the end nothing that needed the stack alignment had
> > > @@ -7538,12 +7559,11 @@ ix86_finalize_stack_frame_flags (void)
> > >            && flag_exceptions
> > >            && cfun->can_throw_non_call_exceptions)
> > >        && !ix86_frame_pointer_required ()
> > > -      && get_frame_size () == 0
> > > +      && ix86_get_frame_size () == 0
> > >        && ix86_nsaved_sseregs () == 0
> > >        && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
> > >      {
> > > -      if (ix86_find_max_used_stack_alignment (stack_alignment,
> > > -                                             stack_realign))
> > > +      if (!cfun->machine->no_stack_frame)
> > >         {
> > >           /* Stack frame is required.  If stack alignment needed is less
> > >              than incoming stack boundary, don't realign stack.  */
> > > @@ -7631,17 +7651,14 @@ ix86_finalize_stack_frame_flags (void)
> > >           recompute_frame_layout_p = true;
> > >         }
> > >      }
> > > -  else if (crtl->max_used_stack_slot_alignment >= 128)
> > > +  else if (crtl->max_used_stack_slot_alignment >= 128
> > > +          && !cfun->machine->no_stack_frame)
> > >      {
> > >        /* We don't need to realign stack.  max_used_stack_alignment is
> > >          used to decide how stack frame should be aligned.  This is
> > > -        independent of any psABIs nor 32-bit vs 64-bit.  It is always
> > > -        safe to compute max_used_stack_alignment.  We compute it only
> > > -        if 128-bit aligned load/store may be generated on misaligned
> > > -        stack slot which will lead to segfault.   */
> > > -      if (ix86_find_max_used_stack_alignment (stack_alignment, true))
> > > -       cfun->machine->max_used_stack_alignment
> > > -         = stack_alignment / BITS_PER_UNIT;
> > > +        independent of any psABIs nor 32-bit vs 64-bit.  */
> > > +      cfun->machine->max_used_stack_alignment
> > > +       = stack_alignment / BITS_PER_UNIT;;
> >
> > Double semicolon.
> >
>
> Fixed.
>
> Here is the updated patch.   OK for trunk?

LGTM.

Thanks,
Uros.

Reply via email to