On Tue, Jan 9, 2018 at 10:29 PM, H.J. Lu <hongjiu...@intel.com> wrote:
> We should also adjust stack_realign_offset for the largest alignment of
> stack slot actually used when stack realignment isn't needed.  This is
> required to keep stack frame properly aligned to satisfy the largest
> alignment of stack slots.
>
> Tested on Linux/i686 and Linux/x86-64.
>
> OK for trunk?
>
> Thanks.
>
> H.J.
> ---
> gcc/
>
>         PR target/83735
>         * config/i386/i386.c (ix86_compute_frame_layout): Always adjust
>         stack_realign_offset for the largest alignment of stack slot
>         actually used.
>         (ix86_find_max_used_stack_alignment): New function.
>         (ix86_finalize_stack_frame_flags): Use it.  Set
>         max_used_stack_alignment if we don't realign stack.
>         * config/i386/i386.h (machine_function): Add
>         max_used_stack_alignment.
>
> gcc/testsuite/
>
>         PR target/83735
>         * gcc.target/i386/pr83735.c: New test.

LGTM, but you know this part better than I.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386.c                  | 120 
> +++++++++++++++++++-------------
>  gcc/config/i386/i386.h                  |   3 +
>  gcc/testsuite/gcc.target/i386/pr83735.c |  55 +++++++++++++++
>  3 files changed, 131 insertions(+), 47 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr83735.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 8696f931806..8c08394ede7 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -11259,7 +11259,11 @@ ix86_compute_frame_layout (void)
>    /* Calculate the size of the va-arg area (not including padding, if any).  
> */
>    frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
>
> -  if (stack_realign_fp)
> +  /* Also adjust stack_realign_offset for the largest alignment of
> +     stack slot actually used.  */
> +  if (stack_realign_fp
> +      || (cfun->machine->max_used_stack_alignment != 0
> +         && (offset % cfun->machine->max_used_stack_alignment) != 0))
>      {
>        /* We may need a 16-byte aligned stack for the remainder of the
>          register save area, but the stack frame for the local function
> @@ -12668,6 +12672,62 @@ output_probe_stack_range (rtx reg, rtx end)
>    return "";
>  }
>
> +/* Return true if stack frame is required.  Update STACK_ALIGNMENT
> +   to the largest alignment, in bits, of stack slot used if stack
> +   frame is required and CHECK_STACK_SLOT is true.  */
> +
> +static bool
> +ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
> +                                   bool check_stack_slot)
> +{
> +  HARD_REG_SET set_up_by_prologue, prologue_used;
> +  basic_block bb;
> +
> +  CLEAR_HARD_REG_SET (prologue_used);
> +  CLEAR_HARD_REG_SET (set_up_by_prologue);
> +  add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
> +  add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
> +  add_to_hard_reg_set (&set_up_by_prologue, Pmode,
> +                      HARD_FRAME_POINTER_REGNUM);
> +
> +  /* The preferred stack alignment is the minimum stack alignment.  */
> +  if (stack_alignment > crtl->preferred_stack_boundary)
> +    stack_alignment = crtl->preferred_stack_boundary;
> +
> +  bool require_stack_frame = false;
> +
> +  FOR_EACH_BB_FN (bb, cfun)
> +    {
> +      rtx_insn *insn;
> +      FOR_BB_INSNS (bb, insn)
> +       if (NONDEBUG_INSN_P (insn)
> +           && requires_stack_frame_p (insn, prologue_used,
> +                                      set_up_by_prologue))
> +         {
> +           require_stack_frame = true;
> +
> +           if (check_stack_slot)
> +             {
> +               /* Find the maximum stack alignment.  */
> +               subrtx_iterator::array_type array;
> +               FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
> +                 if (MEM_P (*iter)
> +                     && (reg_mentioned_p (stack_pointer_rtx,
> +                                          *iter)
> +                         || reg_mentioned_p (frame_pointer_rtx,
> +                                             *iter)))
> +                   {
> +                     unsigned int alignment = MEM_ALIGN (*iter);
> +                     if (alignment > stack_alignment)
> +                       stack_alignment = alignment;
> +                   }
> +             }
> +         }
> +    }
> +
> +  return require_stack_frame;
> +}
> +
>  /* Finalize stack_realign_needed and frame_pointer_needed flags, which
>     will guide prologue/epilogue to be generated in correct form.  */
>
> @@ -12718,52 +12778,8 @@ ix86_finalize_stack_frame_flags (void)
>        && ix86_nsaved_sseregs () == 0
>        && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
>      {
> -      HARD_REG_SET set_up_by_prologue, prologue_used;
> -      basic_block bb;
> -
> -      CLEAR_HARD_REG_SET (prologue_used);
> -      CLEAR_HARD_REG_SET (set_up_by_prologue);
> -      add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
> -      add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
> -      add_to_hard_reg_set (&set_up_by_prologue, Pmode,
> -                          HARD_FRAME_POINTER_REGNUM);
> -
> -      /* The preferred stack alignment is the minimum stack alignment.  */
> -      if (stack_alignment > crtl->preferred_stack_boundary)
> -       stack_alignment = crtl->preferred_stack_boundary;
> -
> -      bool require_stack_frame = false;
> -
> -      FOR_EACH_BB_FN (bb, cfun)
> -        {
> -          rtx_insn *insn;
> -         FOR_BB_INSNS (bb, insn)
> -           if (NONDEBUG_INSN_P (insn)
> -               && requires_stack_frame_p (insn, prologue_used,
> -                                          set_up_by_prologue))
> -             {
> -               require_stack_frame = true;
> -
> -               if (stack_realign)
> -                 {
> -                   /* Find the maximum stack alignment.  */
> -                   subrtx_iterator::array_type array;
> -                   FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
> -                     if (MEM_P (*iter)
> -                         && (reg_mentioned_p (stack_pointer_rtx,
> -                                              *iter)
> -                             || reg_mentioned_p (frame_pointer_rtx,
> -                                                 *iter)))
> -                       {
> -                         unsigned int alignment = MEM_ALIGN (*iter);
> -                         if (alignment > stack_alignment)
> -                           stack_alignment = alignment;
> -                       }
> -                 }
> -             }
> -       }
> -
> -      if (require_stack_frame)
> +      if (ix86_find_max_used_stack_alignment (stack_alignment,
> +                                             stack_realign))
>         {
>           /* Stack frame is required.  If stack alignment needed is less
>              than incoming stack boundary, don't realign stack.  */
> @@ -12851,6 +12867,16 @@ ix86_finalize_stack_frame_flags (void)
>           recompute_frame_layout_p = true;
>         }
>      }
> +  else if (crtl->max_used_stack_slot_alignment
> +          > crtl->preferred_stack_boundary)
> +    {
> +      /* We don't need to realign stack.  But we still need to keep
> +        stack frame properly aligned to satisfy the largest alignment
> +        of stack slots.  */
> +      if (ix86_find_max_used_stack_alignment (stack_alignment, true))
> +       cfun->machine->max_used_stack_alignment
> +         = stack_alignment / BITS_PER_UNIT;
> +    }
>
>    if (crtl->stack_realign_needed != stack_realign)
>      recompute_frame_layout_p = true;
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 93b7a2c5915..38f264e36b2 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -2596,6 +2596,9 @@ struct GTY(()) machine_function {
>    /* Nonzero if the function places outgoing arguments on stack.  */
>    BOOL_BITFIELD outgoing_args_on_stack : 1;
>
> +  /* The largest alignment, in bytes, of stack slot actually used.  */
> +  unsigned int max_used_stack_alignment;
> +
>    /* During prologue/epilogue generation, the current frame state.
>       Otherwise, the frame state at the end of the prologue.  */
>    struct machine_frame_state fs;
> diff --git a/gcc/testsuite/gcc.target/i386/pr83735.c 
> b/gcc/testsuite/gcc.target/i386/pr83735.c
> new file mode 100644
> index 00000000000..786c90a5839
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr83735.c
> @@ -0,0 +1,55 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target avx } */
> +/* { dg-options "-O3 -mavx" } */
> +
> +#include <stdlib.h>
> +#include "cpuid.h"
> +#include "m256-check.h"
> +#include "avx-os-support.h"
> +
> +static void __attribute__((constructor))
> +check_avx (void)
> +{
> +  unsigned int eax, ebx, ecx, edx;
> +
> +  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
> +    exit (0);
> +
> +  /* Run AVX test only if host has AVX support.  */
> +  if (((ecx & (bit_AVX | bit_OSXSAVE)) == (bit_AVX | bit_OSXSAVE))
> +      && avx_os_support ())
> +    return;
> +
> +  exit (0);
> +}
> +
> +struct S
> +{
> +  short b;
> +  long c;
> +  char d;
> +  long e;
> +  unsigned:8;
> +};
> +
> +int f, h, k, l;
> +int g[10];
> +volatile struct S j;
> +char m;
> +
> +int
> +main (void)
> +{
> +  int i;
> +  struct S n;
> +  for (i = 0; i < 6; i++)
> +    {
> +      for (f = 0; f < 10; f++)
> +       g[f] = 4;
> +      n = j;
> +      h = m == 0 ? 1 : 5 % m;
> +      if (l)
> +       n.b = k;
> +    }
> +  return n.b;
> +}
> --
> 2.14.3
>

Reply via email to