On Fri, 7 Jun 2019, Richard Sandiford wrote:

> "H.J. Lu" <hjl.to...@gmail.com> writes:
> > locate_and_pad_parm is called when expanding function call from
> > initialize_argument_information and when generating function body
> > from assign_parm_find_entry_rtl:
> >
> >   /* Remember if the outgoing parameter requires extra alignment on the
> >      calling function side.  */
> >   if (crtl->stack_alignment_needed < boundary)
> >     crtl->stack_alignment_needed = boundary;
> >   if (crtl->preferred_stack_boundary < boundary)
> >     crtl->preferred_stack_boundary = boundary;
> >
> > stack_alignment_needed and preferred_stack_boundary should be updated
> > only when expanding function call, not when generating function body.
> > Add an argument, outgoing_p, to locate_and_pad_parm to indicate for
> > expanding function call.  Update stack_alignment_needed and
> > preferred_stack_boundary if the parameter is passed on stack and only
> > when expanding function call.
> >
> > Tested on Linux/x86-64.
> >
> > OK for trunk?
> >
> > Thanks.
> >
> > -- 
> > H.J.
> >
> > From e91e20ad8e10373db2c6d8f99a3da0bbf46c5c34 Mon Sep 17 00:00:00 2001
> > From: "H.J. Lu" <hjl.to...@gmail.com>
> > Date: Wed, 5 Jun 2019 12:55:19 -0700
> > Subject: [PATCH] Update preferred_stack_boundary only when expanding 
> > function
> >  call
> >
> > locate_and_pad_parm is called when expanding function call from
> > initialize_argument_information and when generating function body
> > from assign_parm_find_entry_rtl:
> >
> >   /* Remember if the outgoing parameter requires extra alignment on the
> >      calling function side.  */
> >   if (crtl->stack_alignment_needed < boundary)
> >     crtl->stack_alignment_needed = boundary;
> >   if (crtl->preferred_stack_boundary < boundary)
> >     crtl->preferred_stack_boundary = boundary;
> >
> > stack_alignment_needed and preferred_stack_boundary should be updated
> > only when expanding function call, not when generating function body.
> > Add an argument, outgoing_p, to locate_and_pad_parm to indicate for
> > expanding function call.  Update stack_alignment_needed and
> > preferred_stack_boundary if the parameter is passed on stack and only
> > when expanding function call.
> >
> > Tested on Linux/x86-64.
> >
> > gcc/
> >
> >     PR rtl-optimization/90765
> >     * function.c (assign_parm_find_entry_rtl): Pass false to
> >     locate_and_pad_parm.
> >     (locate_and_pad_parm): Add an argument, outgoing_p, to indicate
> >     for expanding function call.  Update stack_alignment_needed and
> >     preferred_stack_boundary only if outgoing_p is true and the
> >     the parameter is passed on stack.
> >     * function.h (locate_and_pad_parm): Add an argument, outgoing_p,
> >     defaulting to true.
> >
> > gcc/testsuite/
> >
> >     PR rtl-optimization/90765
> >     * gcc.target/i386/pr90765-1.c: New test.
> >     * gcc.target/i386/pr90765-2.c: Likewise.
> > ---
> >  gcc/function.c                            | 21 +++++++++++++--------
> >  gcc/function.h                            |  3 ++-
> >  gcc/testsuite/gcc.target/i386/pr90765-1.c | 11 +++++++++++
> >  gcc/testsuite/gcc.target/i386/pr90765-2.c | 18 ++++++++++++++++++
> >  4 files changed, 44 insertions(+), 9 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-2.c
> >
> > diff --git a/gcc/function.c b/gcc/function.c
> > index e30ee259bec..9b6673f6f0d 100644
> > --- a/gcc/function.c
> > +++ b/gcc/function.c
> > @@ -2601,7 +2601,7 @@ assign_parm_find_entry_rtl (struct 
> > assign_parm_data_all *all,
> >    locate_and_pad_parm (data->promoted_mode, data->passed_type, in_regs,
> >                    all->reg_parm_stack_space,
> >                    entry_parm ? data->partial : 0, current_function_decl,
> > -                  &all->stack_args_size, &data->locate);
> > +                  &all->stack_args_size, &data->locate, false);
> >  
> >    /* Update parm_stack_boundary if this parameter is passed in the
> >       stack.  */
> > @@ -3954,7 +3954,8 @@ locate_and_pad_parm (machine_mode passed_mode, tree 
> > type, int in_regs,
> >                  int reg_parm_stack_space, int partial,
> >                  tree fndecl ATTRIBUTE_UNUSED,
> >                  struct args_size *initial_offset_ptr,
> > -                struct locate_and_pad_arg_data *locate)
> > +                struct locate_and_pad_arg_data *locate,
> > +                bool outgoing_p)
> >  {
> >    tree sizetree;
> >    pad_direction where_pad;
> > @@ -4021,12 +4022,16 @@ locate_and_pad_parm (machine_mode passed_mode, tree 
> > type, int in_regs,
> >     }
> >      }
> >  
> > -  /* Remember if the outgoing parameter requires extra alignment on the
> > -     calling function side.  */
> > -  if (crtl->stack_alignment_needed < boundary)
> > -    crtl->stack_alignment_needed = boundary;
> > -  if (crtl->preferred_stack_boundary < boundary)
> > -    crtl->preferred_stack_boundary = boundary;
> > +  if (outgoing_p && !in_regs)
> > +    {
> > +      /* Remember if the outgoing parameter requires extra alignment on
> > +    the calling function side if this parameter is passed in the
> > +    stack.  */
> > +      if (crtl->stack_alignment_needed < boundary)
> > +   crtl->stack_alignment_needed = boundary;
> > +      if (crtl->preferred_stack_boundary < boundary)
> > +   crtl->preferred_stack_boundary = boundary;
> > +    }
> 
> Just my opinion, but I think this shows that the code isn't really
> in the right place.  IMO locate_and_pad_parm should just describe
> the ABI and is too low-level to be modifying global state like this.
> 
> I think we could instead do this by walking over the argument vectors
> in a subroutine of expand_call and emit_library_call_value_1.
> expand_call is also where we do:
> 
>   /* Ensure current function's preferred stack boundary is at least
>      what we need.  Stack alignment may also increase preferred stack
>      boundary.  */
>   if (crtl->preferred_stack_boundary < preferred_stack_boundary)
>     crtl->preferred_stack_boundary = preferred_stack_boundary;
>   else
>     preferred_stack_boundary = crtl->preferred_stack_boundary;

Agreed - that would be much cleaner.

Richard.

> Thanks,
> Richard
> 
> >  
> >    if (ARGS_GROW_DOWNWARD)
> >      {
> > diff --git a/gcc/function.h b/gcc/function.h
> > index bfe9919a760..5ad7a33fd39 100644
> > --- a/gcc/function.h
> > +++ b/gcc/function.h
> > @@ -613,7 +613,8 @@ extern bool use_register_for_decl (const_tree);
> >  extern gimple_seq gimplify_parameters (gimple_seq *);
> >  extern void locate_and_pad_parm (machine_mode, tree, int, int, int,
> >                              tree, struct args_size *,
> > -                            struct locate_and_pad_arg_data *);
> > +                            struct locate_and_pad_arg_data *,
> > +                            bool = true);
> >  extern void generate_setjmp_warnings (void);
> >  
> >  /* Identify BLOCKs referenced by more than one NOTE_INSN_BLOCK_{BEG,END},
> > diff --git a/gcc/testsuite/gcc.target/i386/pr90765-1.c 
> > b/gcc/testsuite/gcc.target/i386/pr90765-1.c
> > new file mode 100644
> > index 00000000000..178c3ff8054
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr90765-1.c
> > @@ -0,0 +1,11 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mavx512f" } */
> > +/* { dg-final { scan-assembler-not "and\[lq\]?\[\\t \]*\\$-64,\[\\t 
> > \]*%\[re\]?sp" } } */
> > +
> > +typedef int __v16si __attribute__ ((__vector_size__ (64)));
> > +
> > +void
> > +foo (__v16si x, int i0, int i1, int i2, int i3, int i4, int i5, __v16si *p)
> > +{
> > +  *p = x;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr90765-2.c 
> > b/gcc/testsuite/gcc.target/i386/pr90765-2.c
> > new file mode 100644
> > index 00000000000..45cf1f03747
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr90765-2.c
> > @@ -0,0 +1,18 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mavx512f" } */
> > +/* { dg-final { scan-assembler "and\[lq\]?\[\\t \]*\\$-64,\[\\t 
> > \]*%\[re\]?sp" } } */
> > +/* { dg-skip-if "" { x86_64-*-mingw* } } */
> > +
> > +typedef int __v16si __attribute__ ((__vector_size__ (64)));
> > +
> > +extern void foo (__v16si, __v16si, __v16si, __v16si, __v16si, __v16si,
> > +            __v16si, __v16si, __v16si, int, int, int, int, int,
> > +            int, __v16si *);
> > +
> > +extern __v16si x, y;
> > +
> > +void
> > +bar (void)
> > +{
> > +  foo (x, x, x, x, x, x, x, x, x, 0, 1, 2, 3, 4, 5, &y);
> > +}
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Linux GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany;
GF: Felix Imendörffer, Mary Higgins, Sri Rasiah; HRB 21284 (AG Nürnberg)

Reply via email to