On Thu, Jul 4, 2024 at 11:00 AM Hongtao Liu <crazy...@gmail.com> wrote:
>
> On Tue, Jul 2, 2024 at 11:24 AM Hongyu Wang <hongyu.w...@intel.com> wrote:
> >
> > Hi,
> >
> > According to APX spec, the pushp/popp pairs should be matched,
> > otherwise the PPX hint cannot take effect and cause performance loss.
> >
> > In the ix86_expand_epilogue, there are several optimizations that may
> > cause the epilogue using mov to restore the regs. Check if PPX applied
> > and prevent usage of mov/leave in the epilogue.
> >
> > Bootstrapped/regtested on x86_64-pc-linux-gnu.
> >
> > Ok for trunk?
> Ok.
Please backport the fix to GCC14 branch.
> >
> > gcc/ChangeLog:
> >
> >         * config/i386/i386.cc (ix86_expand_prologue): Set apx_ppx_used
> >         flag in m.fs with TARGET_APX_PPX && !crtl->calls_eh_return.
> >         (ix86_emit_save_regs): Emit ppx is available only when
> >         TARGET_APX_PPX && !crtl->calls_eh_return.
> >         (ix86_expand_epilogue): Don't restore reg using mov when
> >         apx_ppx_used flag is true.
> >         * config/i386/i386.h (struct machine_frame_state):
> >         Add apx_ppx_used flag.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/i386/apx-ppx-2.c: New test.
> >         * gcc.target/i386/apx-ppx-3.c: Likewise.
> > ---
> >  gcc/config/i386/i386.cc                   | 13 +++++++++----
> >  gcc/config/i386/i386.h                    |  4 ++++
> >  gcc/testsuite/gcc.target/i386/apx-ppx-2.c | 14 ++++++++++++++
> >  gcc/testsuite/gcc.target/i386/apx-ppx-3.c |  7 +++++++
> >  4 files changed, 34 insertions(+), 4 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> >
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index bd7411190af..99def8d4a77 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -7429,6 +7429,7 @@ ix86_emit_save_regs (void)
> >  {
> >    int regno;
> >    rtx_insn *insn;
> > +  bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;
> >
> >    if (!TARGET_APX_PUSH2POP2
> >        || !ix86_can_use_push2pop2 ()
> > @@ -7438,7 +7439,7 @@ ix86_emit_save_regs (void)
> >         if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
> >           {
> >             insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
> > -                                       TARGET_APX_PPX));
> > +                                       use_ppx));
> >             RTX_FRAME_RELATED_P (insn) = 1;
> >           }
> >      }
> > @@ -7469,7 +7470,7 @@ ix86_emit_save_regs (void)
> >                                                               
> > regno_list[0]),
> >                                                  gen_rtx_REG (word_mode,
> >                                                               
> > regno_list[1]),
> > -                                                TARGET_APX_PPX));
> > +                                                use_ppx));
> >                     RTX_FRAME_RELATED_P (insn) = 1;
> >                     rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc 
> > (3));
> >
> > @@ -7502,7 +7503,7 @@ ix86_emit_save_regs (void)
> >             else
> >               {
> >                 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
> > -                                           TARGET_APX_PPX));
> > +                                           use_ppx));
> >                 RTX_FRAME_RELATED_P (insn) = 1;
> >                 aligned = true;
> >               }
> > @@ -7511,7 +7512,7 @@ ix86_emit_save_regs (void)
> >         {
> >           insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
> >                                                    regno_list[0]),
> > -                                     TARGET_APX_PPX));
> > +                                     use_ppx));
> >           RTX_FRAME_RELATED_P (insn) = 1;
> >         }
> >      }
> > @@ -8985,6 +8986,7 @@ ix86_expand_prologue (void)
> >        if (!frame.save_regs_using_mov)
> >         {
> >           ix86_emit_save_regs ();
> > +         m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
> >           int_registers_saved = true;
> >           gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
> >         }
> > @@ -9870,6 +9872,9 @@ ix86_expand_epilogue (int style)
> >    /* SEH requires the use of pops to identify the epilogue.  */
> >    else if (TARGET_SEH)
> >      restore_regs_via_mov = false;
> > +  /* If we already save reg with pushp, don't use move at epilogue.  */
> > +  else if (m->fs.apx_ppx_used)
> > +    restore_regs_via_mov = false;
> >    /* If we're only restoring one register and sp cannot be used then
> >       using a move instruction to restore the register since it's
> >       less work than reloading sp and popping the register.  */
> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> > index 147b12cd014..0c5292e1d64 100644
> > --- a/gcc/config/i386/i386.h
> > +++ b/gcc/config/i386/i386.h
> > @@ -2693,6 +2693,10 @@ struct GTY(()) machine_frame_state
> >       The flags realigned and sp_realigned are mutually exclusive.  */
> >    BOOL_BITFIELD sp_realigned : 1;
> >
> > +  /* When APX_PPX used in prologue, force epilogue to emit
> > +  popp instead of move and leave.  */
> > +  BOOL_BITFIELD apx_ppx_used : 1;
> > +
> >    /* If sp_realigned is set, this is the last valid offset from the CFA
> >       that can be used for access with the frame pointer.  */
> >    HOST_WIDE_INT sp_realigned_fp_last;
> > diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-2.c 
> > b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> > new file mode 100644
> > index 00000000000..42a95340b55
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> > @@ -0,0 +1,14 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O1 -mapx-features=ppx -fno-omit-frame-pointer" } */
> > +
> > +/* { dg-final { scan-assembler "pushp" } } */
> > +/* { dg-final { scan-assembler "popp" } } */
> > +/* { dg-final { scan-assembler-not "leave" } } */
> > +
> > +extern int bar (int a);
> > +extern int *q;
> > +
> > +void foo (int *a)
> > +{
> > +  q[2] = bar (q[1]);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-3.c 
> > b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> > new file mode 100644
> > index 00000000000..76931fbe294
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> > @@ -0,0 +1,7 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O2 -mapx-features=ppx" } */
> > +
> > +/* { dg-final { scan-assembler-not "pushp" } } */
> > +/* { dg-final { scan-assembler-not "popp" } } */
> > +
> > +#include "eh_return-2.c"
> > --
> > 2.31.1
> >
>
>
> --
> BR,
> Hongtao



-- 
BR,
Hongtao

Reply via email to