Hi,

According to APX spec, the pushp/popp pairs should be matched,
otherwise the PPX hint cannot take effect and cause performance loss.

In the ix86_expand_epilogue, there are several optimizations that may
cause the epilogue using mov to restore the regs. Check if PPX applied
and prevent usage of mov/leave in the epilogue.

Bootstrapped/regtested on x86_64-pc-linux-gnu.

Ok for trunk?

gcc/ChangeLog:

        * config/i386/i386.cc (ix86_expand_prologue): Set apx_ppx_used
        flag in m.fs with TARGET_APX_PPX && !crtl->calls_eh_return.
        (ix86_emit_save_regs): Emit ppx is available only when
        TARGET_APX_PPX && !crtl->calls_eh_return.
        (ix86_expand_epilogue): Don't restore reg using mov when
        apx_ppx_used flag is true.
        * config/i386/i386.h (struct machine_frame_state):
        Add apx_ppx_used flag.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/apx-ppx-2.c: New test.
        * gcc.target/i386/apx-ppx-3.c: Likewise.
---
 gcc/config/i386/i386.cc                   | 13 +++++++++----
 gcc/config/i386/i386.h                    |  4 ++++
 gcc/testsuite/gcc.target/i386/apx-ppx-2.c | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/apx-ppx-3.c |  7 +++++++
 4 files changed, 34 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-3.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index bd7411190af..99def8d4a77 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -7429,6 +7429,7 @@ ix86_emit_save_regs (void)
 {
   int regno;
   rtx_insn *insn;
+  bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;
 
   if (!TARGET_APX_PUSH2POP2
       || !ix86_can_use_push2pop2 ()
@@ -7438,7 +7439,7 @@ ix86_emit_save_regs (void)
        if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
          {
            insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
-                                       TARGET_APX_PPX));
+                                       use_ppx));
            RTX_FRAME_RELATED_P (insn) = 1;
          }
     }
@@ -7469,7 +7470,7 @@ ix86_emit_save_regs (void)
                                                              regno_list[0]),
                                                 gen_rtx_REG (word_mode,
                                                              regno_list[1]),
-                                                TARGET_APX_PPX));
+                                                use_ppx));
                    RTX_FRAME_RELATED_P (insn) = 1;
                    rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
 
@@ -7502,7 +7503,7 @@ ix86_emit_save_regs (void)
            else
              {
                insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
-                                           TARGET_APX_PPX));
+                                           use_ppx));
                RTX_FRAME_RELATED_P (insn) = 1;
                aligned = true;
              }
@@ -7511,7 +7512,7 @@ ix86_emit_save_regs (void)
        {
          insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
                                                   regno_list[0]),
-                                     TARGET_APX_PPX));
+                                     use_ppx));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
     }
@@ -8985,6 +8986,7 @@ ix86_expand_prologue (void)
       if (!frame.save_regs_using_mov)
        {
          ix86_emit_save_regs ();
+         m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
          int_registers_saved = true;
          gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
        }
@@ -9870,6 +9872,9 @@ ix86_expand_epilogue (int style)
   /* SEH requires the use of pops to identify the epilogue.  */
   else if (TARGET_SEH)
     restore_regs_via_mov = false;
+  /* If we already save reg with pushp, don't use move at epilogue.  */
+  else if (m->fs.apx_ppx_used)
+    restore_regs_via_mov = false;
   /* If we're only restoring one register and sp cannot be used then
      using a move instruction to restore the register since it's
      less work than reloading sp and popping the register.  */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 147b12cd014..0c5292e1d64 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2693,6 +2693,10 @@ struct GTY(()) machine_frame_state
      The flags realigned and sp_realigned are mutually exclusive.  */
   BOOL_BITFIELD sp_realigned : 1;
 
+  /* When APX_PPX used in prologue, force epilogue to emit
+  popp instead of move and leave.  */
+  BOOL_BITFIELD apx_ppx_used : 1;
+
   /* If sp_realigned is set, this is the last valid offset from the CFA
      that can be used for access with the frame pointer.  */
   HOST_WIDE_INT sp_realigned_fp_last;
diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-2.c 
b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
new file mode 100644
index 00000000000..42a95340b55
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O1 -mapx-features=ppx -fno-omit-frame-pointer" } */
+
+/* { dg-final { scan-assembler "pushp" } } */
+/* { dg-final { scan-assembler "popp" } } */
+/* { dg-final { scan-assembler-not "leave" } } */
+
+extern int bar (int a);
+extern int *q;
+
+void foo (int *a)
+{
+  q[2] = bar (q[1]);
+}
diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-3.c 
b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
new file mode 100644
index 00000000000..76931fbe294
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mapx-features=ppx" } */
+
+/* { dg-final { scan-assembler-not "pushp" } } */
+/* { dg-final { scan-assembler-not "popp" } } */
+
+#include "eh_return-2.c"
-- 
2.31.1

Reply via email to