Add the fields sp_realigned and sp_realigned_offset to struct
machine_frame_state.  We now have the concept of the stack pointer being
re-aligned rather than invalid.  The inline functions sp_valid_at and
fp_valid_at are added to test if a given location relative to the CFA
can be accessed with the stack or frame pointer, respectively.

Stack allocation prior to re-alignment is modified so that we allocate
only what is needed.  In particular, we no longer allocate unneeded space
when no SSE registers are saved but frame.sse_reg_save_offset has been
increased for alignment.

As this change only alters how SSE registers are saved, moving the
re-alignment AND instruction should not hinder parallelization of integer
register saves.

Signed-off-by: Daniel Santos <daniel.san...@pobox.com>
---
 gcc/config/i386/i386.c | 74 +++++++++++++++++++++++++++++++++++++-------------
 gcc/config/i386/i386.h | 11 ++++++++
 2 files changed, 66 insertions(+), 19 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 31f69c92968..7923486157d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12783,6 +12783,24 @@ choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
   return len;
 }
 
+/* Determine if the stack pointer is valid for accessing CFA_OFFSET.  */
+
+static inline bool sp_valid_at (HOST_WIDE_INT cfa_offset)
+{
+  const struct machine_frame_state &fs = cfun->machine->fs;
+  return fs.sp_valid && !(fs.sp_realigned
+                         && cfa_offset < fs.sp_realigned_offset);
+}
+
+/* Determine if the frame pointer is valid for accessing CFA_OFFSET.  */
+
+static inline bool fp_valid_at (HOST_WIDE_INT cfa_offset)
+{
+  const struct machine_frame_state &fs = cfun->machine->fs;
+  return fs.fp_valid && !(fs.sp_valid && fs.sp_realigned
+                         && cfa_offset >= fs.sp_realigned_offset);
+}
+
 /* Return an RTX that points to CFA_OFFSET within the stack frame.
    The valid base registers are taken from CFUN->MACHINE->FS.  */
 
@@ -13081,15 +13099,18 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
     {
       HOST_WIDE_INT ooffset = m->fs.sp_offset;
       bool valid = m->fs.sp_valid;
+      bool realigned = m->fs.sp_realigned;
 
       if (src == hard_frame_pointer_rtx)
        {
          valid = m->fs.fp_valid;
+         realigned = false;
          ooffset = m->fs.fp_offset;
        }
       else if (src == crtl->drap_reg)
        {
          valid = m->fs.drap_valid;
+         realigned = false;
          ooffset = 0;
        }
       else
@@ -13103,6 +13124,7 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
 
       m->fs.sp_offset = ooffset - INTVAL (offset);
       m->fs.sp_valid = valid;
+      m->fs.sp_realigned = realigned;
     }
 }
 
@@ -13852,6 +13874,7 @@ ix86_expand_prologue (void)
      this is fudged; we're interested to offsets within the local frame.  */
   m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
   m->fs.sp_valid = true;
+  m->fs.sp_realigned = false;
 
   ix86_compute_frame_layout (&frame);
 
@@ -14068,11 +14091,10 @@ ix86_expand_prologue (void)
         that we must allocate the size of the register save area before
         performing the actual alignment.  Otherwise we cannot guarantee
         that there's enough storage above the realignment point.  */
-      if (m->fs.sp_offset != frame.sse_reg_save_offset)
+      allocate = frame.stack_realign_allocate_offset - m->fs.sp_offset;
+      if (allocate)
         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
-                                  GEN_INT (m->fs.sp_offset
-                                           - frame.sse_reg_save_offset),
-                                  -1, false);
+                                  GEN_INT (-allocate), -1, false);
 
       /* Align the stack.  */
       insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
@@ -14080,11 +14102,19 @@ ix86_expand_prologue (void)
                                        GEN_INT (-align_bytes)));
 
       /* For the purposes of register save area addressing, the stack
-         pointer is no longer valid.  As for the value of sp_offset,
-        see ix86_compute_frame_layout, which we need to match in order
-        to pass verification of stack_pointer_offset at the end.  */
+        pointer can no longer be used to access anything in the frame
+        below m->fs.sp_realigned_offset and the frame pointer cannot be
+        used for anything at or above.  */
       m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
-      m->fs.sp_valid = false;
+      m->fs.sp_realigned = true;
+      m->fs.sp_realigned_offset = m->fs.sp_offset - frame.nsseregs * 16;
+      gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
+      /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
+        is needed to describe where a register is saved using a realigned
+        stack pointer, so we need to invalidate the stack pointer for that
+        target.  */
+      if (TARGET_SEH)
+       m->fs.sp_valid = false;
     }
 
   allocate = frame.stack_pointer_offset - m->fs.sp_offset;
@@ -14423,6 +14453,7 @@ ix86_emit_leave (void)
 
   gcc_assert (m->fs.fp_valid);
   m->fs.sp_valid = true;
+  m->fs.sp_realigned = false;
   m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
   m->fs.fp_valid = false;
 
@@ -14523,9 +14554,10 @@ ix86_expand_epilogue (int style)
   ix86_finalize_stack_realign_flags ();
   ix86_compute_frame_layout (&frame);
 
-  m->fs.sp_valid = (!frame_pointer_needed
-                   || (crtl->sp_is_unchanging
-                       && !stack_realign_fp));
+  m->fs.sp_realigned = stack_realign_fp;
+  m->fs.sp_valid = stack_realign_fp
+                  || !frame_pointer_needed
+                  || crtl->sp_is_unchanging;
   gcc_assert (!m->fs.sp_valid
              || m->fs.sp_offset == frame.stack_pointer_offset);
 
@@ -14575,10 +14607,10 @@ ix86_expand_epilogue (int style)
   /* SEH requires the use of pops to identify the epilogue.  */
   else if (TARGET_SEH)
     restore_regs_via_mov = false;
-  /* If we're only restoring one register and sp is not valid then
+  /* If we're only restoring one register and sp cannot be used then
      using a move instruction to restore the register since it's
      less work than reloading sp and popping the register.  */
-  else if (!m->fs.sp_valid && frame.nregs <= 1)
+  else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
     restore_regs_via_mov = true;
   else if (TARGET_EPILOGUE_USING_MOVE
           && cfun->machine->use_fast_prologue_epilogue
@@ -14603,7 +14635,7 @@ ix86_expand_epilogue (int style)
         the stack pointer, if we will restore via sp.  */
       if (TARGET_64BIT
          && m->fs.sp_offset > 0x7fffffff
-         && !(m->fs.fp_valid || m->fs.drap_valid)
+         && !(fp_valid_at (frame.stack_realign_offset) || m->fs.drap_valid)
          && (frame.nsseregs + frame.nregs) != 0)
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
@@ -14689,6 +14721,7 @@ ix86_expand_epilogue (int style)
            }
          m->fs.sp_offset = UNITS_PER_WORD;
          m->fs.sp_valid = true;
+         m->fs.sp_realigned = false;
        }
     }
   else
@@ -14710,10 +14743,11 @@ ix86_expand_epilogue (int style)
        }
 
       /* First step is to deallocate the stack frame so that we can
-        pop the registers.  Also do it on SEH target for very large
-        frame as the emitted instructions aren't allowed by the ABI in
-        epilogues.  */
-      if (!m->fs.sp_valid
+        pop the registers.  If the stack pointer was realigned, it needs
+        to be restored now.  Also do it on SEH target for very large
+        frame as the emitted instructions aren't allowed by the ABI
+        in epilogues.  */
+      if (!m->fs.sp_valid || m->fs.sp_realigned
          || (TARGET_SEH
              && (m->fs.sp_offset - frame.reg_save_offset
                  >= SEH_MAX_FRAME_SIZE)))
@@ -14741,7 +14775,8 @@ ix86_expand_epilogue (int style)
     {
       /* If the stack pointer is valid and pointing at the frame
         pointer store address, then we only need a pop.  */
-      if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
+      if (sp_valid_at (frame.hfp_save_offset)
+         && m->fs.sp_offset == frame.hfp_save_offset)
        ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
       /* Leave results in shorter dependency chains on CPUs that are
         able to grok it fast.  */
@@ -14795,6 +14830,7 @@ ix86_expand_epilogue (int style)
      be possible to merge the local stack deallocation with the
      deallocation forced by ix86_static_chain_on_stack.   */
   gcc_assert (m->fs.sp_valid);
+  gcc_assert (!m->fs.sp_realigned);
   gcc_assert (!m->fs.fp_valid);
   gcc_assert (!m->fs.realigned);
   if (m->fs.sp_offset != UNITS_PER_WORD)
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 9e5f4d857d9..4e4cb7ca7e3 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2482,6 +2482,17 @@ struct GTY(()) machine_frame_state
      set, the SP/FP offsets above are relative to the aligned frame
      and not the CFA.  */
   BOOL_BITFIELD realigned : 1;
+
+  /* Indicates whether the stack pointer has been re-aligned.  When set,
+     SP/FP continue to be relative to the CFA, but the stack pointer
+     should only be used for offsets >= sp_realigned_offset, while
+     the frame pointer should be used for offsets < sp_realigned_offset.
+     The flags realigned and sp_realigned are mutually exclusive.  */
+  BOOL_BITFIELD sp_realigned : 1;
+
+  /* If sp_realigned is set, this is the offset from the CFA that the
+     stack pointer was realigned to.  */
+  HOST_WIDE_INT sp_realigned_offset;
 };
 
 /* Private to winnt.c.  */
-- 
2.11.0

Reply via email to