Currently we generate a sub-optimal epilogue when a frame pointer
is in use and the function has an outgoing argument area.
take gcc.target/aarch64/test_frame_12.c for example:
the epilogue for test_12 is:
.L12:
sub sp, x29, #16
ldp x29, x30, [sp, 16]
add sp, sp, 432
ret
while the optimized version should be:
.L12:
add sp, x29, 0
ldp x29, x30, [sp], 416
ret
When a frame pointer is in use, the prologue sets it up to point to the
base address of the register save area.  The epilogue can take advantage
of this: restoring SP directly from FP skips over the outgoing area (if
there is one), so the remaining stack adjustment can always be folded
into a load with write-back whenever there is a frame pointer.
OK to install?
Thanks.
gcc/
* config/aarch64/aarch64.c (aarch64_expand_epilogue): Don't subtract
outgoing area size when restoring stack_pointer_rtx.
gcc/testsuite/
* gcc.target/aarch64/test_frame_12.c: Match optimized instruction sequences.
>From 9d8cbfa071df773ef5edfed499c0dc90be8eebfa Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.w...@arm.com>
Date: Tue, 17 Jun 2014 22:19:33 +0100
Subject: [PATCH 14/19] [AArch64/GCC][15/20] Optimize epilogue when there is
frame pointer
Currently we generate a sub-optimal epilogue when a frame pointer
is in use and the function has an outgoing argument area.
take gcc.target/aarch64/test_frame_12.c for example:
the epilogue for test_12 is:
.L12:
sub sp, x29, #16
ldp x29, x30, [sp, 16]
add sp, sp, 432
ret
while the optimized version should be:
.L12:
add sp, x29, 0
ldp x29, x30, [sp], 416
ret
When a frame pointer is in use, the prologue sets it up to point to the
base address of the register save area.  The epilogue can take advantage
of this: restoring SP directly from FP skips over the outgoing area (if
there is one), so the remaining stack adjustment can always be folded
into a load with write-back whenever there is a frame pointer.
2014-06-16 Jiong Wang <jiong.w...@arm.com>
Marcus Shawcroft <marcus.shawcr...@arm.com>
gcc/
* config/aarch64/aarch64.c (aarch64_expand_epilogue): Don't subtract
outgoing area size when restoring stack_pointer_rtx.
gcc/testsuite/
* gcc.target/aarch64/test_frame_12.c: Match optimized instruction sequences.
---
gcc/config/aarch64/aarch64.c | 24 +++++++---------------
gcc/testsuite/gcc.target/aarch64/test_frame_12.c | 4 ++++
2 files changed, 11 insertions(+), 17 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 425c865..65a84e8 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2360,7 +2360,8 @@ aarch64_expand_epilogue (bool for_sibcall)
{
insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
hard_frame_pointer_rtx,
- GEN_INT (- fp_offset)));
+ GEN_INT (0)));
+ offset = offset - fp_offset;
RTX_FRAME_RELATED_P (insn) = 1;
/* As SP is set to (FP - fp_offset), according to the rules in
dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
@@ -2368,27 +2369,16 @@ aarch64_expand_epilogue (bool for_sibcall)
cfa_reg = stack_pointer_rtx;
}
- aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM);
+ aarch64_restore_callee_saves (DFmode, frame_pointer_needed ? 0 : fp_offset,
+ V0_REGNUM, V31_REGNUM);
if (offset > 0)
{
if (frame_pointer_needed)
{
- if (fp_offset)
- {
- aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM,
- R30_REGNUM);
- insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
- GEN_INT (offset)));
- RTX_FRAME_RELATED_P (insn) = 1;
- }
- else
- {
- aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM,
- R28_REGNUM);
- aarch64_popwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset,
- cfa_reg);
- }
+ aarch64_restore_callee_saves (DImode, 0, R0_REGNUM, R28_REGNUM);
+ aarch64_popwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset,
+ cfa_reg);
}
else
{
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_12.c b/gcc/testsuite/gcc.target/aarch64/test_frame_12.c
index 3649527..81f0070 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_12.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_12.c
@@ -12,4 +12,8 @@ t_frame_pattern_outgoing (test12, 400, , 8, a[8])
t_frame_run (test12)
/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */
+
+/* Check epilogue using write-back. */
+/* { dg-final { scan-assembler-times "ldp\tx29, x30, \\\[sp\\\], \[0-9\]+" 3 } } */
+
/* { dg-final { cleanup-saved-temps } } */
--
1.7.9.5