Load LR earlier when restoring the saved registers, so that it is available sooner for the return branch. Signed-off-by: Richard Henderson <r...@twiddle.net> --- tcg/aarch64/tcg-target.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c index d391c10..05857bf 100644 --- a/tcg/aarch64/tcg-target.c +++ b/tcg/aarch64/tcg-target.c @@ -1853,13 +1853,13 @@ static void tcg_target_qemu_prologue(TCGContext *s) + (TCG_TARGET_STACK_ALIGN - 1); frame_size_tcg_locals &= ~(TCG_TARGET_STACK_ALIGN - 1); - /* Push (FP, LR) and allocate space for all saved registers. */ - tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR, + /* Push (x19, x20) and allocate space for all saved registers. */ + tcg_out_insn(s, 3314, STP, TCG_REG_X19, TCG_REG_X20, TCG_REG_SP, -frame_size_callee_saved, 1, 1); - /* Store callee-preserved regs x19..x28. */ - for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { - int ofs = (r - TCG_REG_X19 + 2) * 8; + /* Store callee-preserved regs x21..x30. */ + for (r = TCG_REG_X21; r < TCG_REG_X30; r += 2) { + int ofs = (r - TCG_REG_X19) * 8; tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0); } @@ -1887,14 +1887,15 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, frame_size_tcg_locals); - /* Restore registers x19..x28. */ - for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { - int ofs = (r - TCG_REG_X19 + 2) * 8; + /* Restore registers x30..x21 in reverse order. This gets LR loaded + in the first LDP, minimizing the latency to the return insn. */ + for (r = TCG_REG_X29; r >= TCG_REG_X21; r -= 2) { + int ofs = (r - TCG_REG_X19) * 8; tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0); } - /* Pop (FP, LR), restore SP to previous frame. */ - tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR, + /* Pop (x19, x20), restoring SP to previous frame. */ + tcg_out_insn(s, 3314, LDP, TCG_REG_X19, TCG_REG_X20, TCG_REG_SP, frame_size_callee_saved, 0, 1); tcg_out_insn(s, 3207, RET, TCG_REG_LR); } -- 1.8.5.3