The stack space that save-restore reserves is not properly accounted for in stack allocation and deallocation. This patch allows fewer instructions to be used for stack allocation and deallocation when save-restore is enabled.
before patch: bar: call t0,__riscv_save_4 addi sp,sp,-64 ... li t0,-12288 addi t0,t0,-1968 # optimized out after patch add sp,sp,t0 # prologue ... li t0,12288 # epilogue addi t0,t0,2000 # optimized out after patch add sp,sp,t0 ... addi sp,sp,32 tail __riscv_restore_4 after patch: bar: call t0,__riscv_save_4 addi sp,sp,-2032 ... li t0,-12288 add sp,sp,t0 # prologue ... li t0,12288 # epilogue add sp,sp,t0 ... addi sp,sp,2032 tail __riscv_restore_4 gcc/ChangeLog: * config/riscv/riscv.cc (riscv_expand_prologue): consider save-restore in stack allocation. (riscv_expand_epilogue): consider save-restore in stack deallocation. gcc/testsuite/ChangeLog: * gcc.target/riscv/stack_save_restore.c: New test. --- gcc/config/riscv/riscv.cc | 50 ++++++++++--------- .../gcc.target/riscv/stack_save_restore.c | 40 +++++++++++++++ 2 files changed, 66 insertions(+), 24 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/stack_save_restore.c diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index f0bbcd6d6be..a50f2303032 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -5010,12 +5010,12 @@ void riscv_expand_prologue (void) { struct riscv_frame_info *frame = &cfun->machine->frame; - poly_int64 size = frame->total_size; + poly_int64 remaining_size = frame->total_size; unsigned mask = frame->mask; rtx insn; if (flag_stack_usage_info) - current_function_static_stack_size = constant_lower_bound (size); + current_function_static_stack_size = constant_lower_bound (remaining_size); if (cfun->machine->naked_p) return; @@ -5026,7 +5026,7 @@ riscv_expand_prologue (void) rtx dwarf = NULL_RTX; dwarf = riscv_adjust_libcall_cfi_prologue (); - size -= frame->save_libcall_adjustment; + remaining_size -= frame->save_libcall_adjustment; insn = emit_insn (riscv_gen_gpr_save_insn (frame)); frame->mask = 0; /* Temporarily fib that we need not save GPRs. */ @@ -5037,16 +5037,14 @@ riscv_expand_prologue (void) /* Save the registers. 
*/ if ((frame->mask | frame->fmask) != 0) { - HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size); - if (size.is_constant ()) - step1 = MIN (size.to_constant(), step1); + HOST_WIDE_INT step1 = riscv_first_stack_step (frame, remaining_size); insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-step1)); RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; - size -= step1; - riscv_for_each_saved_reg (size, riscv_save_reg, false, false); + remaining_size -= step1; + riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false, false); } frame->mask = mask; /* Undo the above fib. */ @@ -5055,29 +5053,29 @@ riscv_expand_prologue (void) if (frame_pointer_needed) { insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, - GEN_INT ((frame->hard_frame_pointer_offset - size).to_constant ())); + GEN_INT ((frame->hard_frame_pointer_offset - remaining_size).to_constant ())); RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; riscv_emit_stack_tie (); } /* Allocate the rest of the frame. */ - if (known_gt (size, 0)) + if (known_gt (remaining_size, 0)) { /* Two step adjustment: 1.scalable frame. 2.constant frame. */ poly_int64 scalable_frame (0, 0); - if (!size.is_constant ()) + if (!remaining_size.is_constant ()) { /* First for scalable frame. */ - poly_int64 scalable_frame = size; - scalable_frame.coeffs[0] = size.coeffs[1]; + poly_int64 scalable_frame = remaining_size; + scalable_frame.coeffs[0] = remaining_size.coeffs[1]; riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame, false); - size -= scalable_frame; + remaining_size -= scalable_frame; } /* Second step for constant frame. 
*/ - HOST_WIDE_INT constant_frame = size.to_constant (); + HOST_WIDE_INT constant_frame = remaining_size.to_constant (); if (constant_frame == 0) return; @@ -5142,6 +5140,8 @@ riscv_expand_epilogue (int style) HOST_WIDE_INT step2 = 0; bool use_restore_libcall = ((style == NORMAL_RETURN) && riscv_use_save_libcall (frame)); + unsigned libcall_size = use_restore_libcall ? + frame->save_libcall_adjustment : 0; rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); rtx insn; @@ -5212,13 +5212,18 @@ riscv_expand_epilogue (int style) REG_NOTES (insn) = dwarf; } + if (use_restore_libcall) + frame->mask = 0; /* Temporarily fib for GPRs. */ + /* If we need to restore registers, deallocate as much stack as possible in the second step without going out of range. */ if ((frame->mask | frame->fmask) != 0) - { - step2 = riscv_first_stack_step (frame, frame->total_size); - step1 -= step2; - } + step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size); + + if (use_restore_libcall) + frame->mask = mask; /* Undo the above fib. */ + + step1 -= step2 + libcall_size; /* Set TARGET to BASE + STEP1. */ if (known_gt (step1, 0)) @@ -5272,15 +5277,12 @@ riscv_expand_epilogue (int style) frame->mask = 0; /* Temporarily fib that we need not save GPRs. */ /* Restore the registers. */ - riscv_for_each_saved_reg (frame->total_size - step2, riscv_restore_reg, + riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size, + riscv_restore_reg, true, style == EXCEPTION_RETURN); if (use_restore_libcall) - { frame->mask = mask; /* Undo the above fib. 
*/ - gcc_assert (step2 >= frame->save_libcall_adjustment); - step2 -= frame->save_libcall_adjustment; - } if (need_barrier_p) riscv_emit_stack_tie (); diff --git a/gcc/testsuite/gcc.target/riscv/stack_save_restore.c b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c new file mode 100644 index 00000000000..522e706cfbf --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32imafc -mabi=ilp32f -msave-restore -O2 -fno-schedule-insns -fno-schedule-insns2 -fno-unroll-loops -fno-peel-loops -fno-lto" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +char my_getchar(); +float getf(); + +/* +**bar: +** call t0,__riscv_save_4 +** addi sp,sp,-2032 +** ... +** li t0,-12288 +** add sp,sp,t0 +** ... +** li t0,12288 +** add sp,sp,t0 +** ... +** addi sp,sp,2032 +** tail __riscv_restore_4 +*/ +int bar() +{ + float volatile farray[3568]; + + float sum = 0; + float f1 = getf(); + float f2 = getf(); + float f3 = getf(); + float f4 = getf(); + + for (int i = 0; i < 3568; i++) + { + farray[i] = my_getchar() * 1.2; + sum += farray[i]; + } + + return sum + f1 + f2 + f3 + f4; +} + -- 2.17.1