Le 27/08/2019 à 15:55, Nicholas Piggin a écrit :
powerpc has an optimisation where interrupts avoid saving the
non-volatile (or callee saved) registers to the interrupt stack frame if
they are not required.
Two problems with this are that an interrupt does not always know
whether it will need non-volatiles; and if it does need them, they can
only be saved from the entry-scoped asm code (because we don't control
what the C compiler does with these registers).
system calls are the most difficult: some system calls always require
all registers (e.g., fork, to copy regs into the child). Sometimes
registers are only required under certain conditions (e.g., tracing,
signal delivery). These cases require ugly logic in the call chains
(e.g., ppc_fork), and require a lot of logic to be implemented in asm.
Do you really find it ugly to just call function nvgprs() before calling
sys_fork() ? I guess there are things a lot uglier.
So remove the optimisation for system calls, and always save NVGPRs on
entry. Modern high performance CPUs are not so sensitive, because the
stores are dense in cache and can be hidden by other expensive work in
the syscall path -- the null syscall selftests benchmark on POWER9 is
not slowed (124.40ns before and 123.64ns after, i.e., within the noise).
I did the test on PPC32:
On an 885, null_syscall reports 2227ns (132MHz)
If saving non-volatile regs, it goes to 2419, ie +8.6%
On an 8321, null_syscall reports 1021ns (333MHz)
If saving non-volatile regs, it goes to 1100, ie +7.7%
So unless going to C compensates this degradation, I guess it is not
worth it on PPC32.
Other interrupts retain the NVGPR optimisation for now.
Signed-off-by: Nicholas Piggin <npig...@gmail.com>
---
Changes since v1:
- Improve changelog
- Fix clone3 spu table entry (Segher)
arch/powerpc/kernel/entry_64.S | 72 +++++-------------------
arch/powerpc/kernel/syscalls/syscall.tbl | 22 +++++---
2 files changed, 28 insertions(+), 66 deletions(-)
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 6467bdab8d40..5a3e0b5c9ad1 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -98,13 +98,14 @@ END_BTB_FLUSH_SECTION
std r11,_XER(r1)
std r11,_CTR(r1)
std r9,GPR13(r1)
+ SAVE_NVGPRS(r1)
mflr r10
/*
* This clears CR0.SO (bit 28), which is the error indication on
* return from this system call.
*/
rldimi r2,r11,28,(63-28)
- li r11,0xc01
+ li r11,0xc00
std r10,_LINK(r1)
std r11,_TRAP(r1)
std r3,ORIG_GPR3(r1)
@@ -323,7 +324,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/* Traced system call support */
.Lsyscall_dotrace:
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_syscall_trace_enter
@@ -408,7 +408,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
mtmsrd r10,1
#endif /* CONFIG_PPC_BOOK3E */
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_syscall_trace_leave
b ret_from_except
@@ -442,62 +441,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
_ASM_NOKPROBE_SYMBOL(system_call_common);
_ASM_NOKPROBE_SYMBOL(system_call_exit);
-/* Save non-volatile GPRs, if not already saved. */
-_GLOBAL(save_nvgprs)
- ld r11,_TRAP(r1)
- andi. r0,r11,1
- beqlr-
- SAVE_NVGPRS(r1)
- clrrdi r0,r11,1
- std r0,_TRAP(r1)
- blr
-_ASM_NOKPROBE_SYMBOL(save_nvgprs);
I see it is added back somewhere below. Why don't you leave it where it is ?
-
-
-/*
- * The sigsuspend and rt_sigsuspend system calls can call do_signal
- * and thus put the process into the stopped state where we might
- * want to examine its user state with ptrace. Therefore we need
- * to save all the nonvolatile registers (r14 - r31) before calling
- * the C code. Similarly, fork, vfork and clone need the full
- * register state on the stack so that it can be copied to the child.
- */
-
-_GLOBAL(ppc_fork)
- bl save_nvgprs
- bl sys_fork
- b .Lsyscall_exit
-
-_GLOBAL(ppc_vfork)
- bl save_nvgprs
- bl sys_vfork
- b .Lsyscall_exit
-
-_GLOBAL(ppc_clone)
- bl save_nvgprs
- bl sys_clone
- b .Lsyscall_exit
-
-_GLOBAL(ppc_clone3)
- bl save_nvgprs
- bl sys_clone3
- b .Lsyscall_exit
-
-_GLOBAL(ppc32_swapcontext)
- bl save_nvgprs
- bl compat_sys_swapcontext
- b .Lsyscall_exit
-
-_GLOBAL(ppc64_swapcontext)
- bl save_nvgprs
- bl sys_swapcontext
- b .Lsyscall_exit
-
-_GLOBAL(ppc_switch_endian)
- bl save_nvgprs
- bl sys_switch_endian
- b .Lsyscall_exit
-
_GLOBAL(ret_from_fork)
bl schedule_tail
REST_NVGPRS(r1)
@@ -516,6 +459,17 @@ _GLOBAL(ret_from_kernel_thread)
li r3,0
b .Lsyscall_exit
+/* Save non-volatile GPRs, if not already saved. */
+_GLOBAL(save_nvgprs)
+ ld r11,_TRAP(r1)
+ andi. r0,r11,1
+ beqlr-
+ SAVE_NVGPRS(r1)
+ clrrdi r0,r11,1
+ std r0,_TRAP(r1)
+ blr
+_ASM_NOKPROBE_SYMBOL(save_nvgprs);
+
Moved here.
#ifdef CONFIG_PPC_BOOK3S_64
#define FLUSH_COUNT_CACHE \
Christophe