Le 27/08/2019 à 15:55, Nicholas Piggin a écrit :
powerpc has an optimisation where interrupts avoid saving the
non-volatile (or callee saved) registers to the interrupt stack frame if
they are not required.

Two problems with this are that an interrupt does not always know
whether it will need non-volatiles; and if it does need them, they can
only be saved from the entry-scoped asm code (because we don't control
what the C compiler does with these registers).

system calls are the most difficult: some system calls always require
all registers (e.g., fork, to copy regs into the child).  Sometimes
registers are only required under certain conditions (e.g., tracing,
signal delivery). These cases require ugly logic in the call chains
(e.g., ppc_fork), and require a lot of logic to be implemented in asm.

Do you really find it ugly to just call function nvgprs() before calling sys_fork() ? I guess there are things a lot uglier.


So remove the optimisation for system calls, and always save NVGPRs on
entry. Modern high performance CPUs are not so sensitive, because the
stores are dense in cache and can be hidden by other expensive work in
the syscall path -- the null syscall selftests benchmark on POWER9 is
not slowed (124.40ns before and 123.64ns after, i.e., within the noise).

I did the test on PPC32:

On an 885, null_syscall reports 2227ns (132MHz)
If saving non-volatile regs, it goes to 2419, ie +8.6%

On an 8321, null_syscall reports 1021ns (333MHz)
If saving non-volatile regs, it goes to 1100, ie +7.7%

So unless going to C compensates this degradation, I guess it is not worth it on PPC32.


Other interrupts retain the NVGPR optimisation for now.

Signed-off-by: Nicholas Piggin <npig...@gmail.com>
---
Changes since v1:
- Improve changelog
- Fix clone3 spu table entry (Segher)

  arch/powerpc/kernel/entry_64.S           | 72 +++++-------------------
  arch/powerpc/kernel/syscalls/syscall.tbl | 22 +++++---
  2 files changed, 28 insertions(+), 66 deletions(-)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 6467bdab8d40..5a3e0b5c9ad1 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -98,13 +98,14 @@ END_BTB_FLUSH_SECTION
        std     r11,_XER(r1)
        std     r11,_CTR(r1)
        std     r9,GPR13(r1)
+       SAVE_NVGPRS(r1)
        mflr    r10
        /*
         * This clears CR0.SO (bit 28), which is the error indication on
         * return from this system call.
         */
        rldimi  r2,r11,28,(63-28)
-       li      r11,0xc01
+       li      r11,0xc00
        std     r10,_LINK(r1)
        std     r11,_TRAP(r1)
        std     r3,ORIG_GPR3(r1)
@@ -323,7 +324,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/* Traced system call support */
  .Lsyscall_dotrace:
-       bl      save_nvgprs
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      do_syscall_trace_enter
@@ -408,7 +408,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
        mtmsrd  r10,1
  #endif /* CONFIG_PPC_BOOK3E */
- bl save_nvgprs
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      do_syscall_trace_leave
        b       ret_from_except
@@ -442,62 +441,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
  _ASM_NOKPROBE_SYMBOL(system_call_common);
  _ASM_NOKPROBE_SYMBOL(system_call_exit);
-/* Save non-volatile GPRs, if not already saved. */
-_GLOBAL(save_nvgprs)
-       ld      r11,_TRAP(r1)
-       andi.   r0,r11,1
-       beqlr-
-       SAVE_NVGPRS(r1)
-       clrrdi  r0,r11,1
-       std     r0,_TRAP(r1)
-       blr
-_ASM_NOKPROBE_SYMBOL(save_nvgprs);

I see it is added back somewhere below. Why don't you leave it where it is ?

-
-       
-/*
- * The sigsuspend and rt_sigsuspend system calls can call do_signal
- * and thus put the process into the stopped state where we might
- * want to examine its user state with ptrace.  Therefore we need
- * to save all the nonvolatile registers (r14 - r31) before calling
- * the C code.  Similarly, fork, vfork and clone need the full
- * register state on the stack so that it can be copied to the child.
- */
-
-_GLOBAL(ppc_fork)
-       bl      save_nvgprs
-       bl      sys_fork
-       b       .Lsyscall_exit
-
-_GLOBAL(ppc_vfork)
-       bl      save_nvgprs
-       bl      sys_vfork
-       b       .Lsyscall_exit
-
-_GLOBAL(ppc_clone)
-       bl      save_nvgprs
-       bl      sys_clone
-       b       .Lsyscall_exit
-
-_GLOBAL(ppc_clone3)
-       bl      save_nvgprs
-       bl      sys_clone3
-       b       .Lsyscall_exit
-
-_GLOBAL(ppc32_swapcontext)
-       bl      save_nvgprs
-       bl      compat_sys_swapcontext
-       b       .Lsyscall_exit
-
-_GLOBAL(ppc64_swapcontext)
-       bl      save_nvgprs
-       bl      sys_swapcontext
-       b       .Lsyscall_exit
-
-_GLOBAL(ppc_switch_endian)
-       bl      save_nvgprs
-       bl      sys_switch_endian
-       b       .Lsyscall_exit
-
  _GLOBAL(ret_from_fork)
        bl      schedule_tail
        REST_NVGPRS(r1)
@@ -516,6 +459,17 @@ _GLOBAL(ret_from_kernel_thread)
        li      r3,0
        b       .Lsyscall_exit
+/* Save non-volatile GPRs, if not already saved. */
+_GLOBAL(save_nvgprs)
+       ld      r11,_TRAP(r1)
+       andi.   r0,r11,1
+       beqlr-
+       SAVE_NVGPRS(r1)
+       clrrdi  r0,r11,1
+       std     r0,_TRAP(r1)
+       blr
+_ASM_NOKPROBE_SYMBOL(save_nvgprs);
+

Moved here.

  #ifdef CONFIG_PPC_BOOK3S_64
#define FLUSH_COUNT_CACHE \

Christophe

Reply via email to