On 27/08/2019 at 05:30, Nicholas Piggin wrote:
System call entry and particularly exit code is beyond the limit of what
is reasonable to implement in asm.
This conversion moves all conditional branches out of the asm code,
except a relatively simple test to see whether all GPRs should be
restored at exit time.
Null syscall test is about 5% faster after this patch, because the exit
work is handled under local_irq_disable, and the hard mask and pending
interrupt replay is handled after that, which avoids games with MSR.
Interesting optimisation.
The asm instruction scheduling has not really been analysed and
optimised yet, as there are higher level features and improvements to
add first.
Signed-off-by: Nicholas Piggin <npig...@gmail.com>
---
arch/powerpc/include/asm/asm-prototypes.h | 11 -
arch/powerpc/include/asm/ptrace.h | 3 +
arch/powerpc/include/asm/signal.h | 2 +
arch/powerpc/include/asm/switch_to.h | 4 +
arch/powerpc/include/asm/time.h | 3 +
arch/powerpc/kernel/Makefile | 3 +-
arch/powerpc/kernel/entry_64.S | 343 ++++------------------
arch/powerpc/kernel/process.c | 6 +-
arch/powerpc/kernel/signal.h | 2 -
arch/powerpc/kernel/syscall_64.c | 202 +++++++++++++
10 files changed, 271 insertions(+), 308 deletions(-)
create mode 100644 arch/powerpc/kernel/syscall_64.c
[...]
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 24621e7e5033..f444525da9ce 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1609,7 +1609,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
extern void ret_from_kernel_thread(void);
void (*f)(void);
unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
- struct thread_info *ti = task_thread_info(p);
klp_init_thread_info(p);
@@ -1617,6 +1616,8 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
sp -= sizeof(struct pt_regs);
childregs = (struct pt_regs *) sp;
if (unlikely(p->flags & PF_KTHREAD)) {
+ struct thread_info *ti = task_thread_info(p);
+
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
childregs->gpr[1] = sp + sizeof(struct pt_regs);
@@ -1634,12 +1635,13 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
} else {
/* user thread */
struct pt_regs *regs = current_pt_regs();
+
CHECK_FULL_REGS(regs);
*childregs = *regs;
if (usp)
childregs->gpr[1] = usp;
p->thread.regs = childregs;
- childregs->gpr[3] = 0; /* Result from fork() */
+ /* ret_from_fork sets fork() result to 0 */
Does PPC32 ret_from_fork() do it as well?
if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_PPC64
if (!is_32bit_task())
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 800433685888..d396efca4068 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -10,8 +10,6 @@
#ifndef _POWERPC_ARCH_SIGNAL_H
#define _POWERPC_ARCH_SIGNAL_H
-extern void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
-
extern void __user *get_sigframe(struct ksignal *ksig, unsigned long sp,
size_t frame_size, int is_32);
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
new file mode 100644
index 000000000000..98ed970796d5
--- /dev/null
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -0,0 +1,202 @@
+#include <linux/err.h>
+#include <asm/hw_irq.h>
+#include <asm/paca.h>
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+#include <asm/signal.h>
+#include <asm/switch_to.h>
+#include <asm/syscall.h>
+#include <asm/time.h>
+
+extern void __noreturn tabort_syscall(void);
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+static inline void account_cpu_user_entry(void)
+{
+ unsigned long tb = mftb();
+
+ local_paca->accounting.utime += (tb - local_paca->accounting.starttime_user);
+ local_paca->accounting.starttime = tb;
+}
+static inline void account_cpu_user_exit(void)
+{
+ unsigned long tb = mftb();
+
+ local_paca->accounting.stime += (tb - local_paca->accounting.starttime);
+ local_paca->accounting.starttime_user = tb;
+}
+#else
+static inline void account_cpu_user_entry(void)
+{
+}
+static inline void account_cpu_user_exit(void)
+{
+}
+#endif
I think this block should go in arch/powerpc/include/asm/cputime.h; we
should limit #ifdefs as much as possible in C files.
It should also use get_accounting() instead of local_paca->accounting so
that it can be used on PPC32 as well.
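Roughly something like this in cputime.h, keeping the #ifdef in the header
(untested sketch, field names taken from struct cpu_accounting_data):

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static inline void account_cpu_user_entry(void)
{
	struct cpu_accounting_data *acct = get_accounting(current);
	unsigned long tb = mftb();

	/* Accumulate the user time elapsed since the last exit to user */
	acct->utime += (tb - acct->starttime_user);
	acct->starttime = tb;
}

static inline void account_cpu_user_exit(void)
{
	struct cpu_accounting_data *acct = get_accounting(current);
	unsigned long tb = mftb();

	/* Accumulate the system time spent in this kernel entry */
	acct->stime += (tb - acct->starttime);
	acct->starttime_user = tb;
}
#else
static inline void account_cpu_user_entry(void) { }
static inline void account_cpu_user_exit(void) { }
#endif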
+
+typedef long (*syscall_fn)(long, long, long, long, long, long);
+
+long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs)
+{
+ unsigned long ti_flags;
+ syscall_fn f;
+
+ BUG_ON(!(regs->msr & MSR_PR));
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (unlikely(regs->msr & MSR_TS_T))
+ tabort_syscall();
+#endif
MSR_TS_T and tabort_syscall() are declared regardless of
CONFIG_PPC_TRANSACTIONAL_MEM, so IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM)
should be used instead of the #ifdef.
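i.e. something like (untested):

	/* IS_ENABLED() lets the compiler drop the dead branch and the call */
	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
	    unlikely(regs->msr & MSR_TS_T))
		tabort_syscall();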
+
+ account_cpu_user_entry();
+
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ struct lppaca *lp = get_lppaca();
+
+ if (unlikely(local_paca->dtl_ridx != lp->dtl_idx))
+ accumulate_stolen_time();
+ }
+#endif
Same here: I think everything is available, so IS_ENABLED() should be
used instead of the #if.
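Something like the following, assuming dtl_ridx, get_lppaca() and
accumulate_stolen_time() are indeed visible in all configs (untested):

	if (IS_ENABLED(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) &&
	    IS_ENABLED(CONFIG_PPC_SPLPAR) &&
	    firmware_has_feature(FW_FEATURE_SPLPAR)) {
		struct lppaca *lp = get_lppaca();

		/* Account stolen time if the dispatch trace log has advanced */
		if (unlikely(local_paca->dtl_ridx != lp->dtl_idx))
			accumulate_stolen_time();
	}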
+
+#ifdef CONFIG_PPC_KUAP_DEBUG
+ if (mmu_has_feature(MMU_FTR_RADIX_KUAP))
+ WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
+#endif
This should go in a helper in one of the kup/kuap header files.
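For instance something like this in asm/book3s/64/kup-radix.h (helper name
made up, untested):

static inline void kuap_check_amr(void)
{
	/* Check that AMR still blocks user access when KUAP debug is on */
	if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) &&
	    mmu_has_feature(MMU_FTR_RADIX_KUAP))
		WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
}

Then both call sites here become a single kuap_check_amr() call.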
+
+ /*
+ * A syscall should always be called with interrupts enabled
+ * so we just unconditionally hard-enable here. When some kind
+ * of irq tracing is used, we additionally check that condition
+ * is correct
+ */
+#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)
+ WARN_ON(irq_soft_mask_return() != IRQS_ENABLED);
+ WARN_ON(local_paca->irq_happened);
+#endif
Can we use IS_ENABLED() here as well?
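Something like (untested):

	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
		/* Syscalls must be entered with interrupts soft-enabled */
		WARN_ON(irq_soft_mask_return() != IRQS_ENABLED);
		WARN_ON(local_paca->irq_happened);
	}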
+
+ __hard_irq_enable();
+
+ /*
+ * We do need to set SOFTE in the stack frame or the return
+ * from interrupt will be painful
+ */
+ regs->softe = IRQS_ENABLED;
+
+ ti_flags = current_thread_info()->flags;
+ if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
+ /*
+ * We use the return value of do_syscall_trace_enter() as the
+ * syscall number. If the syscall was rejected for any reason
+ * do_syscall_trace_enter() returns an invalid syscall number
+ * and the test below against NR_syscalls will fail.
+ */
+ r0 = do_syscall_trace_enter(regs);
+ }
+
+ if (unlikely(r0 >= NR_syscalls))
+ return -ENOSYS;
+
+ /* May be faster to do array_index_nospec? */
+ barrier_nospec();
+
+ if (unlikely(ti_flags & _TIF_32BIT)) {
+ f = (void *)compat_sys_call_table[r0];
+
+ r3 &= 0x00000000ffffffffULL;
+ r4 &= 0x00000000ffffffffULL;
+ r5 &= 0x00000000ffffffffULL;
+ r6 &= 0x00000000ffffffffULL;
+ r7 &= 0x00000000ffffffffULL;
+ r8 &= 0x00000000ffffffffULL;
+
+ } else {
+ f = (void *)sys_call_table[r0];
+ }
+
+ return f(r3, r4, r5, r6, r7, r8);
+}
+
+unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs)
+{
+ unsigned long *ti_flagsp = &current_thread_info()->flags;
+ unsigned long ti_flags;
+ unsigned long ret = 0;
+
+ regs->result = r3;
+
+ /* Check whether the syscall is issued inside a restartable sequence */
+ rseq_syscall(regs);
+
+ ti_flags = *ti_flagsp;
+ if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE))
+ do_syscall_trace_leave(regs);
+
+ if (unlikely(r3 >= (unsigned long)-MAX_ERRNO)) {
+ if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
+ r3 = -r3;
+ regs->ccr |= 0x10000000; /* Set SO bit in CR */
+ }
+ }
+
+ if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
+ if (ti_flags & _TIF_RESTOREALL)
+ ret = _TIF_RESTOREALL;
+ else
+ regs->gpr[3] = r3;
+ clear_bits(_TIF_PERSYSCALL_MASK, ti_flagsp);
+ } else {
+ regs->gpr[3] = r3;
+ }
+
+again:
+ local_irq_disable();
+ ti_flags = READ_ONCE(*ti_flagsp);
+ while (unlikely(ti_flags & _TIF_USER_WORK_MASK)) {
+ local_irq_enable();
+ if (ti_flags & _TIF_NEED_RESCHED) {
+ schedule();
+ } else {
+ /*
+ * SIGPENDING must restore signal handler function
+ * argument GPRs, and some non-volatiles (e.g., r1).
+ * Restore all for now. This could be made lighter.
+ */
+ if (ti_flags & _TIF_SIGPENDING)
+ ret |= _TIF_RESTOREALL;
+ do_notify_resume(regs, ti_flags);
+ }
+ local_irq_disable();
+ ti_flags = READ_ONCE(*ti_flagsp);
+ }
+
+#ifdef CONFIG_ALTIVEC
+ if ((regs->msr & (MSR_FP|MSR_VEC)) != (MSR_FP|MSR_VEC))
+#else
+ if ((regs->msr & MSR_FP) != MSR_FP)
+#endif
Use 'if (IS_ENABLED(CONFIG_ALTIVEC)) / else' instead of an
#ifdef/#else/#endif
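For instance with a mask variable, something like (untested, variable name
made up):

	unsigned long mathflags = MSR_FP;

	/* MSR_VEC is defined even without CONFIG_ALTIVEC, so this builds everywhere */
	if (IS_ENABLED(CONFIG_ALTIVEC))
		mathflags |= MSR_VEC;

	if ((regs->msr & mathflags) != mathflags)
		restore_math(regs);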
+ restore_math(regs);
+
+ /* This pattern matches prep_irq_for_idle */
+ __mtmsrd(0, 1); /* Disable MSR_EE and MSR_RI */
+ if (unlikely(lazy_irq_pending())) {
+ __mtmsrd(MSR_RI, 1);
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ local_irq_enable();
+ /* Took an interrupt which may have more exit work to do. */
+ goto again;
+ }
+ trace_hardirqs_on();
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+ irq_soft_mask_set(IRQS_ENABLED);
+
+ account_cpu_user_exit();
+
+#ifdef CONFIG_PPC_KUAP_DEBUG
+ if (mmu_has_feature(MMU_FTR_RADIX_KUAP))
+ WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
+#endif
Same as above: define a helper in the kuap header files to avoid the
#ifdefs and platform-specific stuff here.
+
+ return ret;
+}
+
Christophe