Enable the syscall entry and exit path from generic framework. Signed-off-by: Mukesh Kumar Chaurasiya <mchau...@linux.ibm.com> --- arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/interrupt.c | 46 +++++++---- arch/powerpc/kernel/ptrace/ptrace.c | 103 ------------------------ arch/powerpc/kernel/signal.c | 8 +- arch/powerpc/kernel/syscall.c | 117 +--------------------------- 5 files changed, 38 insertions(+), 237 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 6722625a406a0..45b70ccf7c89e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -202,6 +202,7 @@ config PPC select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES if PPC_BARRIER_NOSPEC select GENERIC_EARLY_IOREMAP + select GENERIC_ENTRY select GENERIC_GETTIMEOFDAY select GENERIC_IDLE_POLL_SETUP select GENERIC_IOREMAP diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 7f31f3fb9c1d8..8731064631de0 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include <linux/context_tracking.h> +#include <linux/entry-common.h> #include <linux/err.h> #include <linux/compat.h> #include <linux/rseq.h> @@ -163,15 +164,10 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, unsigned long ret = 0; bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv; - CT_WARN_ON(ct_state() == CT_STATE_USER); - kuap_assert_locked(); regs->result = r3; - /* Check whether the syscall is issued inside a restartable sequence */ - rseq_syscall(regs); - ti_flags = read_thread_flags(); if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) { @@ -192,13 +188,27 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, } if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) { - do_syscall_trace_leave(regs); ret |= _TIF_RESTOREALL; } - local_irq_disable(); - ret = interrupt_exit_user_prepare_main(ret, regs); +again: + syscall_exit_to_user_mode(regs); + + user_enter_irqoff(); + if (!prep_irq_for_enabled_exit(true)) { + user_exit_irqoff(); + local_irq_enable(); + local_irq_disable(); + goto again; + } + + /* Restore user access locks last */ + kuap_user_restore(regs); + if (unlikely((local_paca->generic_fw_flags & GFW_RESTORE_ALL) == GFW_RESTORE_ALL)) { + ret |= _TIF_RESTOREALL; + local_paca->generic_fw_flags &= ~GFW_RESTORE_ALL; + } #ifdef CONFIG_PPC64 regs->exit_result = ret; #endif @@ -209,6 +219,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, #ifdef CONFIG_PPC64 notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs) { + unsigned long ret = 0; /* * This is called when detecting a soft-pending interrupt as well as * an alternate-return interrupt. So we can't just have the alternate @@ -222,14 +233,23 @@ notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *reg #ifdef CONFIG_PPC_BOOK3S_64 set_kuap(AMR_KUAP_BLOCKED); #endif +again: + syscall_exit_to_user_mode(regs); - trace_hardirqs_off(); - user_exit_irqoff(); - account_cpu_user_entry(); + user_enter_irqoff(); + if (!prep_irq_for_enabled_exit(true)) { + user_exit_irqoff(); + local_irq_enable(); + local_irq_disable(); + goto again; + } - BUG_ON(!user_mode(regs)); + if (unlikely((local_paca->generic_fw_flags & GFW_RESTORE_ALL) == GFW_RESTORE_ALL)) { + ret = _TIF_RESTOREALL; + local_paca->generic_fw_flags &= ~GFW_RESTORE_ALL; + } - regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs); + regs->exit_result |= ret; return regs->exit_result; } diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index c6997df632873..2a2b0b94a3eaa 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -21,9 +21,6 @@ #include <asm/switch_to.h> #include <asm/debug.h> -#define CREATE_TRACE_POINTS -#include <trace/events/syscalls.h> - #include "ptrace-decl.h" /* @@ -233,106 +230,6 @@ static int do_seccomp(struct pt_regs *regs) static inline int do_seccomp(struct pt_regs *regs) { return 0; } #endif /* CONFIG_SECCOMP */ -/** - * do_syscall_trace_enter() - Do syscall tracing on kernel entry. - * @regs: the pt_regs of the task to trace (current) - * - * Performs various types of tracing on syscall entry. This includes seccomp, - * ptrace, syscall tracepoints and audit. - * - * The pt_regs are potentially visible to userspace via ptrace, so their - * contents is ABI. - * - * One or more of the tracers may modify the contents of pt_regs, in particular - * to modify arguments or even the syscall number itself. - * - * It's also possible that a tracer can choose to reject the system call. In - * that case this function will return an illegal syscall number, and will put - * an appropriate return value in regs->r3. - * - * Return: the (possibly changed) syscall number. - */ -long do_syscall_trace_enter(struct pt_regs *regs) -{ - u32 flags; - - flags = read_thread_flags() & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE); - - if (flags) { - int rc = ptrace_report_syscall_entry(regs); - - if (unlikely(flags & _TIF_SYSCALL_EMU)) { - /* - * A nonzero return code from - * ptrace_report_syscall_entry() tells us to prevent - * the syscall execution, but we are not going to - * execute it anyway. - * - * Returning -1 will skip the syscall execution. We want - * to avoid clobbering any registers, so we don't goto - * the skip label below. - */ - return -1; - } - - if (rc) { - /* - * The tracer decided to abort the syscall. Note that - * the tracer may also just change regs->gpr[0] to an - * invalid syscall number, that is handled below on the - * exit path. - */ - goto skip; - } - } - - /* Run seccomp after ptrace; allow it to set gpr[3]. */ - if (do_seccomp(regs)) - return -1; - - /* Avoid trace and audit when syscall is invalid. */ - if (regs->gpr[0] >= NR_syscalls) - goto skip; - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_enter(regs, regs->gpr[0]); - - if (!is_32bit_task()) - audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4], - regs->gpr[5], regs->gpr[6]); - else - audit_syscall_entry(regs->gpr[0], - regs->gpr[3] & 0xffffffff, - regs->gpr[4] & 0xffffffff, - regs->gpr[5] & 0xffffffff, - regs->gpr[6] & 0xffffffff); - - /* Return the possibly modified but valid syscall number */ - return regs->gpr[0]; - -skip: - /* - * If we are aborting explicitly, or if the syscall number is - * now invalid, set the return value to -ENOSYS. - */ - regs->gpr[3] = -ENOSYS; - return -1; -} - -void do_syscall_trace_leave(struct pt_regs *regs) -{ - int step; - - audit_syscall_exit(regs); - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_exit(regs, regs->result); - - step = test_thread_flag(TIF_SINGLESTEP); - if (step || test_thread_flag(TIF_SYSCALL_TRACE)) - ptrace_report_syscall_exit(regs, step); -} - void __init pt_regs_check(void); /* diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 719930cf4ae1f..8e1a1b26b5eae 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -6,6 +6,7 @@ * Extracted from signal_32.c and signal_64.c */ +#include <linux/entry-common.h> #include <linux/resume_user_mode.h> #include <linux/signal.h> #include <linux/uprobes.h> @@ -22,11 +23,6 @@ #include "signal.h" -/* This will be removed */ -#ifdef CONFIG_GENERIC_ENTRY -#include <linux/entry-common.h> -#endif /* CONFIG_GENERIC_ENTRY */ - #ifdef CONFIG_VSX unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) @@ -374,11 +370,9 @@ void signal_fault(struct task_struct *tsk, struct pt_regs *regs, task_pid_nr(tsk), where, ptr, regs->nip, regs->link); } -#ifdef CONFIG_GENERIC_ENTRY void arch_do_signal_or_restart(struct pt_regs *regs) { BUG_ON(regs != current->thread.regs); local_paca->generic_fw_flags |= GFW_RESTORE_ALL; do_signal(current); } -#endif /* CONFIG_GENERIC_ENTRY */ diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c index 9f03a6263fb41..66fd6ca4462b0 100644 --- a/arch/powerpc/kernel/syscall.c +++ b/arch/powerpc/kernel/syscall.c @@ -3,6 +3,7 @@ #include <linux/compat.h> #include <linux/context_tracking.h> #include <linux/randomize_kstack.h> +#include <linux/entry-common.h> #include <asm/interrupt.h> #include <asm/kup.h> @@ -21,121 +22,9 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0) kuap_lock(); add_random_kstack_offset(); + r0 = syscall_enter_from_user_mode(regs, r0); - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); - - trace_hardirqs_off(); /* finish reconciling */ - - CT_WARN_ON(ct_state() == CT_STATE_KERNEL); - user_exit_irqoff(); - - BUG_ON(regs_is_unrecoverable(regs)); - BUG_ON(!user_mode(regs)); - BUG_ON(regs_irqs_disabled(regs)); - -#ifdef CONFIG_PPC_PKEY - if (mmu_has_feature(MMU_FTR_PKEY)) { - unsigned long amr, iamr; - bool flush_needed = false; - /* - * When entering from userspace we mostly have the AMR/IAMR - * different from kernel default values. Hence don't compare. - */ - amr = mfspr(SPRN_AMR); - iamr = mfspr(SPRN_IAMR); - regs->amr = amr; - regs->iamr = iamr; - if (mmu_has_feature(MMU_FTR_KUAP)) { - mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); - flush_needed = true; - } - if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { - mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); - flush_needed = true; - } - if (flush_needed) - isync(); - } else -#endif - kuap_assert_locked(); - - booke_restore_dbcr0(); - - account_cpu_user_entry(); - - account_stolen_time(); - - /* - * This is not required for the syscall exit path, but makes the - * stack frame look nicer. If this was initialised in the first stack - * frame, or if the unwinder was taught the first stack frame always - * returns to user with IRQS_ENABLED, this store could be avoided! - */ - irq_soft_mask_regs_set_state(regs, IRQS_ENABLED); - - /* - * If system call is called with TM active, set _TIF_RESTOREALL to - * prevent RFSCV being used to return to userspace, because POWER9 - * TM implementation has problems with this instruction returning to - * transactional state. Final register values are not relevant because - * the transaction will be aborted upon return anyway. Or in the case - * of unsupported_scv SIGILL fault, the return state does not much - * matter because it's an edge case. - */ - if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && - unlikely(MSR_TM_TRANSACTIONAL(regs->msr))) - set_bits(_TIF_RESTOREALL, ¤t_thread_info()->flags); - - /* - * If the system call was made with a transaction active, doom it and - * return without performing the system call. Unless it was an - * unsupported scv vector, in which case it's treated like an illegal - * instruction. - */ -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) && - !trap_is_unsupported_scv(regs)) { - /* Enable TM in the kernel, and disable EE (for scv) */ - hard_irq_disable(); - mtmsr(mfmsr() | MSR_TM); - - /* tabort, this dooms the transaction, nothing else */ - asm volatile(".long 0x7c00071d | ((%0) << 16)" - :: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)); - - /* - * Userspace will never see the return value. Execution will - * resume after the tbegin. of the aborted transaction with the - * checkpointed register state. A context switch could occur - * or signal delivered to the process before resuming the - * doomed transaction context, but that should all be handled - * as expected. - */ - return -ENOSYS; - } -#endif // CONFIG_PPC_TRANSACTIONAL_MEM - - local_irq_enable(); - - if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) { - if (unlikely(trap_is_unsupported_scv(regs))) { - /* Unsupported scv vector */ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return regs->gpr[3]; - } - /* - * We use the return value of do_syscall_trace_enter() as the - * syscall number. If the syscall was rejected for any reason - * do_syscall_trace_enter() returns an invalid syscall number - * and the test against NR_syscalls will fail and the return - * value to be used is in regs->gpr[3]. - */ - r0 = do_syscall_trace_enter(regs); - if (unlikely(r0 >= NR_syscalls)) - return regs->gpr[3]; - - } else if (unlikely(r0 >= NR_syscalls)) { + if (unlikely(r0 >= NR_syscalls)) { if (unlikely(trap_is_unsupported_scv(regs))) { /* Unsupported scv vector */ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); -- 2.49.0