This is a work in progress that goes on top of the syscalls in C patch. It's not quite complete, 64e low level exit is not taken care of, and the new return is hacked into the existing interrupt handlers pretty quickly (e.g., full gprs handling is still ugly and could be cleaned), but that code touches exception-64s.S which is under heavy modification in parallel so I will rebase on top of that before polishing it properly. I will also try to convert to more IS_ENABLED() for Christophe.
I guess syscall_64.c will change to interrupt_64.c with this. Hopefully there is no fundamental problem with it, it replaces some fairly horrific asm code with stack frame bouncing and reusing for replay, and almost imcomprehensible maze of exit points and labels. This is just a quick preview because it's booting, if anyone can spot major issue with the approach would be good. Thanks, Nick --- .../powerpc/include/asm/book3s/64/kup-radix.h | 6 + arch/powerpc/kernel/entry_64.S | 475 ++++-------------- arch/powerpc/kernel/exceptions-64s.S | 55 +- arch/powerpc/kernel/syscall_64.c | 147 +++++- arch/powerpc/kernel/vector.S | 2 +- 5 files changed, 288 insertions(+), 397 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index ef2e65ea8a73..62ff2509cf51 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -60,6 +60,12 @@ #include <asm/mmu.h> #include <asm/ptrace.h> +static inline void kuap_restore_amr(struct pt_regs *regs) +{ + if (mmu_has_feature(MMU_FTR_RADIX_KUAP)) + mtspr(SPRN_AMR, regs->kuap); +} + static inline void kuap_check_amr(void) { #ifdef CONFIG_PPC_KUAP_DEBUG diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index d2efd1a96487..f1e973789a2a 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -16,6 +16,7 @@ #include <linux/errno.h> #include <linux/err.h> +#include <asm/cache.h> #include <asm/unistd.h> #include <asm/processor.h> #include <asm/page.h> @@ -130,9 +131,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) mtspr SPRN_SRR0,r4 mtspr SPRN_SRR1,r5 -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - std r5,PACATMSCRATCH(r13) -#endif mtlr r6 cmpdi r3,0 @@ -197,6 +195,7 @@ tabort_syscall: RFI_TO_USER b . /* prevent speculative execution */ #endif + _GLOBAL(ret_from_fork) bl schedule_tail REST_NVGPRS(r1) @@ -436,409 +435,149 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) addi r1,r1,SWITCH_FRAME_SIZE blr - .align 7 -_GLOBAL(ret_from_except) - ld r11,_TRAP(r1) - andi. r0,r11,1 - bne ret_from_except_lite - REST_NVGPRS(r1) - -_GLOBAL(ret_from_except_lite) /* - * Disable interrupts so that current_thread_info()->flags - * can't change between when we test it and when we return - * from the interrupt. - */ -#ifdef CONFIG_PPC_BOOK3E - wrteei 0 -#else - li r10,MSR_RI - mtmsrd r10,1 /* Update machine state */ -#endif /* CONFIG_PPC_BOOK3E */ + * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not + * touched, AMR not set, no exit work created, then this can be used. + */ + .balign IFETCH_ALIGN_BYTES +_GLOBAL(fast_interrupt_return) + ld r4,_MSR(r1) + andi. r0,r4,MSR_PR + bne .Lfast_user_interrupt_return + andi. r0,r4,MSR_RI + bne+ .Lfast_kernel_interrupt_return + addi r3,r1,STACK_FRAME_OVERHEAD + bl unrecoverable_exception + b . /* should not get here */ - ld r9, PACA_THREAD_INFO(r13) - ld r3,_MSR(r1) -#ifdef CONFIG_PPC_BOOK3E - ld r10,PACACURRENT(r13) -#endif /* CONFIG_PPC_BOOK3E */ - ld r4,TI_FLAGS(r9) - andi. r3,r3,MSR_PR - beq resume_kernel -#ifdef CONFIG_PPC_BOOK3E - lwz r3,(THREAD+THREAD_DBCR0)(r10) -#endif /* CONFIG_PPC_BOOK3E */ + .balign IFETCH_ALIGN_BYTES +_GLOBAL(interrupt_return) + REST_NVGPRS(r1) - /* Check current_thread_info()->flags */ - andi. r0,r4,_TIF_USER_WORK_MASK - bne 1f -#ifdef CONFIG_PPC_BOOK3E - /* - * Check to see if the dbcr0 register is set up to debug. - * Use the internal debug mode bit to do this. - */ - andis. r0,r3,DBCR0_IDM@h - beq restore - mfmsr r0 - rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */ - mtmsr r0 - mtspr SPRN_DBCR0,r3 - li r10, -1 - mtspr SPRN_DBSR,r10 - b restore -#else - addi r3,r1,STACK_FRAME_OVERHEAD - bl restore_math - b restore -#endif -1: andi. r0,r4,_TIF_NEED_RESCHED - beq 2f - bl restore_interrupts - SCHEDULE_USER - b ret_from_except_lite -2: -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - andi. r0,r4,_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM - bne 3f /* only restore TM if nothing else to do */ + .balign IFETCH_ALIGN_BYTES +_GLOBAL(interrupt_return_lite) + ld r4,_MSR(r1) + andi. r0,r4,MSR_PR + beq kernel_interrupt_return +user_interrupt_return: addi r3,r1,STACK_FRAME_OVERHEAD - bl restore_tm_state - b restore -3: -#endif - bl save_nvgprs - /* - * Use a non volatile GPR to save and restore our thread_info flags - * across the call to restore_interrupts. - */ - mr r30,r4 - bl restore_interrupts - mr r4,r30 - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_notify_resume - b ret_from_except - -resume_kernel: - /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ - andis. r8,r4,_TIF_EMULATE_STACK_STORE@h - beq+ 1f + bl interrupt_exit_user_prepare + cmpdi r3,0 + bne- .Lrestore_nvgprs - addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */ +.Lfast_user_interrupt_return: + ld r11,_NIP(r1) + ld r12,_MSR(r1) +BEGIN_FTR_SECTION + ld r10,_PPR(r1) + mtspr SPRN_PPR,r10 +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 - ld r3,GPR1(r1) - subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */ - mr r4,r1 /* src: current exception frame */ - mr r1,r3 /* Reroute the trampoline frame to r1 */ +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + ldarx r0,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - /* Copy from the original to the trampoline. */ - li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */ - li r6,0 /* start offset: 0 */ - mtctr r5 -2: ldx r0,r6,r4 - stdx r0,r6,r3 - addi r6,r6,8 - bdnz 2b - - /* Do real store operation to complete stdu */ - ld r5,GPR1(r1) - std r8,0(r5) - - /* Clear _TIF_EMULATE_STACK_STORE flag */ - lis r11,_TIF_EMULATE_STACK_STORE@h - addi r5,r9,TI_FLAGS -0: ldarx r4,0,r5 - andc r4,r4,r11 - stdcx. r4,0,r5 - bne- 0b -1: - -#ifdef CONFIG_PREEMPT - /* Check if we need to preempt */ - andi. r0,r4,_TIF_NEED_RESCHED - beq+ restore - /* Check that preempt_count() == 0 and interrupts are enabled */ - lwz r8,TI_PREEMPT(r9) - cmpwi cr0,r8,0 - bne restore - ld r0,SOFTE(r1) - andi. r0,r0,IRQS_DISABLED - bne restore + ld r3,_CCR(r1) + ld r4,_LINK(r1) + ld r5,_CTR(r1) + ld r6,_XER(r1) + li r0,0 - /* - * Here we are preempting the current task. We want to make - * sure we are soft-disabled first and reconcile irq state. - */ - RECONCILE_IRQ_STATE(r3,r4) - bl preempt_schedule_irq + REST_4GPRS(7, r1) + REST_2GPRS(11, r1) + REST_GPR(13, r1) - /* - * arch_local_irq_restore() from preempt_schedule_irq above may - * enable hard interrupt but we really should disable interrupts - * when we return from the interrupt, and so that we don't get - * interrupted after loading SRR0/1. - */ -#ifdef CONFIG_PPC_BOOK3E - wrteei 0 -#else - li r10,MSR_RI - mtmsrd r10,1 /* Update machine state */ -#endif /* CONFIG_PPC_BOOK3E */ -#endif /* CONFIG_PREEMPT */ + mtcr r3 + mtlr r4 + mtctr r5 + mtspr SPRN_XER,r6 - .globl fast_exc_return_irq -fast_exc_return_irq: -restore: - /* - * This is the main kernel exit path. First we check if we - * are about to re-enable interrupts - */ - ld r5,SOFTE(r1) - lbz r6,PACAIRQSOFTMASK(r13) - andi. r5,r5,IRQS_DISABLED - bne .Lrestore_irq_off + REST_4GPRS(2, r1) + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + RFI_TO_USER + b . /* prevent speculative execution */ - /* We are enabling, were we already enabled ? Yes, just return */ - andi. r6,r6,IRQS_DISABLED - beq cr0,.Ldo_restore +.Lrestore_nvgprs: + REST_NVGPRS(r1) + b .Lfast_user_interrupt_return - /* - * We are about to soft-enable interrupts (we are hard disabled - * at this point). We check if there's anything that needs to - * be replayed first. - */ - lbz r0,PACAIRQHAPPENED(r13) - cmpwi cr0,r0,0 - bne- .Lrestore_check_irq_replay + .balign IFETCH_ALIGN_BYTES +kernel_interrupt_return: + addi r3,r1,STACK_FRAME_OVERHEAD + bl interrupt_exit_kernel_prepare + cmpdi cr1,r3,0 - /* - * Get here when nothing happened while soft-disabled, just - * soft-enable and move-on. We will hard-enable as a side - * effect of rfi - */ -.Lrestore_no_replay: - TRACE_ENABLE_INTS - li r0,IRQS_ENABLED - stb r0,PACAIRQSOFTMASK(r13); +.Lfast_kernel_interrupt_return: + ld r11,_NIP(r1) + ld r12,_MSR(r1) + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 - /* - * Final return path. BookE is handled in a different file - */ -.Ldo_restore: -#ifdef CONFIG_PPC_BOOK3E - b exception_return_book3e -#else - /* - * Clear the reservation. If we know the CPU tracks the address of - * the reservation then we can potentially save some cycles and use - * a larx. On POWER6 and POWER7 this is significantly faster. - */ BEGIN_FTR_SECTION stdcx. r0,0,r1 /* to clear the reservation */ FTR_SECTION_ELSE - ldarx r4,0,r1 + ldarx r0,0,r1 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - /* - * Some code path such as load_up_fpu or altivec return directly - * here. They run entirely hard disabled and do not alter the - * interrupt state. They also don't use lwarx/stwcx. and thus - * are known not to leave dangling reservations. - */ - .globl fast_exception_return -fast_exception_return: - ld r3,_MSR(r1) - ld r4,_CTR(r1) - ld r0,_LINK(r1) - mtctr r4 - mtlr r0 - ld r4,_XER(r1) - mtspr SPRN_XER,r4 - - kuap_check_amr r5, r6 - - REST_8GPRS(5, r1) - - andi. r0,r3,MSR_RI - beq- .Lunrecov_restore + ld r3,_CCR(r1) + ld r4,_LINK(r1) + ld r5,_CTR(r1) + ld r6,_XER(r1) + li r0,0 - /* - * Clear RI before restoring r13. If we are returning to - * userspace and we take an exception after restoring r13, - * we end up corrupting the userspace r13 value. - */ - li r4,0 - mtmsrd r4,1 + REST_4GPRS(7, r1) + REST_2GPRS(11, r1) -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* TM debug */ - std r3, PACATMSCRATCH(r13) /* Stash returned-to MSR */ -#endif - /* - * r13 is our per cpu area, only restore it if we are returning to - * userspace the value stored in the stack frame may belong to - * another CPU. - */ - andi. r0,r3,MSR_PR - beq 1f -BEGIN_FTR_SECTION - /* Restore PPR */ - ld r2,_PPR(r1) - mtspr SPRN_PPR,r2 -END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - ACCOUNT_CPU_USER_EXIT(r13, r2, r4) - REST_GPR(13, r1) + bne- cr1,1f /* emulate stack store */ + mtcr r3 + mtlr r4 + mtctr r5 + mtspr SPRN_XER,r6 /* - * We don't need to restore AMR on the way back to userspace for KUAP. - * The value of AMR only matters while we're in the kernel. + * Leaving a stale exception_marker on the stack can confuse + * the reliable stack unwinder later on. Clear it. */ - mtspr SPRN_SRR1,r3 + std r0,STACK_FRAME_OVERHEAD-16(r1) - ld r2,_CCR(r1) - mtcrf 0xFF,r2 - ld r2,_NIP(r1) - mtspr SPRN_SRR0,r2 - - ld r0,GPR0(r1) - ld r2,GPR2(r1) - ld r3,GPR3(r1) - ld r4,GPR4(r1) - ld r1,GPR1(r1) - RFI_TO_USER + REST_4GPRS(2, r1) + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + RFI_TO_KERNEL b . /* prevent speculative execution */ -1: mtspr SPRN_SRR1,r3 - - ld r2,_CCR(r1) - mtcrf 0xFF,r2 - ld r2,_NIP(r1) - mtspr SPRN_SRR0,r2 +1: mtcr r3 + mtlr r4 + mtctr r5 + mtspr SPRN_XER,r6 /* * Leaving a stale exception_marker on the stack can confuse * the reliable stack unwinder later on. Clear it. */ - li r2,0 - std r2,STACK_FRAME_OVERHEAD-16(r1) + std r0,STACK_FRAME_OVERHEAD-16(r1) - ld r0,GPR0(r1) - ld r2,GPR2(r1) - ld r3,GPR3(r1) + REST_4GPRS(2, r1) + REST_GPR(6, r1) + REST_GPR(0, r1) - kuap_restore_amr r4 + /* Nasty emulate stack store case. */ + std r9,PACA_EXGEN+0(r13) + addi r9,r1,INT_FRAME_SIZE /* get original r1 */ + REST_GPR(1, r1) + std r9,0(r1) + ld r9,PACA_EXGEN+0(r13) - ld r4,GPR4(r1) - ld r1,GPR1(r1) RFI_TO_KERNEL b . /* prevent speculative execution */ -#endif /* CONFIG_PPC_BOOK3E */ - - /* - * We are returning to a context with interrupts soft disabled. - * - * However, we may also about to hard enable, so we need to - * make sure that in this case, we also clear PACA_IRQ_HARD_DIS - * or that bit can get out of sync and bad things will happen - */ -.Lrestore_irq_off: - ld r3,_MSR(r1) - lbz r7,PACAIRQHAPPENED(r13) - andi. r0,r3,MSR_EE - beq 1f - rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS - stb r7,PACAIRQHAPPENED(r13) -1: -#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG) - /* The interrupt should not have soft enabled. */ - lbz r7,PACAIRQSOFTMASK(r13) -1: tdeqi r7,IRQS_ENABLED - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING -#endif - b .Ldo_restore - - /* - * Something did happen, check if a re-emit is needed - * (this also clears paca->irq_happened) - */ -.Lrestore_check_irq_replay: - /* XXX: We could implement a fast path here where we check - * for irq_happened being just 0x01, in which case we can - * clear it and return. That means that we would potentially - * miss a decrementer having wrapped all the way around. - * - * Still, this might be useful for things like hash_page - */ - bl __check_irq_replay - cmpwi cr0,r3,0 - beq .Lrestore_no_replay - - /* - * We need to re-emit an interrupt. We do so by re-using our - * existing exception frame. We first change the trap value, - * but we need to ensure we preserve the low nibble of it - */ - ld r4,_TRAP(r1) - clrldi r4,r4,60 - or r4,r4,r3 - std r4,_TRAP(r1) - - /* - * PACA_IRQ_HARD_DIS won't always be set here, so set it now - * to reconcile the IRQ state. Tracing is already accounted for. - */ - lbz r4,PACAIRQHAPPENED(r13) - ori r4,r4,PACA_IRQ_HARD_DIS - stb r4,PACAIRQHAPPENED(r13) - - /* - * Then find the right handler and call it. Interrupts are - * still soft-disabled and we keep them that way. - */ - cmpwi cr0,r3,0x500 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl do_IRQ - b ret_from_except -1: cmpwi cr0,r3,0xf00 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl performance_monitor_exception - b ret_from_except -1: cmpwi cr0,r3,0xe60 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl handle_hmi_exception - b ret_from_except -1: cmpwi cr0,r3,0x900 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl timer_interrupt - b ret_from_except -#ifdef CONFIG_PPC_DOORBELL -1: -#ifdef CONFIG_PPC_BOOK3E - cmpwi cr0,r3,0x280 -#else - cmpwi cr0,r3,0xa00 -#endif /* CONFIG_PPC_BOOK3E */ - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl doorbell_exception -#endif /* CONFIG_PPC_DOORBELL */ -1: b ret_from_except /* What else to do here ? */ - -.Lunrecov_restore: - addi r3,r1,STACK_FRAME_OVERHEAD - bl unrecoverable_exception - b .Lunrecov_restore - -_ASM_NOKPROBE_SYMBOL(ret_from_except); -_ASM_NOKPROBE_SYMBOL(ret_from_except_lite); -_ASM_NOKPROBE_SYMBOL(resume_kernel); -_ASM_NOKPROBE_SYMBOL(fast_exc_return_irq); -_ASM_NOKPROBE_SYMBOL(restore); -_ASM_NOKPROBE_SYMBOL(fast_exception_return); - - #ifdef CONFIG_PPC_RTAS /* * On CHRP, the Run-Time Abstraction Services (RTAS) have to be diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 768f133de4f1..51223299d22d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -696,7 +696,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) RECONCILE_IRQ_STATE(r10, r11); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ bl hdlr; \ - b ret_from_except + b interrupt_return /* * Like EXC_COMMON, but for exceptions that can occur in the idle task and @@ -706,11 +706,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) EXC_COMMON_BEGIN(name); \ EXCEPTION_COMMON(PACA_EXGEN, realvec); \ FINISH_NAP; \ + bl save_nvgprs; \ RECONCILE_IRQ_STATE(r10, r11); \ RUNLATCH_ON; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ bl hdlr; \ - b ret_from_except_lite + b interrupt_return /* @@ -1058,7 +1059,7 @@ EXC_COMMON_BEGIN(machine_check_common) bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_exception - b ret_from_except + b interrupt_return #define MACHINE_CHECK_HANDLER_WINDUP \ /* Clear MSR_RI before setting SRR0 and SRR1. */\ @@ -1301,7 +1302,7 @@ BEGIN_MMU_FTR_SECTION bl do_slb_fault cmpdi r3,0 bne- 1f - b fast_exception_return + b fast_interrupt_return 1: /* Error case */ MMU_FTR_SECTION_ELSE /* Radix case, access is outside page table range */ @@ -1314,7 +1315,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) ld r5,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault - b ret_from_except + b interrupt_return EXC_REAL(instruction_access, 0x400, 0x80) @@ -1350,7 +1351,7 @@ BEGIN_MMU_FTR_SECTION bl do_slb_fault cmpdi r3,0 bne- 1f - b fast_exception_return + b fast_interrupt_return 1: /* Error case */ MMU_FTR_SECTION_ELSE /* Radix case, access is outside page table range */ @@ -1363,7 +1364,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) ld r5,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault - b ret_from_except + b interrupt_return EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) @@ -1416,7 +1417,7 @@ EXC_COMMON_BEGIN(alignment_common) RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl alignment_exception - b ret_from_except + b interrupt_return EXC_REAL(program_check, 0x700, 0x100) @@ -1454,7 +1455,7 @@ EXC_COMMON_BEGIN(program_check_common) RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl program_check_exception - b ret_from_except + b interrupt_return EXC_REAL(fp_unavailable, 0x800, 0x100) @@ -1479,14 +1480,14 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif bl load_up_fpu - b fast_exception_return + b fast_interrupt_return #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl fp_unavailable_tm - b ret_from_except + b interrupt_return #endif @@ -1676,7 +1677,7 @@ BEGIN_MMU_FTR_SECTION MMU_FTR_SECTION_ELSE bl unknown_exception ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) - b ret_from_except + b interrupt_return EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0x20) @@ -1744,7 +1745,7 @@ EXC_COMMON_BEGIN(hmi_exception_common) RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl handle_hmi_exception - b ret_from_except + b interrupt_return EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20, IRQS_DISABLED) EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80, IRQS_DISABLED) @@ -1792,14 +1793,14 @@ BEGIN_FTR_SECTION END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69) #endif bl load_up_altivec - b fast_exception_return + b fast_interrupt_return #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_tm - b ret_from_except + b interrupt_return #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) @@ -1808,7 +1809,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_exception - b ret_from_except + b interrupt_return EXC_REAL_OOL(vsx_unavailable, 0xf40, 0x20) @@ -1835,7 +1836,7 @@ BEGIN_FTR_SECTION RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_tm - b ret_from_except + b interrupt_return #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) @@ -1844,7 +1845,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_exception - b ret_from_except + b interrupt_return EXC_REAL_OOL(facility_unavailable, 0xf60, 0x20) @@ -2046,7 +2047,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl soft_nmi_interrupt - b ret_from_except + b interrupt_return #else /* CONFIG_PPC_WATCHDOG */ #define MASKED_DEC_HANDLER_LABEL 2f /* normal return */ @@ -2329,7 +2330,7 @@ do_hash_page: cmpdi r3,0 /* see if __hash_page succeeded */ /* Success */ - beq fast_exc_return_irq /* Return from exception on success */ + beq interrupt_return_lite /* Return from exception on success */ /* Error */ blt- 13f @@ -2344,16 +2345,16 @@ handle_page_fault: bne- handle_dabr_fault ld r4,_DAR(r1) ld r5,_DSISR(r1) + bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl do_page_fault cmpdi r3,0 - beq+ ret_from_except_lite - bl save_nvgprs + beq+ interrupt_return_lite mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD lwz r4,_DAR(r1) bl bad_page_fault - b ret_from_except + b interrupt_return /* We have a data breakpoint exception - handle it */ handle_dabr_fault: @@ -2365,9 +2366,9 @@ handle_dabr_fault: /* * do_break() may have changed the NV GPRS while handling a breakpoint. * If so, we need to restore them with their updated values. Don't use - * ret_from_except_lite here. + * interrupt_return_lite here. */ - b ret_from_except + b interrupt_return #ifdef CONFIG_PPC_BOOK3S_64 @@ -2379,7 +2380,7 @@ handle_dabr_fault: addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) bl low_hash_fault - b ret_from_except + b interrupt_return #endif /* @@ -2394,7 +2395,7 @@ handle_dabr_fault: addi r3,r1,STACK_FRAME_OVERHEAD li r5,SIGSEGV bl bad_page_fault - b ret_from_except + b interrupt_return /* * When doorbell is triggered from system reset wakeup, the message is diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index d42519b86ddd..1e7c3e47a8b8 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -161,13 +161,17 @@ unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs) __mtmsrd(MSR_RI, 1); local_paca->irq_happened |= PACA_IRQ_HARD_DIS; local_irq_enable(); - /* Took an interrupt which may have more exit work to do. */ + /* Took an interrupt, may have more exit work to do. */ goto again; } trace_hardirqs_on(); local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; irq_soft_mask_set(IRQS_ENABLED); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + local_paca->tm_scratch = regs->msr; +#endif + kuap_check_amr(); account_cpu_user_exit(); @@ -175,3 +179,144 @@ unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs) return ret; } +unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr) +{ +#ifdef CONFIG_PPC_BOOK3E + struct thread_struct *ts = ¤t->thread; +#endif + unsigned long *ti_flagsp = ¤t_thread_info()->flags; + unsigned long ti_flags; + unsigned long flags; + unsigned long ret = 0; + + BUG_ON(!FULL_REGS(regs)); + BUG_ON(!(regs->msr & MSR_RI)); + BUG_ON(regs->softe != IRQS_ENABLED); + + local_irq_save(flags); + +again: + ti_flags = READ_ONCE(*ti_flagsp); + while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { + local_irq_enable(); /* returning to user: may enable */ + if (ti_flags & _TIF_NEED_RESCHED) { + schedule(); + } else { + if (ti_flags & _TIF_SIGPENDING) + ret |= _TIF_RESTOREALL; + do_notify_resume(regs, ti_flags); + } + local_irq_disable(); + ti_flags = READ_ONCE(*ti_flagsp); + } + +#ifdef CONFIG_PPC_BOOK3S + if (IS_ENABLED(CONFIG_PPC_FPU)) { + unsigned long mathflags = MSR_FP; + + if (IS_ENABLED(CONFIG_ALTIVEC)) + mathflags |= MSR_VEC; + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (ti_flags & _TIF_RESTORE_TM) + restore_tm_state(regs); + else +#endif + if ((regs->msr & mathflags) != mathflags) + restore_math(regs); + } +#endif + + __mtmsrd(0, 1); /* Disable MSR_EE and MSR_RI */ + if (unlikely(lazy_irq_pending())) { + __mtmsrd(MSR_RI, 1); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + local_irq_enable(); + local_irq_disable(); + /* Took an interrupt, may have more exit work to do. */ + goto again; + } + trace_hardirqs_on(); + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + irq_soft_mask_set(IRQS_ENABLED); + +#ifdef CONFIG_PPC_BOOK3E + if (unlikely(ts->debug.debug.dbcr0 & DBCR0_IDM)) { + /* + * Check to see if the dbcr0 register is set up to debug. + * Use the internal debug mode bit to do this. + */ + mtmsr(mfmsr() & ~MSR_DE); + mtspr(SPRN_DBCR0, ts->debug.debug.dbcr0); + mtspr(SPRN_DBSR, -1); + } +#endif + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + local_paca->tm_scratch = regs->msr; +#endif + + kuap_check_amr(); + + account_cpu_user_exit(); + + return ret; +} + +void unrecoverable_exception(struct pt_regs *regs); +void preempt_schedule_irq(void); + + +unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr) +{ + unsigned long *ti_flagsp = ¤t_thread_info()->flags; + unsigned long flags; + + BUG_ON(!FULL_REGS(regs)); + local_irq_save(flags); + + if (unlikely(!(regs->msr & MSR_RI))) + unrecoverable_exception(regs); + +again: + if (IS_ENABLED(CONFIG_PREEMPT)) { + /* Return to preemptible kernel context */ + if (unlikely(*ti_flagsp & _TIF_NEED_RESCHED)) { + if (preempt_count() == 0 && regs->softe == IRQS_ENABLED) + preempt_schedule_irq(); + } + } + + __mtmsrd(0, 1); /* Disable MSR_EE and MSR_RI */ + if (regs->softe == IRQS_ENABLED) { + if (unlikely(lazy_irq_pending())) { + __mtmsrd(MSR_RI, 1); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + local_irq_enable(); + local_irq_disable(); + /* Took an interrupt, may have more exit work to do. */ + goto again; + } + trace_hardirqs_on(); + irq_soft_mask_set(IRQS_ENABLED); + } + if (regs->msr & MSR_EE) + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + local_paca->tm_scratch = regs->msr; +#endif + + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ + kuap_restore_amr(regs); + + if (unlikely(*ti_flagsp & _TIF_EMULATE_STACK_STORE)) { + clear_bits(_TIF_EMULATE_STACK_STORE, ti_flagsp); + return 1; + } + return 0; +} + diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 8eb867dbad5f..44e7a776e56f 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -131,7 +131,7 @@ _GLOBAL(load_up_vsx) /* enable use of VSX after return */ oris r12,r12,MSR_VSX@h std r12,_MSR(r1) - b fast_exception_return + b fast_interrupt_return #endif /* CONFIG_VSX */ -- 2.22.0