From: Russell Currey <rus...@russell.cc> Kernel Userspace Access Prevention utilises a feature of the Radix MMU which disallows read and write access to userspace addresses. By utilising this, the kernel is prevented from accessing user data from outside of trusted paths that perform proper safety checks, such as copy_{to/from}_user() and friends.
Userspace access is disabled from early boot and is only enabled when performing an operation like copy_{to/from}_user(). The register that controls this (AMR) does not prevent userspace from accessing itself, so there is no need to save and restore when entering and exiting userspace. When entering the kernel from the kernel we save AMR and if it is not blocking user access (because eg. we faulted doing a user access) we reblock user access for the duration of the exception (ie. the page fault) and then restore the AMR when returning back to the kernel. This feature has a slight performance impact which I roughly measured to be 3% slower in the worst case (performing 1GB of 1 byte read()/write() syscalls), and is gated behind the CONFIG_PPC_KUAP option for performance-critical builds. This feature can be tested by using the lkdtm driver (CONFIG_LKDTM=y) and performing the following: # (echo ACCESS_USERSPACE) > [debugfs]/provoke-crash/DIRECT If enabled, this should send SIGSEGV to the thread. mpe: - Drop the unused paca flags. - Zero the UAMOR to be safe. - Save the AMR when we enter the kernel from the kernel and then block user access again if it's not already blocked. - Restore on the way back to the kernel. - This means we handle nesting of interrupts properly, ie. we are protected inside the page fault handler caused by a user access. - Add paranoid checking of AMR in switch and syscall return. - Add isync()'s around AMR writes as per the ISA. - Support selectively disabling read or write, with no support for nesting. Co-authored-by: Michael Ellerman <m...@ellerman.id.au> Signed-off-by: Russell Currey <rus...@russell.cc> Signed-off-by: Michael Ellerman <m...@ellerman.id.au> --- v5: - On kernel entry check if the AMR is already blocking user access and if so don't do the mtspr again, because it's slow (pointed out by Nick) (in kuap_save_amr_and_lock). - Rework the constants to make the asm a bit cleaner and avoid any hard coded shifts. - Selectively disable read or write, we don't support separately nesting read/write access (use allow_user_access() instead) and shouldn't need to (famous last words). - Add isync() before & after setting AMR in set_kuap() as per the ISA. We'll investigate whether they are both really needed in future. - Don't touch the AMR in hmi_exception_early() it never goes to virtual mode. - Check the full value in kuap_check_amr v4: - Drop the unused paca flags. - Zero the UAMOR to be safe. - Save the AMR when we enter the kernel from the kernel and then lock it again. - Restore on the way back to the kernel. - That means we handle nesting of interrupts properly, ie. we are protected inside the page fault handler caused by a user access. - Add paranoid checking of AMR in switch and syscall return. - Add an isync() to prevent_user_access() .../powerpc/include/asm/book3s/64/kup-radix.h | 107 ++++++++++++++++++ arch/powerpc/include/asm/exception-64s.h | 2 + arch/powerpc/include/asm/feature-fixups.h | 3 + arch/powerpc/include/asm/kup.h | 4 + arch/powerpc/include/asm/mmu.h | 10 +- arch/powerpc/kernel/entry_64.S | 27 ++++- arch/powerpc/kernel/exceptions-64s.S | 3 + arch/powerpc/mm/pgtable-radix.c | 18 +++ arch/powerpc/mm/pkeys.c | 1 + arch/powerpc/platforms/Kconfig.cputype | 8 ++ 10 files changed, 180 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/include/asm/book3s/64/kup-radix.h diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h new file mode 100644 index 000000000000..3d60b04fc3f6 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H +#define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H + +#include <linux/const.h> + +#define AMR_KUAP_BLOCK_READ UL(0x4000000000000000) +#define AMR_KUAP_BLOCK_WRITE UL(0x8000000000000000) +#define AMR_KUAP_BLOCKED (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE) +#define AMR_KUAP_SHIFT 62 + +#ifdef __ASSEMBLY__ + +.macro kuap_restore_amr gpr +#ifdef CONFIG_PPC_KUAP + BEGIN_MMU_FTR_SECTION_NESTED(67) + ld \gpr, STACK_REGS_KUAP(r1) + mtspr SPRN_AMR, \gpr + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) +#endif +.endm + +.macro kuap_check_amr gpr1 gpr2 +#ifdef CONFIG_PPC_KUAP_DEBUG + BEGIN_MMU_FTR_SECTION_NESTED(67) + mfspr \gpr1, SPRN_AMR + li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT) + sldi \gpr2, \gpr2, AMR_KUAP_SHIFT +999: tdne \gpr1, \gpr2 + EMIT_BUG_ENTRY 999b,__FILE__,__LINE__, \ + (BUGFLAG_WARNING|BUGFLAG_ONCE) + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) +#endif +.endm + +.macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr +#ifdef CONFIG_PPC_KUAP + BEGIN_MMU_FTR_SECTION_NESTED(67) + .ifnb \msr_pr_cr + bne \msr_pr_cr, 99f + .endif + mfspr \gpr1, SPRN_AMR + std \gpr1, STACK_REGS_KUAP(r1) + li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT) + sldi \gpr2, \gpr2, AMR_KUAP_SHIFT + cmpd \use_cr, \gpr1, \gpr2 + beq \use_cr, 99f + // We don't isync here because we very recently entered via rfid + mtspr SPRN_AMR, \gpr2 + isync +99: + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) +#endif +.endm + +#else /* !__ASSEMBLY__ */ + +#ifdef CONFIG_PPC_KUAP + +#include <asm/reg.h> + +/* + * We support individually allowing read or write, but we don't support nesting + * because that would require an expensive read/modify write of the AMR. + */ + +static inline void set_kuap(unsigned long value) +{ + if (!mmu_has_feature(MMU_FTR_RADIX_KUAP)) + return; + + /* + * ISA v3.0B says we need a CSI (Context Synchronising Instruction) both + * before and after the move to AMR. See table 6 on page 1134. + */ + isync(); + mtspr(SPRN_AMR, value); + isync(); +} + +static inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size) +{ + set_kuap(0); +} + +static inline void allow_read_from_user(const void __user *from, unsigned long size) +{ + set_kuap(AMR_KUAP_BLOCK_WRITE); +} + +static inline void allow_write_to_user(void __user *to, unsigned long size) +{ + set_kuap(AMR_KUAP_BLOCK_READ); +} + +static inline void prevent_user_access(void __user *to, const void __user *from, + unsigned long size) +{ + set_kuap(AMR_KUAP_BLOCKED); +} + +#endif /* CONFIG_PPC_KUAP */ + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 937bb630093f..bef4e05a6823 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -497,6 +497,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) RESTORE_CTR(r1, area); \ b bad_stack; \ 3: EXCEPTION_PROLOG_COMMON_1(); \ + kuap_save_amr_and_lock r9, r10, cr1, cr0; \ beq 4f; /* if from kernel mode */ \ ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \ SAVE_PPR(area, r9); \ @@ -691,6 +692,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CTRL) */ #define EXCEPTION_COMMON_NORET_STACK(area, trap, label, hdlr, additions) \ EXCEPTION_PROLOG_COMMON_1(); \ + kuap_save_amr_and_lock r9, r10, cr1; \ EXCEPTION_PROLOG_COMMON_2(area); \ EXCEPTION_PROLOG_COMMON_3(trap); \ /* Volatile regs are potentially clobbered here */ \ diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 40a6c9261a6b..f6fc31f8baff 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -100,6 +100,9 @@ label##5: \ #define END_MMU_FTR_SECTION(msk, val) \ END_MMU_FTR_SECTION_NESTED(msk, val, 97) +#define END_MMU_FTR_SECTION_NESTED_IFSET(msk, label) \ + END_MMU_FTR_SECTION_NESTED((msk), (msk), label) + #define END_MMU_FTR_SECTION_IFSET(msk) END_MMU_FTR_SECTION((msk), (msk)) #define END_MMU_FTR_SECTION_IFCLR(msk) END_MMU_FTR_SECTION((msk), 0) diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 4410625f4364..f79d4d970852 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -2,6 +2,10 @@ #ifndef _ASM_POWERPC_KUP_H_ #define _ASM_POWERPC_KUP_H_ +#ifdef CONFIG_PPC64 +#include <asm/book3s/64/kup-radix.h> +#endif + #ifndef __ASSEMBLY__ #include <asm/pgtable.h> diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index d34ad1657d7b..59acb4418164 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -107,6 +107,11 @@ */ #define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000) +/* + * Supports KUAP (key 0 controlling userspace addresses) on radix + */ +#define MMU_FTR_RADIX_KUAP ASM_CONST(0x80000000) + /* MMU feature bit sets for various CPUs */ #define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \ MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2 @@ -164,7 +169,10 @@ enum { #endif #ifdef CONFIG_PPC_RADIX_MMU MMU_FTR_TYPE_RADIX | -#endif +#ifdef CONFIG_PPC_KUAP + MMU_FTR_RADIX_KUAP | +#endif /* CONFIG_PPC_KUAP */ +#endif /* CONFIG_PPC_RADIX_MMU */ 0, }; diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 15c67d2c0534..020d0ad9aa51 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -46,6 +46,7 @@ #include <asm/exception-64e.h> #endif #include <asm/feature-fixups.h> +#include <asm/kup.h> /* * System calls. @@ -120,6 +121,9 @@ END_BTB_FLUSH_SECTION addi r9,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) std r11,-16(r9) /* "regshere" marker */ + + kuap_check_amr r10 r11 + #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR) BEGIN_FW_FTR_SECTION beq 33f @@ -275,6 +279,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) andi. r6,r8,MSR_PR ld r4,_LINK(r1) + kuap_check_amr r10 r11 + #ifdef CONFIG_PPC_BOOK3S /* * Clear MSR_RI, MSR_EE is already and remains disabled. We could do @@ -296,6 +302,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) std r8, PACATMSCRATCH(r13) #endif + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ ld r2,GPR2(r1) ld r1,GPR1(r1) @@ -306,8 +316,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) RFI_TO_USER b . /* prevent speculative execution */ - /* exit to kernel */ -1: ld r2,GPR2(r1) +1: /* exit to kernel */ + kuap_restore_amr r2 + + ld r2,GPR2(r1) ld r1,GPR1(r1) mtlr r4 mtcr r5 @@ -594,6 +606,8 @@ _GLOBAL(_switch) std r23,_CCR(r1) std r1,KSP(r3) /* Set old stack pointer */ + kuap_check_amr r9 r10 + FLUSH_COUNT_CACHE /* @@ -942,6 +956,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) ld r4,_XER(r1) mtspr SPRN_XER,r4 + kuap_check_amr r5 r6 + REST_8GPRS(5, r1) andi. r0,r3,MSR_RI @@ -974,6 +990,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ACCOUNT_CPU_USER_EXIT(r13, r2, r4) REST_GPR(13, r1) + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ mtspr SPRN_SRR1,r3 ld r2,_CCR(r1) @@ -1006,6 +1026,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r0,GPR0(r1) ld r2,GPR2(r1) ld r3,GPR3(r1) + + kuap_restore_amr r4 + ld r4,GPR4(r1) ld r1,GPR1(r1) RFI_TO_KERNEL diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index a5b8fbae56a0..99b1bc4e190b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -19,6 +19,7 @@ #include <asm/cpuidle.h> #include <asm/head-64.h> #include <asm/feature-fixups.h> +#include <asm/kup.h> /* * There are a few constraints to be concerned with. @@ -309,6 +310,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early) mfspr r11,SPRN_DSISR /* Save DSISR */ std r11,_DSISR(r1) std r9,_CCR(r1) /* Save CR in stackframe */ + kuap_save_amr_and_lock r9, r10, cr1 /* Save r9 through r13 from EXMC save area to stack frame. */ EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) mfmsr r11 /* get MSR value */ @@ -1097,6 +1099,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early) mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ mfspr r12,SPRN_HSRR1 /* Save HSRR1 */ EXCEPTION_PROLOG_COMMON_1() + /* We don't touch AMR here, we never go to virtual mode */ EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN) EXCEPTION_PROLOG_COMMON_3(0xe60) addi r3,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 224bcd4be5ae..0c87832ddd6c 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -29,6 +29,7 @@ #include <asm/powernv.h> #include <asm/sections.h> #include <asm/trace.h> +#include <asm/uaccess.h> #include <trace/events/thp.h> @@ -553,6 +554,23 @@ void __init setup_kuep(bool disabled) } #endif +#ifdef CONFIG_PPC_KUAP +void __init setup_kuap(bool disabled) +{ + if (disabled || !early_radix_enabled()) + return; + + if (smp_processor_id() == boot_cpuid) { + pr_info("Activating Kernel Userspace Access Prevention\n"); + cur_cpu_spec->mmu_features |= MMU_FTR_RADIX_KUAP; + } + + /* Make sure userspace can't change the AMR */ + mtspr(SPRN_UAMOR, 0); + mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); +} +#endif + void __init radix__early_init_mmu(void) { unsigned long lpcr; diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index 587807763737..ae7fca40e5b3 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -7,6 +7,7 @@ #include <asm/mman.h> #include <asm/mmu_context.h> +#include <asm/mmu.h> #include <asm/setup.h> #include <linux/pkeys.h> #include <linux/of_device.h> diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 60371784c9f1..5e53b9fd62aa 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -327,6 +327,7 @@ config PPC_RADIX_MMU depends on PPC_BOOK3S_64 select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA select PPC_HAVE_KUEP + select PPC_HAVE_KUAP default y help Enable support for the Power ISA 3.0 Radix style MMU. Currently this @@ -370,6 +371,13 @@ config PPC_KUAP If you're unsure, say Y. +config PPC_KUAP_DEBUG + bool "Extra debugging for Kernel Userspace Access Protection" + depends on PPC_HAVE_KUAP && PPC_RADIX_MMU + help + Add extra debugging for Kernel Userspace Access Protection (KUAP) + If you're unsure, say N. + config ARCH_ENABLE_HUGEPAGE_MIGRATION def_bool y depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION -- 2.20.1