The barrier_nospec() in 64-bit copy_from_user() is slow. Instead, use pointer masking to force the user pointer to all 1's if access_ok() mispredicted true for an invalid address.
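
For illustration, here is a minimal C sketch of the masking idea. This is a
conceptual model only, and mask_user_ptr_sketch() is a made-up name: the
in-tree x86-64 mask_user_address() is arch-specific asm and, per commit
86e6b1547b3d, clamps against a runtime USER_PTR_MAX constant rather than
testing the sign bit, precisely to cover non-canonical addresses on some
AMD CPUs:

	/*
	 * Conceptual sketch: branchlessly turn any address with the top
	 * (kernel-half) bit set into all 1's, so a speculative dereference
	 * after a mispredicted access_ok() can only hit a faulting address,
	 * never kernel memory.
	 */
	static inline const void __user *mask_user_ptr_sketch(const void __user *ptr)
	{
		long addr = (long)ptr;

		/* addr >> 63 is 0 for user addresses, all 1's for kernel ones */
		return (const void __user *)(addr | (addr >> 63));
	}

Because the select is branchless, there is no branch for the CPU to
mispredict, unlike the LFENCE-based barrier_nospec() which serializes
speculation and is what makes the current path slow.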
The kernel test robot reports a 2.6% improvement in the per_thread_ops
benchmark (see link below).

To avoid regressing powerpc and 32-bit x86, move their barrier_nospec()
calls to their respective raw_copy_from_user() implementations so there's
no functional change there.

Note that for safety on some AMD CPUs, this relies on recent commit
86e6b1547b3d ("x86: fix user address masking non-canonical speculation
issue").

Link: https://lore.kernel.org/202410281344.d02c72a2-oliver.s...@intel.com
Signed-off-by: Josh Poimboeuf <jpoim...@kernel.org>
---
 arch/powerpc/include/asm/uaccess.h | 2 ++
 arch/x86/include/asm/uaccess_32.h  | 1 +
 arch/x86/include/asm/uaccess_64.h  | 1 +
 include/linux/uaccess.h            | 6 ------
 4 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 4f5a46a77fa2..12abb8bf5eda 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -7,6 +7,7 @@
 #include <asm/extable.h>
 #include <asm/kup.h>
 #include <asm/asm-compat.h>
+#include <asm/barrier.h>
 
 #ifdef __powerpc64__
 /* We use TASK_SIZE_USER64 as TASK_SIZE is not constant */
@@ -341,6 +342,7 @@ static inline unsigned long raw_copy_from_user(void *to,
 {
 	unsigned long ret;
 
+	barrier_nospec();
 	allow_read_from_user(from, n);
 	ret = __copy_tofrom_user((__force void __user *)to, from, n);
 	prevent_read_from_user(from, n);
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 40379a1adbb8..8393ba104b2c 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -23,6 +23,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 static __always_inline unsigned long
 raw_copy_from_user(void *to, const void __user *from, unsigned long n)
 {
+	barrier_nospec();
 	return __copy_user_ll(to, (__force const void *)from, n);
 }
 
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index b0a887209400..7ce84090f0ec 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -138,6 +138,7 @@ copy_user_generic(void *to, const void *from, unsigned long len)
 static __always_inline __must_check unsigned long
 raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
 {
+	src = mask_user_address(src);
 	return copy_user_generic(dst, (__force void *)src, size);
 }
 
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 39c7cf82b0c2..dda9725a9559 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -160,12 +160,6 @@ _inline_copy_from_user(void *to, const void __user *from, unsigned long n)
 	unsigned long res = n;
 	might_fault();
 	if (!should_fail_usercopy() && likely(access_ok(from, n))) {
-		/*
-		 * Ensure that bad access_ok() speculation will not
-		 * lead to nasty side effects *after* the copy is
-		 * finished:
-		 */
-		barrier_nospec();
 		instrument_copy_from_user_before(to, from, n);
 		res = raw_copy_from_user(to, from, n);
 		instrument_copy_from_user_after(to, from, n, res);
-- 
2.47.0