On Fri, Sept 22, 2023 at 06:56, anton.iva...@cambridgegreys.com wrote: > > From: Anton Ivanov anton.iva...@cambridgegreys.com > > > 1. Preemption requires saving/restoring FPU state. This patch > adds support for it using GCC intrinsics as well as appropriate > storage space in the thread structure. > > 2. irq critical sections need preempt_disable()/preempt_enable(). > > 3. TLB critical sections need preempt_disable()/preempt_enable(). > > 4. UML TLB flush is also invoked during a fork. This happens > with interrupts and preempt disabled which disagrees with the > standard mm locking via rwsem. The mm lock for this code path > had to be replaced with an rcu.
"Had to be"? Why? It's a very unconventional locking method and should have some justification. > > 5. The FPU state area is statically allocated depending on > the enabled PREEMPT options. PREEMPT_DYNAMIC and chosing the > preemption model at start time is disabled for the UM arch. > > Signed-off-by: Anton Ivanov anton.iva...@cambridgegreys.com > > --- > arch/um/Kconfig | 2 +- > arch/um/include/asm/fpu/api.h | 9 ++- > arch/um/include/asm/processor-generic.h | 4 ++ > arch/um/kernel/Makefile | 4 ++ > arch/um/kernel/fpu.c | 75 +++++++++++++++++++++++++ > arch/um/kernel/irq.c | 2 + > arch/um/kernel/tlb.c | 21 ++++++- > 7 files changed, 111 insertions(+), 6 deletions(-) > create mode 100644 arch/um/kernel/fpu.c > > diff --git a/arch/um/Kconfig b/arch/um/Kconfig > index b5e179360534..19176fde82f3 100644 > --- a/arch/um/Kconfig > +++ b/arch/um/Kconfig > @@ -11,7 +11,7 @@ config UML > select ARCH_HAS_KCOV > select ARCH_HAS_STRNCPY_FROM_USER > select ARCH_HAS_STRNLEN_USER > - select ARCH_NO_PREEMPT > + select ARCH_NO_PREEMPT_DYNAMIC > select HAVE_ARCH_AUDITSYSCALL > select HAVE_ARCH_KASAN if X86_64 > select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN > diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h > index 71bfd9ef3938..9e7680bf48f0 100644 > --- a/arch/um/include/asm/fpu/api.h > +++ b/arch/um/include/asm/fpu/api.h > @@ -4,12 +4,15 @@ > > /* Copyright (c) 2020 Cambridge Greys Ltd > * Copyright (c) 2020 Red Hat Inc. > - * A set of "dummy" defines to allow the direct inclusion > - * of x86 optimized copy, xor, etc routines into the > - * UML code tree. */ > + / > > +#if defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY) > +extern void kernel_fpu_begin(void); > +extern void kernel_fpu_end(void); > +#else > #define kernel_fpu_begin() (void)0 > #define kernel_fpu_end() (void)0 > +#endif > > static inline bool irq_fpu_usable(void) > { > diff --git a/arch/um/include/asm/processor-generic.h > b/arch/um/include/asm/processor-generic.h > index 7414154b8e9a..9970e70be1e4 100644 > --- a/arch/um/include/asm/processor-generic.h > +++ b/arch/um/include/asm/processor-generic.h > @@ -44,6 +44,10 @@ struct thread_struct { > } cb; > } u; > } request; > +#if defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY) > +/ Intel docs require xsave/xrestore area to be aligned to 64 bytes / > + u8 fpu[2048] __aligned(64); > +#endif > }; > > #define INIT_THREAD \ > diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile > index 811188be954c..c616e884a488 100644 > --- a/arch/um/kernel/Makefile > +++ b/arch/um/kernel/Makefile > @@ -26,9 +26,13 @@ obj-$(CONFIG_OF) += dtb.o > obj-$(CONFIG_EARLY_PRINTK) += early_printk.o > obj-$(CONFIG_STACKTRACE) += stacktrace.o > obj-$(CONFIG_GENERIC_PCI_IOMAP) += ioport.o > +obj-$(CONFIG_PREEMPT) += fpu.o > +obj-$(CONFIG_PREEMPT_VOLUNTARY) += fpu.o > > USER_OBJS := config.o > > +CFLAGS_fpu.o += -mxsave -mxsaveopt > + > include $(srctree)/arch/um/scripts/Makefile.rules > > targets := config.c config.tmp capflags.c > diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c > new file mode 100644 > index 000000000000..4817276b2a26 > --- /dev/null > +++ b/arch/um/kernel/fpu.c > @@ -0,0 +1,75 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/ > + * Copyright (C) 2023 Cambridge Greys Ltd > + * Copyright (C) 2023 Red Hat Inc > + */ > + > +#include <linux/cpu.h> > > +#include <linux/init.h> > > +#include <asm/fpu/api.h> > > +#include <asm/cpufeature.h> > > + > +/* > + * The critical section between kernel_fpu_begin() and kernel_fpu_end() > + * is non-reentrant. It is the caller's responsibility to avoid reentrance. > + / > + > +static DEFINE_PER_CPU(bool, in_kernel_fpu); > + > +/ UML and driver code it pulls out of the x86 tree knows about 387 features > + * up to and including AVX512. TILE, etc are not yet supported. > + */ > + > +#define KNOWN_387_FEATURES 0xFF > + > +void kernel_fpu_begin(void) > +{ > + preempt_disable(); > + > + WARN_ON(this_cpu_read(in_kernel_fpu)); > + > + this_cpu_write(in_kernel_fpu, true); > + > +#ifdef CONFIG_64BIT > + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT))) > + __builtin_ia32_xsaveopt64(¤t->thread.fpu, KNOWN_387_FEATURES); > > + else { > + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE))) > + __builtin_ia32_xsave64(¤t->thread.fpu, KNOWN_387_FEATURES); > > + else > + __builtin_ia32_fxsave64(¤t->thread.fpu); > > + } > +#else > + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT))) > + __builtin_ia32_xsaveopt(¤t->thread.fpu, KNOWN_387_FEATURES); > > + else { > + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE))) > + __builtin_ia32_xsave(¤t->thread.fpu, KNOWN_387_FEATURES); > > + else > + __builtin_ia32_fxsave(¤t->thread.fpu); > > + } > +#endif > +} > +EXPORT_SYMBOL_GPL(kernel_fpu_begin); > + Johannes is right about how flattening the else branch makes it a bit more readable, imo. > +void kernel_fpu_end(void) > +{ > + WARN_ON(!this_cpu_read(in_kernel_fpu)); > + > +#ifdef CONFIG_64BIT > + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE))) > + __builtin_ia32_xrstor64(¤t->thread.fpu, KNOWN_387_FEATURES); > > + else > + __builtin_ia32_fxrstor64(¤t->thread.fpu); > > +#else > + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE))) > + __builtin_ia32_xrstor(¤t->thread.fpu, KNOWN_387_FEATURES); > > + else > + __builtin_ia32_fxrstor(¤t->thread.fpu); > > +#endif > + this_cpu_write(in_kernel_fpu, false); > + > + preempt_enable(); > +} > +EXPORT_SYMBOL_GPL(kernel_fpu_end); --->8--- > @@ -597,8 +609,13 @@ void force_flush_all(void) > struct vm_area_struct vma; > VMA_ITERATOR(vmi, mm, 0); > > - mmap_read_lock(mm); > + / We use a RCU lock instead of a mm lock, because > + * this can be invoked out of critical/atomic sections > + * and that does not agree with the sleepable semantics > + * of the standard semaphore based mm lock. > + */ As Johannes said, this comment doesn't explain why it's needed, just that an rcu lock is needed. > + rcu_read_lock(); > for_each_vma(vmi, vma) > fix_range(mm, vma->vm_start, vma->vm_end, 1); > > - mmap_read_unlock(mm); > + rcu_read_unlock(); > } > -- > 2.30.2 I didn't audit the assortment of preempt_{enable,disable}() pairs, but I did test this patch with defconfig + debug.config + CONFIG_PREEMPT=y for 15 minutes and I didn't see any warnings that used to show up. I think that we should fix what currently exists, then rebase this PREEMPT patch on top of the results. Plus that'll give me time to look into the avx + !xsave edge case :) Cheers, Peter _______________________________________________ linux-um mailing list linux-um@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-um