On 21/09/2023 14:46, Peter Lafreniere wrote:
On Thu, Sep 21, 2023 at 05:28, anton.iva...@cambridgegreys.com wrote:
From: Anton Ivanov anton.iva...@cambridgegreys.com


Preemption requires saving/restoring FPU state. This patch
adds support for it using GCC intrinsics.

Signed-off-by: Anton Ivanov anton.iva...@cambridgegreys.com

---
arch/um/Kconfig | 1 -
arch/um/Makefile | 3 +-
arch/um/include/asm/fpu/api.h | 4 +-
arch/um/include/asm/processor-generic.h | 3 +
arch/um/kernel/Makefile | 2 +-
arch/um/kernel/fpu.c | 83 +++++++++++++++++++++++++
arch/um/kernel/irq.c | 2 +
7 files changed, 93 insertions(+), 5 deletions(-)
create mode 100644 arch/um/kernel/fpu.c

diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index b5e179360534..603f5fd82293 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -11,7 +11,6 @@ config UML
select ARCH_HAS_KCOV
select ARCH_HAS_STRNCPY_FROM_USER
select ARCH_HAS_STRNLEN_USER
- select ARCH_NO_PREEMPT
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_KASAN if X86_64
select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 82f05f250634..6454f735cc9a 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \
$(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \
-Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \
-Din6addr_loopback=kernel_in6addr_loopback \
- -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr
+ -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \
+ -mxsave -mxsaveopt

KBUILD_RUSTFLAGS += -Crelocation-model=pie

diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
index 71bfd9ef3938..0094624ae9b4 100644
--- a/arch/um/include/asm/fpu/api.h
+++ b/arch/um/include/asm/fpu/api.h
@@ -8,8 +8,8 @@
* of x86 optimized copy, xor, etc routines into the
* UML code tree. /

-#define kernel_fpu_begin() (void)0
-#define kernel_fpu_end() (void)0
+void kernel_fpu_begin(void);
+void kernel_fpu_end(void);

static inline bool irq_fpu_usable(void)
{
diff --git a/arch/um/include/asm/processor-generic.h 
b/arch/um/include/asm/processor-generic.h
index 7414154b8e9a..d5924d0e16a7 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -44,6 +44,9 @@ struct thread_struct {
} cb;
} u;
} request;
+#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
+ u8 fpu[2048] __aligned(64); / Intel docs require xsave/xrestore area to be 
aligned to 16 bytes /
+#endif
};

#define INIT_THREAD \
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index 811188be954c..5d9fbaa544be 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -16,7 +16,7 @@ extra-y := vmlinux.lds

obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
physmem.o process.o ptrace.o reboot.o sigio.o \
- signal.o sysrq.o time.o tlb.o trap.o \
+ signal.o sysrq.o time.o tlb.o trap.o fpu.o\
um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/
obj-y += load_file.o

diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c
new file mode 100644
index 000000000000..fb90da5a0227
--- /dev/null
+++ b/arch/um/kernel/fpu.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/
+ * Copyright (C) 2023 Cambridge Greys Ltd
+ * Copyright (C) 2023 Red Hat Inc
+ */
+
+#include <linux/cpu.h>

+#include <linux/init.h>

+#include <asm/fpu/api.h>

+#include <asm/cpufeature.h>

+
+/*
+ * The critical section between kernel_fpu_begin() and kernel_fpu_end()
+ * is non-reentrant. It is the caller's responsibility to avoid reentrance.
+ /
+
+#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
+static DEFINE_PER_CPU(bool, in_kernel_fpu);
+#endif
+
+/ UML knows about 387 features up to and including AVX512, tile, etc are not 
yet
+ * supported.
+ */
Features that are not supported probably shouldn't be reflected, including
if the appropiate xsave instruction isn't available.

They are OK to use by userspace and the same source is used for /proc/cpuinfo 
which can be used by userspace to see all available features.

Userspace task switching in UML uses a different mechanism for save/restore 
which will preserve them (ptrace REGs).


+
+#define KNOWN_387_FEATURES 0xFF
+
+
+void kernel_fpu_begin(void)
+{
+#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
+ preempt_disable();
+
+ WARN_ON(this_cpu_read(in_kernel_fpu));
+
+ this_cpu_write(in_kernel_fpu, true);
+
+#ifdef CONFIG_64BIT
+ if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
+ __builtin_ia32_xsaveopt64(&current->thread.fpu, KNOWN_387_FEATURES);

+ else {
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE))
+ __builtin_ia32_xsave64(&current->thread.fpu, KNOWN_387_FEATURES);

+ else
+ __builtin_ia32_fxsave64(&current->thread.fpu);

+ }
+#else
+ if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
+ __builtin_ia32_xsaveopt(&current->thread.fpu, KNOWN_387_FEATURES);

+ else {
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE))
+ __builtin_ia32_xsave(&current->thread.fpu, KNOWN_387_FEATURES);

+ else
+ __builtin_ia32_fxsave(&current->thread.fpu);

+ }
+#endif
+#endif
+}
I would keep kernel_fpu_begin() as an empty macro if !defined(PREEMPT) &&
!defined(PREEMPT_VOLUNTARY) to avoid calling empty functions, then only
compile fpu.c when you need PREEMPT behavior. This has the added bonus of
removing a level of #if in this function.

Sure. We can. IMHO the compiler will inline it and optimize it out.


I agree with Johannes about the if-else chains.

The if-else chains would be a good use for alternatives, but we know how
that works with UML.

And what's the reasoning behind making xsaveopt support likely, but not
xsave in the fallback path?

xsaveopt has been around for what? 10+ years now? That is likely :) Very likely.

Sure we can put a likely on the following xsave. If you are hitting that, you 
are on a pre-2011 CPU.


+
+EXPORT_SYMBOL_GPL(kernel_fpu_begin);
+
+void kernel_fpu_end(void)
+{
+#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
+ WARN_ON(!this_cpu_read(in_kernel_fpu));
+
+#ifdef CONFIG_64BIT
+ if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
+ __builtin_ia32_xrstor64(&current->thread.fpu, KNOWN_387_FEATURES);

+ else
+ __builtin_ia32_fxrstor64(&current->thread.fpu);

+#else
+ if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
+ __builtin_ia32_xrstor(&current->thread.fpu, KNOWN_387_FEATURES);

+ else
+ __builtin_ia32_fxrstor(&current->thread.fpu);

+#endif
+ this_cpu_write(in_kernel_fpu, false);
+
+ preempt_enable();
+#endif
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_end);
+
See above, minus the likely() part.

diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 635d44606bfe..c02525da45df 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -195,7 +195,9 @@ static void _sigio_handler(struct uml_pt_regs *regs,

void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
+ preempt_disable();
_sigio_handler(regs, irqs_suspended);
+ preempt_enable();
}

static struct irq_entry *get_irq_entry_by_fd(int fd)
--
2.30.2
As a reminder, compilations with debug.config fail with the following error:

---
In file included from arch/um/kernel/irq.c:19:
./arch/um/include/shared/kern_util.h:53:12: error: conflicting types for 
‘__cant_sleep’; have ‘int(void)’
    53 | extern int __cant_sleep(void);
       |            ^~~~~~~~~~~~
In file included from ./include/linux/cpumask.h:10,
                  from arch/um/kernel/irq.c:10:
./include/linux/kernel.h:129:13: note: previous declaration of ‘__cant_sleep’ 
with type ‘void(const char *, int,  int)’
   129 | extern void __cant_sleep(const char *file, int line, int 
preempt_offset);

OK. Will look into this one. This looks like a bug in the first place.

       |             ^~~~~~~~~~~~
---

Cheers,
Peter


--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um

Reply via email to