From: Dave Hansen <dave.han...@linux.intel.com>

These patches are based on work from a team at Graz University of
Technology posted here: https://github.com/IAIK/KAISER
The KAISER approach keeps two copies of the page tables: one for running
in the kernel and one for running userspace.  But, there are a few
structures that are needed for switching in and out of the kernel and a
good subset of *those* are per-cpu data.

Here's a short summary of the things mapped to userspace (a sketch of
the user-mapped per-cpu declaration pattern follows this list):

 * The gdt_page's virtual address is pointed to by the LGDT instruction.
   It is needed to define the segments and is deeply required by the CPU
   to run.

 * cpu_tss tells the CPU, among other things, where the new stacks are
   after user<->kernel transitions.  It is needed by the CPU to make
   ring transitions.

 * exception_stacks are needed at interrupt and exception entry so that
   there is storage for, among other things, some temporary space to
   permit clobbering a register to load the kernel CR3.
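For reference, the *_USER_MAPPED per-cpu variants used below are
introduced by an earlier patch in this series.  Roughly, they are thin
wrappers around the existing DECLARE_PER_CPU_SECTION() /
DEFINE_PER_CPU_SECTION() helpers that emit the variable into a dedicated
per-cpu subsection, which the KAISER code can then map into the user
copy of the page tables.  A minimal sketch (with an assumed
"..user_mapped" section suffix, not the exact upstream definition)
looks like this:

/*
 * Illustrative sketch only: the real *_USER_MAPPED definitions are
 * added to include/linux/percpu-defs.h by an earlier patch in this
 * series; the "..user_mapped" section suffix is an assumption made
 * for this example.
 */
#include <linux/percpu-defs.h>
#include <linux/cache.h>
#include <asm/page.h>

#define USER_MAPPED_SECTION "..user_mapped"

/* Page-aligned per-cpu data that must also be visible to userspace. */
#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name)		\
	DECLARE_PER_CPU_SECTION(type, name,				\
				USER_MAPPED_SECTION"..page_aligned")	\
	__aligned(PAGE_SIZE)

#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name)		\
	DEFINE_PER_CPU_SECTION(type, name,				\
				USER_MAPPED_SECTION"..page_aligned")	\
	__aligned(PAGE_SIZE)

/* Cacheline-aligned variant, used for cpu_tss in process.c below. */
#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name)		\
	DEFINE_PER_CPU_SECTION(type, name,				\
				USER_MAPPED_SECTION			\
				PER_CPU_SHARED_ALIGNED_SECTION)		\
	____cacheline_aligned_in_smp

The point of the dedicated subsection is that only these few variables,
rather than the entire per-cpu area, need to appear in the userspace
copy of the page tables.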
Signed-off-by: Dave Hansen <dave.han...@linux.intel.com>
Cc: Andy Lutomirski <l...@kernel.org>
Cc: Borislav Petkov <b...@alien8.de>
Cc: Brian Gerst <brge...@gmail.com>
Cc: Daniel Gruss <daniel.gr...@iaik.tugraz.at>
Cc: Denys Vlasenko <dvlas...@redhat.com>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Hugh Dickins <hu...@google.com>
Cc: Josh Poimboeuf <jpoim...@redhat.com>
Cc: Kees Cook <keesc...@google.com>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Michael Schwarz <michael.schw...@iaik.tugraz.at>
Cc: Moritz Lipp <moritz.l...@iaik.tugraz.at>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Richard Fellner <richard.fell...@student.tugraz.at>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: linux...@kvack.org
Link: http://lkml.kernel.org/r/20171123003445.df9ea...@viggo.jf.intel.com
Signed-off-by: Ingo Molnar <mi...@kernel.org>
---
 arch/x86/include/asm/desc.h      | 2 +-
 arch/x86/include/asm/processor.h | 2 +-
 arch/x86/kernel/cpu/common.c     | 4 ++--
 arch/x86/kernel/process.c        | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index aab4fe9f49f8..300090d1c209 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -46,7 +46,7 @@ struct gdt_page {
 	struct desc_struct gdt[GDT_ENTRIES];
 } __attribute__((aligned(PAGE_SIZE)));
 
-DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
+DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page);
 
 /* Provide the original GDT */
 static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 54f3ee3bc8a0..83dd7c97ba5d 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -359,7 +359,7 @@ struct tss_struct {
 	unsigned long		io_bitmap[IO_BITMAP_LONGS + 1];
 } __aligned(PAGE_SIZE);
 
-DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss);
+DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss);
 
 /*
  * sizeof(unsigned long) coming from an extra "long" at the end
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f9c7e6852874..3b6920c9fef7 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -98,7 +98,7 @@ static const struct cpu_dev default_cpu = {
 
 static const struct cpu_dev *this_cpu = &default_cpu;
 
-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
+DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page) = { .gdt = {
 #ifdef CONFIG_X86_64
 	/*
 	 * We need valid kernel segments for data and code in long mode too
@@ -515,7 +515,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
 	  [DEBUG_STACK - 1]			= DEBUG_STKSZ
 };
 
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(char, exception_stacks
 	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
 #endif
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 6a04287f222b..9365b4f965e0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -47,7 +47,7 @@
  * section. Since TSS's are completely CPU-local, we want them
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+__visible DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss) = {
 	.x86_tss = {
 		/*
 		 * .sp0 is only used when entering ring 0 from a lower
-- 
2.14.1