On Fri, Apr 05, 2019 at 05:07:07PM +0200, Thomas Gleixner wrote:
> At the moment everything assumes a full linear mapping of the various
> exception stacks. Adding guard pages to the cpu entry area mapping of the
> exception stacks will break that assumption.
> 
> As a preparatory step convert both the real storage and the effective
> mapping in the cpu entry area from character arrays to structures.
> 
> To ensure that both arrays have the same ordering and the same size of the
> individual stacks fill the members with a macro. The guard size is the only
> difference between the two resulting structures. For now both have guard
> size 0 until the preparation of all usage sites is done.
> 
> Provide a couple of helper macros which are used in the following
> conversions.
> 
> Signed-off-by: Thomas Gleixner <t...@linutronix.de>
> ---
>  arch/x86/include/asm/cpu_entry_area.h |   51 ++++++++++++++++++++++++++++++----
>  arch/x86/kernel/cpu/common.c          |    2 -
>  arch/x86/mm/cpu_entry_area.c          |    8 ++---
>  3 files changed, 50 insertions(+), 11 deletions(-)
> 
> --- a/arch/x86/include/asm/cpu_entry_area.h
> +++ b/arch/x86/include/asm/cpu_entry_area.h
> @@ -7,6 +7,50 @@
>  #include <asm/processor.h>
>  #include <asm/intel_ds.h>
>  
> +#ifdef CONFIG_X86_64
> +
> +/* Macro to enforce the same ordering and stack sizes */
> +#define ESTACKS_MEMBERS(guardsize)		\
> +	char	DF_stack[EXCEPTION_STKSZ];	\
> +	char	DF_stack_guard[guardsize];	\
> +	char	NMI_stack[EXCEPTION_STKSZ];	\
> +	char	NMI_stack_guard[guardsize];	\
> +	char	DB_stack[DEBUG_STKSZ];		\
> +	char	DB_stack_guard[guardsize];	\
> +	char	MCE_stack[EXCEPTION_STKSZ];	\
> +	char	MCE_stack_guard[guardsize];	\
Conceptually, shouldn't the stack guard precede its associated stack
since the stacks grow down?  And don't we want a guard page below the
DF_stack?  There could still be a guard page above MCE_stack, e.g.
IST_stack_guard or something.

The example in the "Speedup in_exception_stack()" patch also suggests
that a guard page is associated with the stack physically above it:

	--- top of DB_stack	<- Initial stack
	--- end of DB_stack
	    guard page

	--- top of DB1_stack	<- Top of stack after entering first #DB
	--- end of DB1_stack
	    guard page

	--- top of DB2_stack	<- Top of stack after entering second #DB
	--- end of DB2_stack
	    guard page

Something like the (untested) layout sketched at the end of this mail.

> +
> +/* The exception stacks linear storage. No guard pages required */
> +struct exception_stacks {
> +	ESTACKS_MEMBERS(0)
> +};
> +
> +/*
> + * The effective cpu entry area mapping with guard pages. Guard size is
> + * zero until the code which makes assumptions about linear mapping is
> + * cleaned up.
> + */
> +struct cea_exception_stacks {
> +	ESTACKS_MEMBERS(0)
> +};
> +
> +#define CEA_ESTACK_TOP(ceastp, st)			\
> +	((unsigned long)&(ceastp)->st## _stack_guard)

IMO, using the stack guard to define the top of the stack is unnecessarily
confusing and fragile, e.g. reordering the stack and guard members would
break this macro.  What about:

#define CEA_ESTACK_TOP(ceastp, st)			\
	(CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st))

> +#define CEA_ESTACK_BOT(ceastp, st)			\
> +	((unsigned long)&(ceastp)->st## _stack)
> +
> +#define CEA_ESTACK_OFFS(st)				\
> +	offsetof(struct cea_exception_stacks, st## _stack)
> +
> +#define CEA_ESTACK_SIZE(st)				\
> +	sizeof(((struct cea_exception_stacks *)0)->st## _stack)
> +
> +#define CEA_ESTACK_PAGES				\
> +	(sizeof(struct cea_exception_stacks) / PAGE_SIZE)
> +
> +#endif
> +
>  /*
>   * cpu_entry_area is a percpu region that contains things needed by the CPU
>   * and early entry/exit code.  Real types aren't used for all fields here
> @@ -32,12 +76,9 @@ struct cpu_entry_area {
>  
>  #ifdef CONFIG_X86_64
>  	/*
> -	 * Exception stacks used for IST entries.
> -	 *
> -	 * In the future, this should have a separate slot for each stack
> -	 * with guard pages between them.
> +	 * Exception stacks used for IST entries with guard pages.
>  	 */
> -	char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
> +	struct cea_exception_stacks estacks;
>  #endif
>  #ifdef CONFIG_CPU_SUP_INTEL
>  	/*
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -1754,7 +1754,7 @@ void cpu_init(void)
>  	 * set up and load the per-CPU TSS
>  	 */
>  	if (!oist->ist[0]) {
> -		char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
> +		char *estacks = (char *)&get_cpu_entry_area(cpu)->estacks;
>  
>  		for (v = 0; v < N_EXCEPTION_STACKS; v++) {
>  			estacks += exception_stack_sizes[v];
> --- a/arch/x86/mm/cpu_entry_area.c
> +++ b/arch/x86/mm/cpu_entry_area.c
> @@ -13,8 +13,7 @@
>  static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
>  
>  #ifdef CONFIG_X86_64
> -static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
> -	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
> +static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
>  #endif
>  
>  struct cpu_entry_area *get_cpu_entry_area(int cpu)
> @@ -138,9 +137,8 @@ static void __init setup_cpu_entry_area(
>  #ifdef CONFIG_X86_64
>  	BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
>  	BUILD_BUG_ON(sizeof(exception_stacks) !=
> -		     sizeof(((struct cpu_entry_area *)0)->exception_stacks));
> -	cea_map_percpu_pages(&cea->exception_stacks,
> -			     &per_cpu(exception_stacks, cpu),
> +		     sizeof(((struct cpu_entry_area *)0)->estacks));
> +	cea_map_percpu_pages(&cea->estacks, &per_cpu(exception_stacks, cpu),
>  			     sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
>  #endif
>  	percpu_setup_debug_store(cpu);
> 
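
For reference, the (completely untested) member layout I was thinking of
above: each guard sits below the stack it protects, plus one extra guard
above the top-most stack ("IST_stack_guard" is just a made-up name):

#define ESTACKS_MEMBERS(guardsize)		\
	char	DF_stack_guard[guardsize];	\
	char	DF_stack[EXCEPTION_STKSZ];	\
	char	NMI_stack_guard[guardsize];	\
	char	NMI_stack[EXCEPTION_STKSZ];	\
	char	DB_stack_guard[guardsize];	\
	char	DB_stack[DEBUG_STKSZ];		\
	char	MCE_stack_guard[guardsize];	\
	char	MCE_stack[EXCEPTION_STKSZ];	\
	char	IST_stack_guard[guardsize];

With that ordering, CEA_ESTACK_TOP() obviously can't key off the guard
member any more, which is one more argument for defining it as
CEA_ESTACK_BOT() + CEA_ESTACK_SIZE().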