On Fri, Apr 05, 2019 at 05:07:07PM +0200, Thomas Gleixner wrote:
> At the moment everything assumes a full linear mapping of the various
> exception stacks. Adding guard pages to the cpu entry area mapping of the
> exception stacks will break that assumption.
> 
> As a preparatory step convert both the real storage and the effective
> mapping in the cpu entry area from character arrays to structures.
> 
> To ensure that both arrays have the same ordering and the same size of the
> individual stacks fill the members with a macro. The guard size is the only
> difference between the two resulting structures. For now both have guard
> size 0 until the preparation of all usage sites is done.
> 
> Provide a couple of helper macros which are used in the following
> conversions.
> 
> Signed-off-by: Thomas Gleixner <t...@linutronix.de>
> ---
>  arch/x86/include/asm/cpu_entry_area.h |   51 ++++++++++++++++++++++++++++++----
>  arch/x86/kernel/cpu/common.c          |    2 -
>  arch/x86/mm/cpu_entry_area.c          |    8 ++---
>  3 files changed, 50 insertions(+), 11 deletions(-)
> 
> --- a/arch/x86/include/asm/cpu_entry_area.h
> +++ b/arch/x86/include/asm/cpu_entry_area.h
> @@ -7,6 +7,50 @@
>  #include <asm/processor.h>
>  #include <asm/intel_ds.h>
>  
> +#ifdef CONFIG_X86_64
> +
> +/* Macro to enforce the same ordering and stack sizes */
> +#define ESTACKS_MEMBERS(guardsize)           \
> +     char    DF_stack[EXCEPTION_STKSZ];      \
> +     char    DF_stack_guard[guardsize];      \
> +     char    NMI_stack[EXCEPTION_STKSZ];     \
> +     char    NMI_stack_guard[guardsize];     \
> +     char    DB_stack[DEBUG_STKSZ];          \
> +     char    DB_stack_guard[guardsize];      \
> +     char    MCE_stack[EXCEPTION_STKSZ];     \
> +     char    MCE_stack_guard[guardsize];     \

Conceptually, shouldn't the stack guard precede its associated stack
since the stacks grow down?  And don't we want a guard page below the
DF_stack?  There could still be a guard page above MCE_stack,
e.g. IST_stack_guard or something.

E.g. the example in patch "Speedup in_exception_stack()" also suggests
that the "guard page" is associated with the stack physically above it:

      --- top of DB_stack       <- Initial stack
      --- end of DB_stack
          guard page

      --- top of DB1_stack      <- Top of stack after entering first #DB
      --- end of DB1_stack
          guard page

      --- top of DB2_stack      <- Top of stack after entering second #DB
      --- end of DB2_stack
          guard page

> +
> +/* The exception stacks linear storage. No guard pages required */
> +struct exception_stacks {
> +     ESTACKS_MEMBERS(0)
> +};
> +
> +/*
> + * The effective cpu entry area mapping with guard pages. Guard size is
> + * zero until the code which makes assumptions about linear mapping is
> + * cleaned up.
> + */
> +struct cea_exception_stacks {
> +     ESTACKS_MEMBERS(0)
> +};
> +
> +#define CEA_ESTACK_TOP(ceastp, st)                   \
> +     ((unsigned long)&(ceastp)->st## _stack_guard)

IMO, using the stack guard to define the top of stack is unnecessarily
confusing and fragile, e.g. reordering the stack guard members relative
to their stacks would break this macro.

What about:

#define CEA_ESTACK_TOP(ceastp, st)                      \
        (CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st))

> +#define CEA_ESTACK_BOT(ceastp, st)                   \
> +     ((unsigned long)&(ceastp)->st## _stack)
> +
> +#define CEA_ESTACK_OFFS(st)                                  \
> +     offsetof(struct cea_exception_stacks, st## _stack)
> +
> +#define CEA_ESTACK_SIZE(st)                                  \
> +     sizeof(((struct cea_exception_stacks *)0)->st## _stack)
> +
> +#define CEA_ESTACK_PAGES                                     \
> +     (sizeof(struct cea_exception_stacks) / PAGE_SIZE)
> +
> +#endif
> +
>  /*
>   * cpu_entry_area is a percpu region that contains things needed by the CPU
>   * and early entry/exit code.  Real types aren't used for all fields here
> @@ -32,12 +76,9 @@ struct cpu_entry_area {
>  
>  #ifdef CONFIG_X86_64
>       /*
> -      * Exception stacks used for IST entries.
> -      *
> -      * In the future, this should have a separate slot for each stack
> -      * with guard pages between them.
> +      * Exception stacks used for IST entries with guard pages.
>        */
> -     char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
> +     struct cea_exception_stacks estacks;
>  #endif
>  #ifdef CONFIG_CPU_SUP_INTEL
>       /*
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -1754,7 +1754,7 @@ void cpu_init(void)
>        * set up and load the per-CPU TSS
>        */
>       if (!oist->ist[0]) {
> -             char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
> +             char *estacks = (char *)&get_cpu_entry_area(cpu)->estacks;
>  
>               for (v = 0; v < N_EXCEPTION_STACKS; v++) {
>                       estacks += exception_stack_sizes[v];
> --- a/arch/x86/mm/cpu_entry_area.c
> +++ b/arch/x86/mm/cpu_entry_area.c
> @@ -13,8 +13,7 @@
>  static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
>  
>  #ifdef CONFIG_X86_64
> -static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
> -     [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
> +static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
>  #endif
>  
>  struct cpu_entry_area *get_cpu_entry_area(int cpu)
> @@ -138,9 +137,8 @@ static void __init setup_cpu_entry_area(
>  #ifdef CONFIG_X86_64
>       BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
>       BUILD_BUG_ON(sizeof(exception_stacks) !=
> -                  sizeof(((struct cpu_entry_area *)0)->exception_stacks));
> -     cea_map_percpu_pages(&cea->exception_stacks,
> -                          &per_cpu(exception_stacks, cpu),
> +                  sizeof(((struct cpu_entry_area *)0)->estacks));
> +     cea_map_percpu_pages(&cea->estacks, &per_cpu(exception_stacks, cpu),
>                            sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
>  #endif
>       percpu_setup_debug_store(cpu);
> 
> 

Reply via email to