On 04/11/2022 at 18:27, Andrew Donnellan wrote:
> powerpc unfortunately has too many places where we run stuff in real mode.
>
> With CONFIG_VMAP_STACK enabled, this means we need to be able to swap the
> stack pointer to use the linear mapping when we enter a real mode section,
> and back afterwards.
>
> Store the top bits of the stack pointer in both the linear map and the
> vmalloc space in the PACA, and add some helper macros/functions to swap
> between them.
That may work when the page size is 64k, because the stack then fits in a
single page, but I doubt it works with 4k pages, because vmalloc may
allocate non-contiguous pages.

>
> Signed-off-by: Andrew Donnellan <a...@linux.ibm.com>
>
> ---
>
> Some of the helpers that are currently unused will be used in the next
> version of the series for the KVM real mode handling
> ---
>  arch/powerpc/include/asm/book3s/64/stack.h | 71 ++++++++++++++++++++++
>  arch/powerpc/include/asm/opal.h            |  1 +
>  arch/powerpc/include/asm/paca.h            |  4 ++
>  arch/powerpc/include/asm/processor.h       |  6 ++
>  arch/powerpc/kernel/asm-offsets.c          |  8 +++
>  arch/powerpc/kernel/entry_64.S             |  7 +++
>  arch/powerpc/kernel/process.c              |  4 ++
>  arch/powerpc/kernel/smp.c                  |  7 +++
>  arch/powerpc/xmon/xmon.c                   |  4 ++
>  9 files changed, 112 insertions(+)
>  create mode 100644 arch/powerpc/include/asm/book3s/64/stack.h
>
> diff --git a/arch/powerpc/include/asm/book3s/64/stack.h b/arch/powerpc/include/asm/book3s/64/stack.h
> new file mode 100644
> index 000000000000..6b31adb1a026
> --- /dev/null
> +++ b/arch/powerpc/include/asm/book3s/64/stack.h
> @@ -0,0 +1,71 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +// Helpers for VMAP_STACK on book3s64
> +// Copyright (C) 2022 IBM Corporation (Andrew Donnellan)
> +
> +#ifndef _ASM_POWERPC_BOOK3S_64_STACK_H
> +#define _ASM_POWERPC_BOOK3S_64_STACK_H
> +
> +#include <asm/thread_info.h>
> +
> +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S_64)
> +
> +#ifdef __ASSEMBLY__
> +// Switch the current stack pointer in r1 between a linear map address and a
> +// vmalloc address. Used when we need to go in and out of real mode with
> +// CONFIG_VMAP_STACK enabled.
> +//
> +// tmp: scratch register that can be clobbered
> +
> +#define SWAP_STACK_LINEAR(tmp) \
> +	ld	tmp, PACAKSTACK_LINEAR_BASE(r13); \
> +	andi.	r1, r1, THREAD_SIZE - 1; \

Do you assume THREAD_SIZE is never more than 64k? andi. only takes a 16-bit
immediate.

> +	or	r1, r1, tmp;

You can probably do better with the rldimi instruction.
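Something along these lines, maybe (completely untested, and assuming
THREAD_SHIFT is usable from asm here just like THREAD_SIZE already is):

	#define SWAP_STACK_LINEAR(tmp) \
		ld	tmp, PACAKSTACK_LINEAR_BASE(r13); \
		rldimi	tmp, r1, 0, 64 - THREAD_SHIFT; \
		mr	r1, tmp;

rldimi inserts the low THREAD_SHIFT bits of r1 (the offset within the stack)
into tmp while keeping tmp's upper bits (the base), so you are not limited to
a 16-bit mask and you don't clobber CR0 like andi. does.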
> +#define SWAP_STACK_VMALLOC(tmp) \
> +	ld	tmp, PACAKSTACK_VMALLOC_BASE(r13); \
> +	andi.	r1, r1, THREAD_SIZE - 1; \
> +	or	r1, r1, tmp;

Same

> +
> +#else // __ASSEMBLY__
> +
> +#include <asm/paca.h>
> +#include <asm/reg.h>
> +#include <linux/mm.h>
> +
> +#define stack_pa(ptr) (is_vmalloc_addr((ptr)) ? (void *)vmalloc_to_phys((void *)(ptr)) : (void *)ptr)
> +
> +static __always_inline void swap_stack_linear(void)
> +{
> +	current_stack_pointer = get_paca()->kstack_linear_base | \
> +		(current_stack_pointer & (THREAD_SIZE - 1));

That looks hacky. I think you can't just change current_stack_pointer on the
fly. You have to provide something similar to call_do_softirq() or
call_do_irq().
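Just as a sketch of what I mean, modelled on the call_do_softirq() pattern
(untested, and the helper and callee names here are made up): take the
linear-map alias of the current stack frame in r3 and switch r1 around the
call rather than rewriting it in C:

	/* Hypothetical: run a real mode section on the linear alias of the
	 * current stack. r3 = linear-map alias of the current r1. */
	_GLOBAL(call_on_linear_stack)
		mflr	r0
		std	r0, 16(r1)			/* save LR in our caller's frame */
		stdu	r1, -STACK_FRAME_OVERHEAD(r3)	/* new frame on the alias, back chain -> vmalloc r1 */
		mr	r1, r3				/* start using the linear-mapped stack */
		bl	do_real_mode_stuff		/* placeholder for the real mode work */
		ld	r1, 0(r1)			/* back chain -> original vmalloc r1 */
		ld	r0, 16(r1)
		mtlr	r0
		blr

That way the switch is confined to a well-defined call boundary and the
compiler never sees r1 change under its feet.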
> +}
> +
> +static __always_inline void swap_stack_vmalloc(void)
> +{
> +	current_stack_pointer = get_paca()->kstack_vmalloc_base | \
> +		(current_stack_pointer & (THREAD_SIZE - 1));

Same

> +}
> +
> +#endif // __ASSEMBLY__
> +
> +#else // CONFIG_VMAP_STACK && CONFIG_PPC_BOOK3S_64
> +
> +#define SWAP_STACK_LINEAR(tmp)
> +#define SWAP_STACK_VMALLOC(tmp)
> +
> +static __always_inline void *stack_pa(void *ptr)
> +{
> +	return ptr;
> +}
> +
> +static __always_inline void swap_stack_linear(void)
> +{
> +}
> +
> +static __always_inline void swap_stack_vmalloc(void)
> +{
> +}
> +
> +#endif // CONFIG_VMAP_STACK && CONFIG_PPC_BOOK3S_64
> +
> +#endif // _ASM_POWERPC_BOOK3S_64_STACK_H
> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
> index 726125a534de..0360360ad2cf 100644
> --- a/arch/powerpc/include/asm/opal.h
> +++ b/arch/powerpc/include/asm/opal.h
> @@ -13,6 +13,7 @@
>  #ifndef __ASSEMBLY__
>
>  #include <linux/notifier.h>
> +#include <asm/book3s/64/stack.h>
>
>  /* We calculate number of sg entries based on PAGE_SIZE */
>  #define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry))
> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> index 09f1790d0ae1..51d060036fa1 100644
> --- a/arch/powerpc/include/asm/paca.h
> +++ b/arch/powerpc/include/asm/paca.h
> @@ -163,6 +163,10 @@ struct paca_struct {
>  	 */
>  	struct task_struct *__current;	/* Pointer to current */
>  	u64 kstack;			/* Saved Kernel stack addr */
> +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S_64)
> +	u64 kstack_vmalloc_base;	/* Base address of stack in the vmalloc mapping */
> +	u64 kstack_linear_base;		/* Base address of stack in the linear mapping */
> +#endif /* CONFIG_VMAP_STACK && CONFIG_PPC_BOOK3S_64 */
>  	u64 saved_r1;			/* r1 save for RTAS calls or PM or EE=0 */
>  	u64 saved_msr;			/* MSR saved here by enter_rtas */
>  #ifdef CONFIG_PPC64
> diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
> index 631802999d59..999078452aa4 100644
> --- a/arch/powerpc/include/asm/processor.h
> +++ b/arch/powerpc/include/asm/processor.h
> @@ -132,6 +132,12 @@ struct debug_reg {
>
>  struct thread_struct {
>  	unsigned long	ksp;		/* Kernel stack pointer */
> +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S_64)
> +	// Kernel stack base addresses in vmalloc and linear mappings
> +	// Used for swapping to linear map in real mode code
> +	unsigned long	ksp_vmalloc_base;
> +	unsigned long	ksp_linear_base;
> +#endif /* CONFIG_VMAP_STACK && CONFIG_PPC_BOOK3S_64 */
>
>  #ifdef CONFIG_PPC64
>  	unsigned long	ksp_vsid;
> diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
> index 4ce2a4aa3985..46ace958d3ce 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -99,6 +99,10 @@ int main(void)
>  #endif
>
>  	OFFSET(KSP, thread_struct, ksp);
> +#ifdef CONFIG_VMAP_STACK
> +	OFFSET(KSP_VMALLOC_BASE, thread_struct, ksp_vmalloc_base);
> +	OFFSET(KSP_LINEAR_BASE, thread_struct, ksp_linear_base);
> +#endif /* CONFIG_VMAP_STACK */
>  	OFFSET(PT_REGS, thread_struct, regs);
>  #ifdef CONFIG_BOOKE
>  	OFFSET(THREAD_NORMSAVES, thread_struct, normsave[0]);
> @@ -181,6 +185,10 @@ int main(void)
>  	OFFSET(PACAPACAINDEX, paca_struct, paca_index);
>  	OFFSET(PACAPROCSTART, paca_struct, cpu_start);
>  	OFFSET(PACAKSAVE, paca_struct, kstack);
> +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S_64)
> +	OFFSET(PACAKSTACK_VMALLOC_BASE, paca_struct, kstack_vmalloc_base);
> +	OFFSET(PACAKSTACK_LINEAR_BASE, paca_struct, kstack_linear_base);
> +#endif /* CONFIG_VMAP_STACK && CONFIG_PPC_BOOK3S_64 */
>  	OFFSET(PACACURRENT, paca_struct, __current);
>  	DEFINE(PACA_THREAD_INFO, offsetof(struct paca_struct, __current) +
>  				 offsetof(struct task_struct, thread_info));
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index af25db6e0205..cd9e56b25934 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -253,6 +253,13 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
>  	mr	r1,r8		/* start using new stack pointer */
>  	std	r7,PACAKSAVE(r13)
>
> +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S_64)
> +	ld	r8,KSP_LINEAR_BASE(r4)
> +	std	r8,PACAKSTACK_LINEAR_BASE(r13)
> +	ld	r8,KSP_VMALLOC_BASE(r4)
> +	std	r8,PACAKSTACK_VMALLOC_BASE(r13)

Do you only have r8 to play with? Otherwise I'd suggest performing the two
ld then the two std. Or maybe that doesn't matter on ppc64.
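That is, assuming another volatile register such as r9 is actually free at
this point (untested):

	ld	r8,KSP_LINEAR_BASE(r4)
	ld	r9,KSP_VMALLOC_BASE(r4)
	std	r8,PACAKSTACK_LINEAR_BASE(r13)
	std	r9,PACAKSTACK_VMALLOC_BASE(r13)

That groups the two loads together instead of reusing r8 between each pair;
as said above it probably makes no difference on recent cores.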
> +#endif /* CONFIG_VMAP_STACK && CONFIG_PPC_BOOK3S_64 */
> +
>  	ld	r6,_CCR(r1)
>  	mtcrf	0xFF,r6
>
> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> index 67da147fe34d..07917726c629 100644
> --- a/arch/powerpc/kernel/process.c
> +++ b/arch/powerpc/kernel/process.c
> @@ -1782,6 +1782,10 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
>  	kregs = (struct pt_regs *) sp;
>  	sp -= STACK_FRAME_OVERHEAD;
>  	p->thread.ksp = sp;
> +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S_64)
> +	p->thread.ksp_vmalloc_base = sp & ~(THREAD_SIZE - 1);
> +	p->thread.ksp_linear_base = (u64)__va(vmalloc_to_pfn((void *)sp) << PAGE_SHIFT);

What about:

	page_to_virt(vmalloc_to_page((void *)sp))

But is that really the linear base you want? Isn't it the phys address? In
that case you can do:

	page_to_phys(vmalloc_to_page((void *)sp))

> +#endif /* CONFIG_VMAP_STACK && CONFIG_PPC_BOOK3S_64 */
>  #ifdef CONFIG_HAVE_HW_BREAKPOINT
>  	for (i = 0; i < nr_wp_slots(); i++)
>  		p->thread.ptrace_bps[i] = NULL;
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 0da6e59161cd..466ccab5adb8 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -60,6 +60,7 @@
>  #include <asm/ftrace.h>
>  #include <asm/kup.h>
>  #include <asm/fadump.h>
> +#include <asm/book3s/64/stack.h>

Could we avoid including a book3s/64 header directly? Could it come via a
more generic one, maybe pgtable.h? As you can see with

	git grep "asm/book3s/64" arch/powerpc/

there is no direct inclusion of book3s64 headers in generic C files.

>
>  #ifdef DEBUG
>  #include <asm/udbg.h>
> @@ -1250,6 +1251,12 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
>  	paca_ptrs[cpu]->__current = idle;
>  	paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
>  				 THREAD_SIZE - STACK_FRAME_OVERHEAD;
> +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S_64)
> +	paca_ptrs[cpu]->kstack_linear_base = is_vmalloc_addr((void *)paca_ptrs[cpu]->kstack) ?
> +		vmalloc_to_phys((void *)(paca_ptrs[cpu]->kstack)) :
> +		paca_ptrs[cpu]->kstack;
> +	paca_ptrs[cpu]->kstack_vmalloc_base = paca_ptrs[cpu]->kstack & (THREAD_SIZE - 1);
> +#endif // CONFIG_VMAP_STACK && CONFIG_PPC_BOOK3S_64
>  #endif
>  	task_thread_info(idle)->cpu = cpu;
>  	secondary_current = current_set[cpu] = idle;
> diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
> index f51c882bf902..236287c4a231 100644
> --- a/arch/powerpc/xmon/xmon.c
> +++ b/arch/powerpc/xmon/xmon.c
> @@ -2697,6 +2697,10 @@ static void dump_one_paca(int cpu)
>  	DUMP(p, __current, "%-*px");
>  	DUMP(p, kstack, "%#-*llx");
>  	printf(" %-*s = 0x%016llx\n", 25, "kstack_base", p->kstack & ~(THREAD_SIZE - 1));
> +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S_64)
> +	DUMP(p, kstack_linear_base, "%#-*llx");
> +	DUMP(p, kstack_vmalloc_base, "%#-*llx");
> +#endif
>  #ifdef CONFIG_STACKPROTECTOR
>  	DUMP(p, canary, "%#-*lx");
>  #endif