Hari Bathini <hbath...@linux.ibm.com> writes:

> With commit b33f778bba5ef ("kfence: alloc kfence_pool after system
> startup"), KFENCE pool can be allocated after system startup via the
> page allocator. This can lead to problems as all memory is not mapped
> at page granularity anymore with CONFIG_KFENCE. Address this by direct
> mapping all memory at PMD level and split the mapping for PMD pages
> that overlap with __kfence_pool to page level granularity if and when
> __kfence_pool is allocated after system startup.
>
> Signed-off-by: Hari Bathini <hbath...@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/64/radix.h |  2 +
>  arch/powerpc/include/asm/kfence.h          | 14 +++++-
>  arch/powerpc/mm/book3s64/radix_pgtable.c   | 50 +++++++++++++++++++++-
>  3 files changed, 64 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
> index 8f55ff74bb68..0423ddbcf73c 100644
> --- a/arch/powerpc/include/asm/book3s/64/radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/radix.h
> @@ -340,6 +340,8 @@ extern void radix__vmemmap_remove_mapping(unsigned long start,
>  extern int radix__map_kernel_page(unsigned long ea, unsigned long pa,
>  				 pgprot_t flags, unsigned int psz);
>
> +extern bool radix_kfence_init_pool(void);
> +
>  static inline unsigned long radix__get_tree_size(void)
>  {
>  	unsigned long rts_field;
> diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
> index 18ec2b06ba1e..c5d2fb2f9ecb 100644
> --- a/arch/powerpc/include/asm/kfence.h
> +++ b/arch/powerpc/include/asm/kfence.h
> @@ -18,12 +18,24 @@
>
>  #ifdef CONFIG_KFENCE
>  extern bool kfence_early_init;
> -#endif
> +
> +static inline bool kfence_alloc_pool_late(void)
> +{
> +	return !kfence_early_init;
> +}
Minor nit, but do we need kfence_alloc_pool_late()? The function name
looks confusing. Can we not just use !kfence_early_init? If not, then
maybe bool kfence_late_init?

>
>  static inline bool arch_kfence_init_pool(void)
>  {
> +#ifdef CONFIG_PPC_BOOK3S_64
> +	if (radix_enabled())
> +		return radix_kfence_init_pool();

Can we directly check if (radix_enabled() && !kfence_early_init) here
instead of embedding the check inside radix_kfence_late_init_pool()?
(See the sketch at the end of this mail.)

> +#endif
> +
>  	return true;
>  }
> +#else
> +static inline bool kfence_alloc_pool_late(void) { return false; }
> +#endif
>
>  #ifdef CONFIG_PPC64
>  static inline bool kfence_protect_page(unsigned long addr, bool protect)
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index fccbf92f279b..f4374e3e31e1 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -253,6 +253,53 @@ void radix__mark_initmem_nx(void)
>  }
>  #endif /* CONFIG_STRICT_KERNEL_RWX */
>
> +#ifdef CONFIG_KFENCE
> +static inline int radix_split_pmd_page(pmd_t *pmd, unsigned long addr)
> +{
> +	pte_t *pte = pte_alloc_one_kernel(&init_mm);
> +	unsigned long pfn = PFN_DOWN(__pa(addr));

Minor nit: since addr will always be page aligned, maybe PHYS_PFN() is
better suited here. It does not matter much, though.

> +	int i;
> +
> +	if (!pte)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < PTRS_PER_PTE; i++) {
> +		__set_pte_at(&init_mm, addr, pte + i, pfn_pte(pfn + i, PAGE_KERNEL), 0);
> +		asm volatile("ptesync": : :"memory");
> +	}

Maybe add a comment above the loop on why __set_pte_at() is ok for late
kfence init, and why not pte_update()? [1]

[1]: https://lore.kernel.org/linuxppc-dev/87y318wp9r....@linux.ibm.com/

> +	pmd_populate_kernel(&init_mm, pmd, pte);
> +
> +	flush_tlb_kernel_range(addr, addr + PMD_SIZE);
> +	return 0;
> +}
> +
> +bool radix_kfence_init_pool(void)
> +{
> +	unsigned int page_psize, pmd_psize;
> +	unsigned long addr;
> +	pmd_t *pmd;
> +
> +	if (!kfence_alloc_pool_late())
> +		return true;
> +
> +	page_psize = shift_to_mmu_psize(PAGE_SHIFT);
> +	pmd_psize = shift_to_mmu_psize(PMD_SHIFT);
> +	for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr);
> +	     addr += PAGE_SIZE) {
> +		pmd = pmd_off_k(addr);
> +
> +		if (pmd_leaf(*pmd)) {
> +			if (radix_split_pmd_page(pmd, addr & PMD_MASK))
> +				return false;
> +			update_page_count(pmd_psize, -1);
> +			update_page_count(page_psize, PTRS_PER_PTE);
> +		}
> +	}
> +
> +	return true;
> +}
> +#endif
> +
>  static inline void __meminit
>  print_mapping(unsigned long start, unsigned long end, unsigned long size, bool exec)
>  {
> @@ -391,7 +438,8 @@ static void __init radix_init_pgtable(void)
>  			continue;
>  		}
>
> -		WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL, ~0UL));
> +		WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL,
> +						kfence_alloc_pool_late() ? PMD_SIZE : ~0UL));

So every time !kfence_early_init is true, we always use PMD_SIZE. Does
that mean we never use 1G mappings for the direct map in that case?

>  	}
>
>  #ifdef CONFIG_KFENCE
> --
> 2.44.0
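
To make the arch_kfence_init_pool() comment above concrete, here is a
rough, untested sketch of what I had in mind (assuming the radix helper
keeps the radix_kfence_init_pool() name and prototype from this patch,
and that kfence_early_init stays visible in asm/kfence.h as it is here):

	static inline bool arch_kfence_init_pool(void)
	{
	#ifdef CONFIG_PPC_BOOK3S_64
		/* Only a late-allocated pool needs the radix PMD split. */
		if (radix_enabled() && !kfence_early_init)
			return radix_kfence_init_pool();
	#endif
		return true;
	}

That way radix_kfence_init_pool() would not need the
kfence_alloc_pool_late() check at its top, and the early-init case
still falls through to return true.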