Hari Bathini <hbath...@linux.ibm.com> writes:

> With commit b33f778bba5ef ("kfence: alloc kfence_pool after system
> startup"), KFENCE pool can be allocated after system startup via the
> page allocator. This can lead to problems as all memory is not mapped
> at page granularity anymore with CONFIG_KFENCE. Address this by direct
> mapping all memory at PMD level and split the mapping for PMD pages
> that overlap with __kfence_pool to page level granularity if and when
> __kfence_pool is allocated after system startup.
>
> Signed-off-by: Hari Bathini <hbath...@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/64/radix.h |  2 +
>  arch/powerpc/include/asm/kfence.h          | 14 +++++-
>  arch/powerpc/mm/book3s64/radix_pgtable.c   | 50 +++++++++++++++++++++-
>  3 files changed, 64 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
> index 8f55ff74bb68..0423ddbcf73c 100644
> --- a/arch/powerpc/include/asm/book3s/64/radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/radix.h
> @@ -340,6 +340,8 @@ extern void radix__vmemmap_remove_mapping(unsigned long start,
>  extern int radix__map_kernel_page(unsigned long ea, unsigned long pa,
>                                pgprot_t flags, unsigned int psz);
>  
> +extern bool radix_kfence_init_pool(void);
> +
>  static inline unsigned long radix__get_tree_size(void)
>  {
>       unsigned long rts_field;
> diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
> index 18ec2b06ba1e..c5d2fb2f9ecb 100644
> --- a/arch/powerpc/include/asm/kfence.h
> +++ b/arch/powerpc/include/asm/kfence.h
> @@ -18,12 +18,24 @@
>  
>  #ifdef CONFIG_KFENCE
>  extern bool kfence_early_init;
> -#endif
> +
> +static inline bool kfence_alloc_pool_late(void)
> +{
> +     return !kfence_early_init;
> +}

Minor nit, but do we need kfence_alloc_pool_late() at all?
The function name is confusing. Can we not just use
!kfence_early_init directly? If not, then maybe call it
bool kfence_late_init()?
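
Something like this maybe (untested, just to show what I mean):

        #ifdef CONFIG_KFENCE
        extern bool kfence_early_init;

        static inline bool kfence_late_init(void)
        {
                return !kfence_early_init;
        }
        #else
        static inline bool kfence_late_init(void) { return false; }
        #endif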

>  
>  static inline bool arch_kfence_init_pool(void)
>  {
> +#ifdef CONFIG_PPC_BOOK3S_64
> +     if (radix_enabled())
> +             return radix_kfence_init_pool();

Can we directly check...
        if (radix_enabled() && !kfence_early_init)
... here, instead of embedding the !kfence_early_init check inside
radix_kfence_init_pool()?
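
i.e. something like this (untested sketch, keeping the function name
from this patch):

        static inline bool arch_kfence_init_pool(void)
        {
        #ifdef CONFIG_PPC_BOOK3S_64
                if (radix_enabled() && !kfence_early_init)
                        return radix_kfence_init_pool();
        #endif
                return true;
        }

...and then drop the kfence_alloc_pool_late() check from
radix_kfence_init_pool() itself.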

> +#endif
> +
>       return true;
>  }
> +#else
> +static inline bool kfence_alloc_pool_late(void) { return false; }
> +#endif
>  
>  #ifdef CONFIG_PPC64
>  static inline bool kfence_protect_page(unsigned long addr, bool protect)
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index fccbf92f279b..f4374e3e31e1 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -253,6 +253,53 @@ void radix__mark_initmem_nx(void)
>  }
>  #endif /* CONFIG_STRICT_KERNEL_RWX */
>  
> +#ifdef CONFIG_KFENCE
> +static inline int radix_split_pmd_page(pmd_t *pmd, unsigned long addr)
> +{
> +     pte_t *pte = pte_alloc_one_kernel(&init_mm);
> +     unsigned long pfn = PFN_DOWN(__pa(addr));

Minor nit: since addr will always be page aligned, maybe PHYS_PFN() is
better suited here. Although it does not matter much.

> +     int i;
> +
> +     if (!pte)
> +             return -ENOMEM;
> +
> +     for (i = 0; i < PTRS_PER_PTE; i++) {
> +             __set_pte_at(&init_mm, addr, pte + i, pfn_pte(pfn + i, PAGE_KERNEL), 0);
> +             asm volatile("ptesync": : :"memory");
> +     }

Maybe add a comment above the loop on why __set_pte_at() is OK for late
kfence init, and why pte_update() is not needed? [1]

[1]: https://lore.kernel.org/linuxppc-dev/87y318wp9r....@linux.ibm.com/
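
Something along these lines perhaps (just my understanding, please
correct me if I got it wrong):

        /*
         * The freshly allocated PTE page is not yet reachable by other
         * CPUs or the hardware walker -- it only becomes visible via
         * pmd_populate_kernel() below -- and there is no existing PTE
         * state to preserve. So plain __set_pte_at() is sufficient here
         * and the atomics in pte_update() are not needed.
         */
        for (i = 0; i < PTRS_PER_PTE; i++) {
                ...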


> +     pmd_populate_kernel(&init_mm, pmd, pte);
> +
> +     flush_tlb_kernel_range(addr, addr + PMD_SIZE);
> +     return 0;
> +}
> +
> +bool radix_kfence_init_pool(void)
> +{
> +     unsigned int page_psize, pmd_psize;
> +     unsigned long addr;
> +     pmd_t *pmd;
> +
> +     if (!kfence_alloc_pool_late())
> +             return true;
> +
> +     page_psize = shift_to_mmu_psize(PAGE_SHIFT);
> +     pmd_psize = shift_to_mmu_psize(PMD_SHIFT);
> +     for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr);
> +          addr += PAGE_SIZE) {
> +             pmd = pmd_off_k(addr);
> +
> +             if (pmd_leaf(*pmd)) {
> +                     if (radix_split_pmd_page(pmd, addr & PMD_MASK))
> +                             return false;
> +                     update_page_count(pmd_psize, -1);
> +                     update_page_count(page_psize, PTRS_PER_PTE);
> +             }
> +     }
> +
> +     return true;
> +}
> +#endif
> +
>  static inline void __meminit
>  print_mapping(unsigned long start, unsigned long end, unsigned long size, bool exec)
>  {
> @@ -391,7 +438,8 @@ static void __init radix_init_pgtable(void)
>                       continue;
>               }
>  
> -             WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL, ~0UL));
> +             WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL,
> +                                             kfence_alloc_pool_late() ? PMD_SIZE : ~0UL));

So every time !kfence_early_init is true, we always use PMD_SIZE.
Does that mean we never create 1G mappings for the direct map in that case?
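
IIUC, create_physical_mapping() only picks the 1G mapping size when it
fits within the max_mapping_size passed in, roughly (quoting from
memory, may not be exact):

        gap = next_boundary(addr, end) - addr;
        if (gap > max_mapping_size)
                gap = max_mapping_size;

        if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
            mmu_psize_defs[MMU_PAGE_1G].shift) {
                mapping_size = PUD_SIZE;
                psize = MMU_PAGE_1G;
        }

...so with PMD_SIZE passed here, the whole direct map would be limited
to 2M (or smaller) mappings whenever KFENCE is built in and the pool is
not allocated early.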

>       }
>  
>  #ifdef CONFIG_KFENCE
> -- 
> 2.44.0
