On Tue, Oct 13, 2020 at 05:46:15PM +0200, Peter Zijlstra wrote:
> Nah, that generic function should work for 90% of all archs, it's just
> a few oddballs that need something else.
>
> Also, if we add that hugetlb exception, we'll even get the usermap for
> those oddballs right.
>
> I'll take this version after the merge window, I'll add __weak for the
> oddballs and also add the hugetlb userspace thing on top.
Like so..

---
Subject: perf,mm: Handle non-page-table-aligned hugetlbfs
From: Peter Zijlstra <pet...@infradead.org>
Date: Fri, 9 Oct 2020 11:09:27 +0200

A limited number of architectures support hugetlbfs sizes that do not
align with the page-tables (ARM64, Power, Sparc64). Add support for
this to the generic perf_get_page_size() implementation, and also
allow an architecture to override this implementation.

The latter is only needed when an architecture uses non-page-table
aligned huge pages in its kernel map.

Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
---
 kernel/events/core.c |   39 +++++++++++++++++++++++++++++++++------
 1 file changed, 33 insertions(+), 6 deletions(-)

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6996,10 +6996,18 @@ static u64 perf_virt_to_phys(u64 virt)
 
 #ifdef CONFIG_MMU
 /*
- * Return the MMU page size of a given virtual address
+ * Return the MMU page size of a given virtual address.
+ *
+ * This generic implementation handles page-table aligned huge pages, as well
+ * as non-page-table aligned hugetlbfs compound pages.
+ *
+ * If an architecture supports and uses non-page-table aligned pages in its
+ * kernel mapping it will need to provide its own implementation of this
+ * function.
  */
-static u64 __perf_get_page_size(struct mm_struct *mm, unsigned long addr)
+__weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr)
 {
+	struct page *page;
 	pgd_t *pgd;
 	p4d_t *p4d;
 	pud_t *pud;
@@ -7021,15 +7029,27 @@ static u64 __perf_get_page_size(struct m
 	if (!pud_present(*pud))
 		return 0;
 
-	if (pud_leaf(*pud))
+	if (pud_leaf(*pud)) {
+#ifdef pud_page
+		page = pud_page(*pud);
+		if (PageHuge(page))
+			return page_size(compound_head(page));
+#endif
 		return 1ULL << PUD_SHIFT;
+	}
 
 	pmd = pmd_offset(pud, addr);
 	if (!pmd_present(*pmd))
 		return 0;
 
-	if (pmd_leaf(*pmd))
+	if (pmd_leaf(*pmd)) {
+#ifdef pmd_page
+		page = pmd_page(*pmd);
+		if (PageHuge(page))
+			return page_size(compound_head(page));
+#endif
 		return 1ULL << PMD_SHIFT;
+	}
 
 	pte = pte_offset_map(pmd, addr);
 	if (!pte_present(*pte)) {
@@ -7037,13 +7057,20 @@ static u64 __perf_get_page_size(struct m
 		return 0;
 	}
 
+	page = pte_page(*pte);
+	if (PageHuge(page)) {
+		u64 size = page_size(compound_head(page));
+		pte_unmap(pte);
+		return size;
+	}
+
 	pte_unmap(pte);
 	return PAGE_SIZE;
 }
 
 #else
 
-static u64 __perf_get_page_size(struct mm_struct *mm, unsigned long addr)
+static u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr)
 {
 	return 0;
 }
@@ -7074,7 +7101,7 @@ static u64 perf_get_page_size(unsigned l
 		mm = &init_mm;
 	}
 
-	size = __perf_get_page_size(mm, addr);
+	size = arch_perf_get_page_size(mm, addr);
 
 	local_irq_restore(flags);
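
For the oddballs, a rough sketch of what a strong override of the __weak
symbol could look like; this is not part of the patch, and the
xxx_kernel_huge_mapping_size() helper is entirely hypothetical, standing
in for whatever bookkeeping an arch keeps for its non-page-table aligned
kernel mappings:

u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr)
{
	u64 size;

	/*
	 * This strong definition replaces the __weak generic one
	 * wholesale; the generic body is not callable from here, so
	 * the page-table walk has to be duplicated below.
	 */
	if (mm == &init_mm) {
		/*
		 * Kernel huge mappings that do not align with the
		 * page-tables are invisible to a page-table walk;
		 * consult the arch's own records instead
		 * (hypothetical helper).
		 */
		size = xxx_kernel_huge_mapping_size(addr);
		if (size)
			return size;
	}

	/*
	 * User mappings, including hugetlbfs, are found by the same
	 * walk the generic implementation does; a real override would
	 * duplicate that walk here. Returning 0 means 'unknown', as
	 * in the generic version.
	 */
	return 0;
}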
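
The userspace side stays with the generic logic either way: hugetlbfs
pages are compound pages, so the PageHuge() check plus
page_size(compound_head(page)) in the patch yields the real mapping size
even when it does not match the page-table level, which is exactly the
hugetlb exception discussed above.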