On Tue, 12 Jan 2016 12:45:38 +0530 "Aneesh Kumar K.V" <aneesh.ku...@linux.vnet.ibm.com> wrote:
> This is needed so that we can support both hash and radix page table > using single kernel. Radix kernel uses a 4 level table. > > Signed-off-by: Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com> > --- > arch/powerpc/Kconfig | 1 + > arch/powerpc/include/asm/book3s/64/hash-4k.h | 33 > +-------------------------- > arch/powerpc/include/asm/book3s/64/hash-64k.h | 20 +++++++++------- > arch/powerpc/include/asm/book3s/64/hash.h | 8 +++++++ > arch/powerpc/include/asm/book3s/64/pgtable.h | 25 > +++++++++++++++++++- arch/powerpc/include/asm/pgalloc-64.h | > 24 ++++++++++++++++--- arch/powerpc/include/asm/pgtable-types.h > | 13 +++++++---- arch/powerpc/mm/init_64.c | 21 > ++++++++++++----- 8 files changed, 90 insertions(+), 55 deletions(-) > > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig > index 378f1127ca98..618afea4c9fc 100644 > --- a/arch/powerpc/Kconfig > +++ b/arch/powerpc/Kconfig > @@ -303,6 +303,7 @@ config ZONE_DMA32 snip > - > #define PTE_INDEX_SIZE 8 > -#define PMD_INDEX_SIZE 10 > -#define PUD_INDEX_SIZE 0 > +#define PMD_INDEX_SIZE 5 > +#define PUD_INDEX_SIZE 5 > #define PGD_INDEX_SIZE 12 > OK, so the 10-bit PMD index was split into 5 bits for the PMD and 5 bits for the PUD? 
What is the plan for huge pages? I saw you mentioned it was a TODO. > #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) > #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) > +#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) > #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) > > /* With 4k base page size, hugepage PTEs go at the PMD level */ > @@ -20,8 +19,13 @@ > #define PMD_SIZE (1UL << PMD_SHIFT) > #define PMD_MASK (~(PMD_SIZE-1)) > > +/* PUD_SHIFT determines what a third-level page table entry can map > */ +#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) > +#define PUD_SIZE (1UL << PUD_SHIFT) > +#define PUD_MASK (~(PUD_SIZE-1)) > + > /* PGDIR_SHIFT determines what a third-level page table entry can > map */ -#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) > +#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) > #define PGDIR_SIZE (1UL << PGDIR_SHIFT) > #define PGDIR_MASK (~(PGDIR_SIZE-1)) > > @@ -61,6 +65,8 @@ > #define PMD_MASKED_BITS (PTE_FRAG_SIZE - 1) > /* Bits to mask out from a PGD/PUD to get to the PMD page */ The comment looks like it applies to the PMD and not the PUD. > #define PUD_MASKED_BITS 0x1ff Given that PUD is now 5 bits, this should be 0x1f? > +/* FIXME!! check this */ > +#define PGD_MASKED_BITS 0 > PGD_MASKED_BITS is 0? 
Shouldn't it be 0xfe? > #ifndef __ASSEMBLY__ > > @@ -130,11 +136,9 @@ extern bool __rpte_sub_valid(real_pte_t rpte, > unsigned long index); #else > #define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) > #endif > +#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) > #define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) > > -#define pgd_pte(pgd) (pud_pte(((pud_t){ pgd }))) > -#define pte_pgd(pte) ((pgd_t)pte_pud(pte)) > - > #ifdef CONFIG_HUGETLB_PAGE > /* > * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can > have diff --git a/arch/powerpc/include/asm/book3s/64/hash.h > b/arch/powerpc/include/asm/book3s/64/hash.h index > f46974d0134a..9ff1e056acef 100644 --- > a/arch/powerpc/include/asm/book3s/64/hash.h +++ > b/arch/powerpc/include/asm/book3s/64/hash.h @@ -226,6 +226,7 @@ > #define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS) > > #define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & > (PTRS_PER_PGD - 1)) +#define pud_index(address) (((address) >> > (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) #define pmd_index(address) > (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1)) #define > pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1)) > @@ -354,8 +355,15 @@ static inline void __ptep_set_access_flags(pte_t > *ptep, pte_t entry) :"cc"); > } > > +static inline int pgd_bad(pgd_t pgd) > +{ > + return (pgd_val(pgd) == 0); > +} > + > #define __HAVE_ARCH_PTE_SAME > #define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & > ~_PAGE_HPTEFLAGS) == 0) +#define pgd_page_vaddr(pgd) > (pgd_val(pgd) & ~PGD_MASKED_BITS) + > > /* Generic accessors to PTE bits */ > static inline int pte_write(pte_t pte) > { return !!(pte_val(pte) & _PAGE_RW);} diff --git > a/arch/powerpc/include/asm/book3s/64/pgtable.h > b/arch/powerpc/include/asm/book3s/64/pgtable.h index > e7162dba987e..8f639401c7ba 100644 --- > a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ > b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -111,6 +111,26 @@ > static inline void 
pgd_set(pgd_t *pgdp, unsigned long val) *pgdp = > __pgd(val); } > +static inline void pgd_clear(pgd_t *pgdp) > +{ > + *pgdp = __pgd(0); > +} > + > +#define pgd_none(pgd) (!pgd_val(pgd)) > +#define pgd_present(pgd) (!pgd_none(pgd)) > + > +static inline pte_t pgd_pte(pgd_t pgd) > +{ > + return __pte(pgd_val(pgd)); > +} > + > +static inline pgd_t pte_pgd(pte_t pte) > +{ > + return __pgd(pte_val(pte)); > +} > + > +extern struct page *pgd_page(pgd_t pgd); > + > /* > * Find an entry in a page-table-directory. We combine the address > region > * (the high order N bits) and the pgd portion of the address. > @@ -118,9 +138,10 @@ static inline void pgd_set(pgd_t *pgdp, unsigned > long val) > #define pgd_offset(mm, address) ((mm)->pgd + > pgd_index(address)) > +#define pud_offset(pgdp, addr) \ > + (((pud_t *) pgd_page_vaddr(*(pgdp))) + pud_index(addr)) > #define pmd_offset(pudp,addr) \ > (((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr)) > - > #define pte_offset_kernel(dir,addr) \ > (((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr)) > > @@ -135,6 +156,8 @@ static inline void pgd_set(pgd_t *pgdp, unsigned > long val) pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, > pte_val(e)) #define pmd_ERROR(e) \ > pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, > pmd_val(e)) +#define pud_ERROR(e) \ > + pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, > pud_val(e)) #define pgd_ERROR(e) \ > pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, > pgd_val(e)) > diff --git a/arch/powerpc/include/asm/pgalloc-64.h > b/arch/powerpc/include/asm/pgalloc-64.h index > 69ef28a81733..014489a619d0 100644 --- > a/arch/powerpc/include/asm/pgalloc-64.h +++ > b/arch/powerpc/include/asm/pgalloc-64.h @@ -171,7 +171,25 @@ extern > void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int > shift); extern void __tlb_remove_table(void *_table); #endif > > -#define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned > long)pmd) +#ifndef __PAGETABLE_PUD_FOLDED > +/* book3s 64 is 4 
level page table */ > +#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD) > +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned > long addr) +{ > + return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), > + GFP_KERNEL|__GFP_REPEAT); > +} > + > +static inline void pud_free(struct mm_struct *mm, pud_t *pud) > +{ > + kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud); > +} > +#endif > + > +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, > pmd_t *pmd) +{ > + pud_set(pud, (unsigned long)pmd); > +} > > static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t > *pmd, pte_t *pte) > @@ -233,11 +251,11 @@ static inline void pmd_free(struct mm_struct > *mm, pmd_t *pmd) > #define __pmd_free_tlb(tlb, pmd, addr) \ > pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX) > -#ifndef CONFIG_PPC_64K_PAGES > +#ifndef __PAGETABLE_PUD_FOLDED > #define __pud_free_tlb(tlb, pud, addr) \ > pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE) > > -#endif /* CONFIG_PPC_64K_PAGES */ > +#endif /* __PAGETABLE_PUD_FOLDED */ > > #define check_pgt_cache() do { } while (0) > > diff --git a/arch/powerpc/include/asm/pgtable-types.h > b/arch/powerpc/include/asm/pgtable-types.h index > 71487e1ca638..43140f8b0592 100644 --- > a/arch/powerpc/include/asm/pgtable-types.h +++ > b/arch/powerpc/include/asm/pgtable-types.h @@ -21,15 +21,18 @@ static > inline unsigned long pmd_val(pmd_t x) return x.pmd; > } > > -/* PUD level exusts only on 4k pages */ > -#ifndef CONFIG_PPC_64K_PAGES > +/* > + * 64 bit hash always use 4 level table. Everybody else use 4 level > + * only for 4K page size. 
> + */ > +#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES) > typedef struct { unsigned long pud; } pud_t; > #define __pud(x) ((pud_t) { (x) }) > static inline unsigned long pud_val(pud_t x) > { > return x.pud; > } > -#endif /* !CONFIG_PPC_64K_PAGES */ > +#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */ > #endif /* CONFIG_PPC64 */ > > /* PGD level */ > @@ -66,14 +69,14 @@ static inline unsigned long pmd_val(pmd_t pmd) > return pmd; > } > > -#ifndef CONFIG_PPC_64K_PAGES > +#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES) > typedef unsigned long pud_t; > #define __pud(x) (x) > static inline unsigned long pud_val(pud_t pud) > { > return pud; > } > -#endif /* !CONFIG_PPC_64K_PAGES */ > +#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */ > #endif /* CONFIG_PPC64 */ > > typedef unsigned long pgd_t; > diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c > index 379a6a90644b..8ce1ec24d573 100644 > --- a/arch/powerpc/mm/init_64.c > +++ b/arch/powerpc/mm/init_64.c > @@ -85,6 +85,11 @@ static void pgd_ctor(void *addr) > memset(addr, 0, PGD_TABLE_SIZE); > } > > +static void pud_ctor(void *addr) > +{ > + memset(addr, 0, PUD_TABLE_SIZE); > +} > + > static void pmd_ctor(void *addr) > { > memset(addr, 0, PMD_TABLE_SIZE); > @@ -138,14 +143,18 @@ void pgtable_cache_init(void) > { > pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); > pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor); > + /* > + * In all current configs, when the PUD index exists it's the > + * same size as either the pgd or pmd index except with THP > enabled > + * on book3s 64 > + */ > + if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) > + pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor); > + > if (!PGT_CACHE(PGD_INDEX_SIZE) > || !PGT_CACHE(PMD_CACHE_INDEX)) panic("Couldn't allocate pgtable > caches"); > - /* In all current configs, when the PUD index exists it's the > - * same size as either the pgd or pmd index. 
Verify that the > - * initialization above has also created a PUD cache. This > - * will need re-examiniation if we add new possibilities for > - * the pagetable layout. */ > - BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)); > + if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) > + panic("Couldn't allocate pud pgtable caches"); > } > > #ifdef CONFIG_SPARSEMEM_VMEMMAP _______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev