From: Fenghua Yu <fenghua...@intel.com>

A 32-bit x86 machine and kernel using PAE paging currently waste about 4K
of memory per process on Linux, because we have to reserve an entire page
to support a single 32-byte PGD structure. It would be a very good thing
if we could eliminate that wastage.
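To put numbers on it: in PAE mode the top-level table is PTRS_PER_PGD = 4
entries of sizeof(pgdval_t) = 8 bytes each, i.e. 32 bytes, yet today we hand
it a whole page. A quick userspace sketch of that arithmetic follows; the
three constants are hard-coded stand-ins for the kernel macros, not pulled
from kernel headers:

#include <stdio.h>
#include <stdint.h>

/* Stand-ins for the kernel's PAE constants; hard-coded for illustration,
 * not taken from kernel headers. */
#define PAE_PTRS_PER_PGD	4			/* four PDPT entries in PAE mode */
#define PAE_PGD_ENTRY_SIZE	sizeof(uint64_t)	/* 8-byte pgd entries            */
#define X86_PAGE_SIZE		4096			/* one full page per pgd today   */

int main(void)
{
	size_t pgd_bytes = PAE_PTRS_PER_PGD * PAE_PGD_ENTRY_SIZE;

	printf("PAE pgd actually needs:  %zu bytes\n", pgd_bytes);
	printf("page currently reserved: %d bytes\n", X86_PAGE_SIZE);
	printf("wasted per process:      %zu bytes\n",
	       (size_t)X86_PAGE_SIZE - pgd_bytes);
	return 0;
}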
Signed-off-by: Fenghua Yu <fenghua...@intel.com>
---
 arch/x86/mm/pgtable.c | 42 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 6fb6927..695db92 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -1,5 +1,6 @@
 #include <linux/mm.h>
 #include <linux/gfp.h>
+#include <linux/slab.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/tlb.h>
@@ -271,12 +272,46 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
 	}
 }
 
+/*
+ * Xen paravirt assumes the pgd table occupies one full page, and the
+ * 64-bit pgd also needs to be one page.
+ *
+ * But PAE without Xen only needs to allocate 32 bytes for the pgd.
+ *
+ * So if the kernel is compiled for PAE without Xen, we allocate just
+ * the pgd entries to save memory.
+ *
+ * In other cases, one page is allocated for the pgd. In theory, a PAE
+ * kernel not running on Xen could allocate 32 bytes for the pgd as
+ * well, but that would make allocation and freeing more complex while
+ * not being useful in reality. To simplify the code and testing, we
+ * just allocate one page whenever CONFIG_XEN is enabled, regardless of
+ * whether the kernel is actually running on Xen.
+ */
+static inline pgd_t *_pgd_alloc(void)
+{
+#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
+	return kmalloc(sizeof(pgdval_t) * PTRS_PER_PGD, PGALLOC_GFP);
+#else
+	return (pgd_t *)__get_free_page(PGALLOC_GFP);
+#endif
+}
+
+static inline void _pgd_free(pgd_t *pgd)
+{
+#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
+	kfree(pgd);
+#else
+	free_page((unsigned long)pgd);
+#endif
+}
+
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *pgd;
 	pmd_t *pmds[PREALLOCATED_PMDS];
 
-	pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
+	pgd = _pgd_alloc();
 
 	if (pgd == NULL)
 		goto out;
@@ -306,7 +341,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 out_free_pmds:
 	free_pmds(pmds);
 out_free_pgd:
-	free_page((unsigned long)pgd);
+	_pgd_free(pgd);
 out:
 	return NULL;
 }
@@ -316,7 +351,8 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	pgd_mop_up_pmds(mm, pgd);
 	pgd_dtor(pgd);
 	paravirt_pgd_free(mm, pgd);
-	free_page((unsigned long)pgd);
+	_pgd_free(pgd);
+
 }
 
 /*
-- 
1.8.1.2
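Not part of the patch itself, just an illustrative userspace sketch of the
selection logic in _pgd_alloc() above: the same #if shape, with the config
symbols as ordinary preprocessor defines, PTRS_PER_PGD and the page size as
assumed PAE values, and malloc()/free() standing in for kmalloc()/kfree()
and the page allocator. It only shows that the PAE-without-Xen build takes
the small 32-byte path:

/*
 * Userspace sketch only: CONFIG_X86_PAE / CONFIG_XEN are plain defines
 * here, and malloc() stands in for both kmalloc() and __get_free_page().
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define CONFIG_X86_PAE	1	/* pretend this is a PAE build     */
/* #define CONFIG_XEN	1 */	/* left undefined: no Xen support  */

#define PTRS_PER_PGD	4	/* PAE value, assumed              */
#define DEMO_PAGE_SIZE	4096	/* x86 page size                   */

typedef uint64_t pgdval_t;

static void *demo_pgd_alloc(size_t *bytes)
{
#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
	*bytes = sizeof(pgdval_t) * PTRS_PER_PGD;	/* 32 bytes   */
#else
	*bytes = DEMO_PAGE_SIZE;			/* whole page */
#endif
	return malloc(*bytes);
}

int main(void)
{
	size_t bytes;
	void *pgd = demo_pgd_alloc(&bytes);

	printf("this configuration allocates %zu bytes per pgd\n", bytes);
	free(pgd);
	return 0;
}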