Calculating the slice mask can become a significant overhead for
get_unmapped_area. The mask is relatively small and does not change
frequently, so we can cache it in the mm context.

This saves about 30% kernel time on a 4K user address allocation
in a microbenchmark.
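
The microbenchmark itself is not included in this posting. A sketch of
the kind of loop that exercises this path is below; the iteration count
and getrusage() timing here are illustrative assumptions, not the exact
harness used:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <sys/resource.h>

int main(void)
{
	struct rusage start, end;
	long i, iters = 1000000;
	long us;

	getrusage(RUSAGE_SELF, &start);
	for (i = 0; i < iters; i++) {
		/* each mmap() with a NULL hint goes through get_unmapped_area() */
		void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (p == MAP_FAILED)
			return 1;
		munmap(p, 4096);
	}
	getrusage(RUSAGE_SELF, &end);

	/* report system (kernel) time consumed by the loop */
	us = (end.ru_stime.tv_sec - start.ru_stime.tv_sec) * 1000000L +
	     (end.ru_stime.tv_usec - start.ru_stime.tv_usec);
	printf("kernel time: %ld us for %ld mmap/munmap pairs\n", us, iters);
	return 0;
}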

Comments on the approach taken? I think fixed allocations have the
option of avoiding some of the slice calculation entirely, but first
it seems worth having a general speedup that covers all mmaps.
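
For context, the mask being cached is the existing struct slice_mask
from arch/powerpc/mm/slice.c (reproduced approximately here; the field
comments are mine). It is small, which is what makes keeping one copy
per page size in the mm context cheap:

struct slice_mask {
	u16 low_slices;		/* 1 bit per 256MB slice below 4GB */
	u64 high_slices;	/* 1 bit per 1TB slice above 4GB */
};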

Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Anton Blanchard <an...@samba.org>
---
 arch/powerpc/include/asm/book3s/64/mmu.h |  8 +++++++
 arch/powerpc/mm/slice.c                  | 39 ++++++++++++++++++++++++++++++--
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 5854263..0d15af4 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -71,6 +71,14 @@ typedef struct {
 #ifdef CONFIG_PPC_MM_SLICES
        u64 low_slices_psize;   /* SLB page size encodings */
        unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
+       struct slice_mask mask_4k;
+# ifdef CONFIG_PPC_64K_PAGES
+       struct slice_mask mask_64k;
+# endif
+# ifdef CONFIG_HUGETLB_PAGE
+       struct slice_mask mask_16m;
+       struct slice_mask mask_16g;
+# endif
 #else
        u16 sllp;               /* SLB page size encoding */
 #endif
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 2b27458..559ea5f 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -147,7 +147,7 @@ static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
        return ret;
 }
 
-static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
+static struct slice_mask calc_slice_mask_for_size(struct mm_struct *mm, int psize)
 {
        unsigned char *hpsizes;
        int index, mask_index;
@@ -171,6 +171,36 @@ static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
        return ret;
 }
 
+static void recalc_slice_mask_cache(struct mm_struct *mm)
+{
+       mm->context.mask_4k = calc_slice_mask_for_size(mm, MMU_PAGE_4K);
+#ifdef CONFIG_PPC_64K_PAGES
+       mm->context.mask_64k = calc_slice_mask_for_size(mm, MMU_PAGE_64K);
+#endif
+#ifdef CONFIG_HUGETLB_PAGE
+       /* Radix does not come here */
+       mm->context.mask_16m = calc_slice_mask_for_size(mm, MMU_PAGE_16M);
+       mm->context.mask_16g = calc_slice_mask_for_size(mm, MMU_PAGE_16G);
+#endif
+}
+
+static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
+{
+       if (psize == MMU_PAGE_4K)
+               return mm->context.mask_4k;
+#ifdef CONFIG_PPC_64K_PAGES
+       if (psize == MMU_PAGE_64K)
+               return mm->context.mask_64k;
+#endif
+#ifdef CONFIG_HUGETLB_PAGE
+       if (psize == MMU_PAGE_16M)
+               return mm->context.mask_16m;
+       if (psize == MMU_PAGE_16G)
+               return mm->context.mask_16g;
+#endif
+       BUG();
+}
+
 static int slice_check_fit(struct slice_mask mask, struct slice_mask available)
 {
        return (mask.low_slices & available.low_slices) == mask.low_slices &&
@@ -233,6 +263,8 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
 
        spin_unlock_irqrestore(&slice_convert_lock, flags);
 
+       recalc_slice_mask_cache(mm);
+
        copro_flush_all_slbs(mm);
 }
 
@@ -625,7 +657,7 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
                goto bail;
 
        mm->context.user_psize = psize;
-       wmb();
+       wmb(); /* Why? */
 
        lpsizes = mm->context.low_slices_psize;
        for (i = 0; i < SLICE_NUM_LOW; i++)
@@ -652,6 +684,9 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
                  mm->context.low_slices_psize,
                  mm->context.high_slices_psize);
 
+       spin_unlock_irqrestore(&slice_convert_lock, flags);
+       recalc_slice_mask_cache(mm);
+       return;
  bail:
        spin_unlock_irqrestore(&slice_convert_lock, flags);
 }
-- 
2.8.1
