Implement the page-mapping percpu first chunk allocator as a fallback to
the embedding allocator. With 4K hash translation we limit our page
table range to 64TB, and commit 0034d395f89d ("powerpc/mm/hash64: Map all the
kernel regions in the same 0xc range") moved all kernel mappings into
that 64TB range. In order to support a sparse memory layout we need
to increase our linear mapping space and reduce the other mappings.

With such a layout the percpu embedded first chunk allocator will fail
because of the small vmalloc range. Add a fallback to the page-mapping
percpu first chunk allocator for such failures.

Signed-off-by: Aneesh Kumar K.V <aneesh.ku...@linux.ibm.com>
---
 arch/powerpc/Kconfig           |  5 ++-
 arch/powerpc/kernel/setup_64.c | 59 ++++++++++++++++++++++++++++++++--
 2 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6a7c797fa9d2..69d29e3486f8 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -63,7 +63,10 @@ config HAVE_SETUP_PER_CPU_AREA
        def_bool PPC64
 
 config NEED_PER_CPU_EMBED_FIRST_CHUNK
-       def_bool PPC64
+       def_bool y if PPC64
+
+config NEED_PER_CPU_PAGE_FIRST_CHUNK
+       def_bool y if PPC64
 
 config NR_IRQS
        int "Number of virtual interrupt numbers"
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 44b4c432a273..4049d450afd6 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -65,6 +65,7 @@
 #include <asm/hw_irq.h>
 #include <asm/feature-fixups.h>
 #include <asm/kup.h>
+#include <asm/pgalloc.h>
 
 #include "setup.h"
 
@@ -761,13 +762,56 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(__per_cpu_offset);
 
+static void __init pcpu_populate_pte(unsigned long addr) /* make sure PUD, PMD and PTE tables exist for addr in init_mm; handed to pcpu_page_first_chunk() by setup_per_cpu_areas() */
+{
+       pgd_t *pgd = pgd_offset_k(addr); /* PGD entry covering addr */
+       pud_t *pud;
+       pmd_t *pmd;
+
+       if (pgd_none(*pgd)) { /* PGD entry is empty: allocate a PUD table and install it */
+               pud_t *new;
+
+               new = memblock_alloc_from(PUD_TABLE_SIZE, PUD_TABLE_SIZE, 
PUD_TABLE_SIZE); /* NOTE(review): third argument reuses the table size; presumably a minimum address - confirm */
+               if (!new)
+                       goto err_alloc;
+               pgd_populate(&init_mm, pgd, new);
+       }
+
+       pud = pud_offset(pgd, addr);
+       if (pud_none(*pud)) { /* PUD entry is empty: allocate a PMD table and install it */
+               pmd_t *new;
+
+               new = memblock_alloc_from(PMD_TABLE_SIZE, PMD_TABLE_SIZE, 
PMD_TABLE_SIZE);
+               if (!new)
+                       goto err_alloc;
+               pud_populate(&init_mm, pud, new);
+       }
+
+       pmd = pmd_offset(pud, addr);
+       if (!pmd_present(*pmd)) { /* PMD entry not present: allocate a PTE table and install it */
+               pte_t *new;
+
+               new = memblock_alloc_from(PTE_TABLE_SIZE, PTE_TABLE_SIZE, 
PTE_TABLE_SIZE);
+               if (!new)
+                       goto err_alloc;
+               pmd_populate_kernel(&init_mm, pmd, new);
+       }
+
+       return; /* all table levels are in place; no PTE is written by this function */
+
+err_alloc: /* any table allocation failure is fatal */
+       panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
+             __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); /* NOTE(review): reports PAGE_SIZE for size/align/from, not the *_TABLE_SIZE value that was actually requested */
+}
+
+
 void __init setup_per_cpu_areas(void)
 {
        const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
        size_t atom_size;
        unsigned long delta;
        unsigned int cpu;
-       int rc;
+       int rc = -EINVAL;
 
        /*
         * Linear mapping is one of 4K, 1M and 16M.  For 4K, no need
@@ -779,8 +823,19 @@ void __init setup_per_cpu_areas(void)
        else
                atom_size = 1 << 20;
 
-       rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
+       if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+               rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, 
pcpu_cpu_distance,
                                    pcpu_fc_alloc, pcpu_fc_free);
+               if (rc)
+                       pr_warning("PERCPU: %s allocator failed (%d), "
+                                  "falling back to page size\n",
+                                  pcpu_fc_names[pcpu_chosen_fc], rc);
+       }
+
+       if (rc < 0)
+               rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
+                                          pcpu_fc_alloc, pcpu_fc_free,
+                                          pcpu_populate_pte);
        if (rc < 0)
                panic("cannot initialize percpu area (err=%d)", rc);
 
-- 
2.21.0

Reply via email to