Author: jeff
Date: Sun Aug 18 23:07:56 2019
New Revision: 351198
URL: https://svnweb.freebsd.org/changeset/base/351198

Log:
  Allocate amd64's page array using pages and page directory pages from the
  NUMA domain that the pages describe.  Original patch from gallatin.
  
  Reviewed by:  kib
  Tested by:    pho
  Sponsored by: Netflix
  Differential Revision:        https://reviews.freebsd.org/D21252

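The core idea in pmap_page_array_startup() below is to walk the page array's
virtual range in 2MB steps and back each step with memory taken from the NUMA
domain that owns the physical pages which that slice of vm_page structures
describes.  The stand-alone C sketch below mirrors only that domain-selection
arithmetic; phys_domain(), the 4GB domain split, the 8GB memory size and the
104-byte VM_PAGE_SZ are illustrative assumptions standing in for the kernel's
_vm_phys_domain() and sizeof(struct vm_page), not values taken from this
commit.

/*
 * Hedged sketch: for each 2MB chunk of the page array, pick the NUMA
 * domain owning the pages that the chunk describes.  The kernel code in
 * pmap_page_array_startup() allocates each chunk's backing memory from
 * that domain with vm_phys_early_alloc() and installs it with
 * pde_store(); this sketch only reports the decision.
 */
#include <stdio.h>

#define PAGE_SZ         4096UL
#define NBPDR           (2UL * 1024 * 1024)   /* one 2MB superpage mapping */
#define VM_PAGE_SZ      104UL                 /* assumed sizeof(struct vm_page) */

/* Toy stand-in for _vm_phys_domain(): pretend domain 0 owns the first 4GB. */
static int
phys_domain(unsigned long pfn)
{
        return (pfn * PAGE_SZ < (4UL << 30)) ? 0 : 1;
}

int
main(void)
{
        unsigned long first_page = 0;                   /* first managed pfn */
        unsigned long npages = (8UL << 30) / PAGE_SZ;   /* pretend 8GB of RAM */
        unsigned long array_bytes = npages * VM_PAGE_SZ;
        unsigned long off;

        for (off = 0; off < array_bytes; off += NBPDR) {
                /* First pfn whose vm_page falls inside this 2MB chunk. */
                unsigned long pfn = first_page + off / VM_PAGE_SZ;

                printf("page array offset 0x%lx -> domain %d\n", off,
                    phys_domain(pfn));
        }
        return (0);
}

This follows the same arithmetic as the loop in the diff: pfn = first_page +
(va - start) / sizeof(struct vm_page), then domain = _vm_phys_domain(ctob(pfn)).
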
Modified:
  head/sys/amd64/amd64/pmap.c
  head/sys/amd64/include/pmap.h
  head/sys/amd64/include/vmparam.h
  head/sys/vm/vm_page.c
  head/sys/vm/vm_reserv.c
  head/sys/vm/vm_reserv.h

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c Sun Aug 18 22:20:28 2019        (r351197)
+++ head/sys/amd64/amd64/pmap.c Sun Aug 18 23:07:56 2019        (r351198)
@@ -383,6 +383,9 @@ static u_int64_t    DMPDphys;       /* phys addr of direct mapp
 static u_int64_t       DMPDPphys;      /* phys addr of direct mapped level 3 */
 static int             ndmpdpphys;     /* number of DMPDPphys pages */
 
+static uint64_t                PAPDPphys;      /* phys addr of page array level 3 */
+static int             npapdpphys;     /* number of PAPDPphys pages */
+
 static vm_paddr_t      KERNend;        /* phys addr of end of bootstrap data */
 
 /*
@@ -1427,6 +1430,16 @@ create_pagetables(vm_paddr_t *firstaddr)
        pml4_entry_t *p4_p;
        uint64_t DMPDkernphys;
 
+       npapdpphys = howmany(ptoa(Maxmem) / sizeof(struct vm_page), NBPML4);
+       if (npapdpphys > NPAPML4E) {
+               printf("NDMPML4E limits system to %lu GB\n",
+                   (NDMPML4E * 512) * (PAGE_SIZE / sizeof(struct vm_page)));
+               npapdpphys = NPAPML4E;
+               Maxmem = atop(NPAPML4E * NBPML4 *
+                   (PAGE_SIZE / sizeof(struct vm_page)));
+       }
+       PAPDPphys = allocpages(firstaddr, npapdpphys);
+
        /* Allocate page table pages for the direct map */
        ndmpdp = howmany(ptoa(Maxmem), NBPDP);
        if (ndmpdp < 4)         /* Minimum 4GB of dirmap */
@@ -1573,6 +1586,12 @@ create_pagetables(vm_paddr_t *firstaddr)
                p4_p[KPML4BASE + i] = KPDPphys + ptoa(i);
                p4_p[KPML4BASE + i] |= X86_PG_RW | X86_PG_V;
        }
+
+       /* Connect the page array slots up to the pml4. */
+       for (i = 0; i < npapdpphys; i++) {
+               p4_p[PAPML4I + i] = PAPDPphys + ptoa(i);
+               p4_p[PAPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx;
+       }
 }
 
 /*
@@ -3387,6 +3406,11 @@ pmap_pinit_pml4(vm_page_t pml4pg)
                    X86_PG_V;
        }
 
+       for (i = 0; i < npapdpphys; i++) {
+               pm_pml4[PAPML4I + i] = (PAPDPphys + ptoa(i)) | X86_PG_RW |
+                   X86_PG_V;
+       }
+
        /* install self-referential address mapping entry(s) */
        pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | X86_PG_V | X86_PG_RW |
            X86_PG_A | X86_PG_M;
@@ -3743,6 +3767,8 @@ pmap_release(pmap_t pmap)
                pmap->pm_pml4[KPML4BASE + i] = 0;
        for (i = 0; i < ndmpdpphys; i++)/* Direct Map */
                pmap->pm_pml4[DMPML4I + i] = 0;
+       for (i = 0; i < npapdpphys; i++)
+               pmap->pm_pml4[PAPML4I + i] = 0;
        pmap->pm_pml4[PML4PML4I] = 0;   /* Recursive Mapping */
        for (i = 0; i < lm_ents; i++)   /* Large Map */
                pmap->pm_pml4[LMSPML4I + i] = 0;
@@ -3779,6 +3805,44 @@ kvm_free(SYSCTL_HANDLER_ARGS)
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 
     0, 0, kvm_free, "LU", "Amount of KVM free");
+
+void
+pmap_page_array_startup(long pages)
+{
+       pdp_entry_t *pdpe;
+       pd_entry_t *pde, newpdir;
+       vm_offset_t va, start, end;
+       vm_paddr_t pa;
+       long pfn;
+       int domain, i;
+
+       vm_page_array_size = pages;
+
+       start = va = PA_MIN_ADDRESS;
+       end = va + (pages * sizeof(struct vm_page));
+       while (va < end) {
+               pfn = first_page + ((va - start) / sizeof(struct vm_page));
+               domain = _vm_phys_domain(ctob(pfn));
+               pdpe = pmap_pdpe(kernel_pmap, va);
+               if ((*pdpe & X86_PG_V) == 0) {
+                       pa = vm_phys_early_alloc(domain, PAGE_SIZE);
+                       bzero((void *)PHYS_TO_DMAP(pa), PAGE_SIZE);
+                       *pdpe = (pdp_entry_t)(pa | X86_PG_V | X86_PG_RW |
+                           X86_PG_A | X86_PG_M);
+                       continue; /* try again */
+               }
+               pde = pmap_pdpe_to_pde(pdpe, va);
+               if ((*pde & X86_PG_V) != 0)
+                       panic("Unexpected pde");
+               pa = vm_phys_early_alloc(domain, NBPDR);
+               for (i = 0; i < NPDEPG; i++)
+                       dump_add_page(pa + (i * PAGE_SIZE));
+               newpdir = (pd_entry_t)(pa | X86_PG_V | X86_PG_RW | X86_PG_A |
+                   X86_PG_M | PG_PS | pg_g | pg_nx);
+               pde_store(pde, newpdir);
+               va += NBPDR;
+       }
+}
 
 /*
  * grow the number of kernel page table entries, if needed

Modified: head/sys/amd64/include/pmap.h
==============================================================================
--- head/sys/amd64/include/pmap.h       Sun Aug 18 22:20:28 2019        (r351197)
+++ head/sys/amd64/include/pmap.h       Sun Aug 18 23:07:56 2019        (r351198)
@@ -201,6 +201,13 @@
 #define        NDMPML4E        8
 
 /*
+ * NPAPML4E is the maximum number of PML4 entries that will be
+ * used to implement the page array.  This should be roughly 3% of
+ * NPDPML4E owing to 3% overhead for struct vm_page.
+ */
+#define        NPAPML4E        1
+
+/*
  * These values control the layout of virtual memory.  The starting address
  * of the direct map, which is controlled by DMPML4I, must be a multiple of
  * its size.  (See the PHYS_TO_DMAP() and DMAP_TO_PHYS() macros.)
@@ -219,7 +226,8 @@
 #define        PML4PML4I       (NPML4EPG/2)    /* Index of recursive pml4 mapping */
 
 #define        KPML4BASE       (NPML4EPG-NKPML4E) /* KVM at highest addresses */
-#define        DMPML4I         rounddown(KPML4BASE-NDMPML4E, NDMPML4E) /* Below KVM */
+#define        PAPML4I         (KPML4BASE-1-NPAPML4E) /* Below KVM */
+#define        DMPML4I         rounddown(PAPML4I-NDMPML4E, NDMPML4E) /* Below pages */
 
 #define        KPML4I          (NPML4EPG-1)
 #define        KPDPI           (NPDPEPG-2)     /* kernbase at -2GB */
@@ -467,6 +475,7 @@ int pmap_pkru_set(pmap_t pmap, vm_offset_t sva, vm_off
            u_int keyidx, int flags);
 void   pmap_thread_init_invl_gen(struct thread *td);
 int    pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap);
+void   pmap_page_array_startup(long count);
 #endif /* _KERNEL */
 
 /* Return various clipped indexes for a given VA */

Modified: head/sys/amd64/include/vmparam.h
==============================================================================
--- head/sys/amd64/include/vmparam.h    Sun Aug 18 22:20:28 2019        (r351197)
+++ head/sys/amd64/include/vmparam.h    Sun Aug 18 23:07:56 2019        (r351198)
@@ -160,7 +160,9 @@
  * 0xffff808000000000 - 0xffff847fffffffff   large map (can be tuned up)
  * 0xffff848000000000 - 0xfffff7ffffffffff   unused (large map extends there)
  * 0xfffff80000000000 - 0xfffffbffffffffff   4TB direct map
- * 0xfffffc0000000000 - 0xfffffdffffffffff   unused
+ * 0xfffffc0000000000 - 0xfffffcffffffffff   unused
+ * 0xfffffd0000000000 - 0xfffffd7fffffffff   page array 512GB
+ * 0xfffffd8000000000 - 0xfffffdffffffffff   unused
  * 0xfffffe0000000000 - 0xffffffffffffffff   2TB kernel map
  *
  * Within the kernel map:
@@ -175,6 +177,8 @@
 #define        DMAP_MIN_ADDRESS        KVADDR(DMPML4I, 0, 0, 0)
 #define        DMAP_MAX_ADDRESS        KVADDR(DMPML4I + NDMPML4E, 0, 0, 0)
 
+#define        PA_MIN_ADDRESS          KVADDR(PAPML4I, 0, 0, 0)
+
 #define        LARGEMAP_MIN_ADDRESS    KVADDR(LMSPML4I, 0, 0, 0)
 #define        LARGEMAP_MAX_ADDRESS    KVADDR(LMEPML4I + 1, 0, 0, 0)
 
@@ -210,6 +214,12 @@
            ("virtual address %#jx not covered by the DMAP",            \
            (uintmax_t)x));                                             \
        (x) & ~DMAP_MIN_ADDRESS; })
+
+/*
+ * amd64 statically allocates the page array address so that it can
+ * be more easily allocated on the correct memory domains.
+ */
+#define PMAP_HAS_PAGE_ARRAY    1
 
 /*
  * How many physical pages per kmem arena virtual page.

Modified: head/sys/vm/vm_page.c
==============================================================================
--- head/sys/vm/vm_page.c       Sun Aug 18 22:20:28 2019        (r351197)
+++ head/sys/vm/vm_page.c       Sun Aug 18 23:07:56 2019        (r351198)
@@ -135,7 +135,11 @@ static int vm_pageproc_waiters;
  */
 vm_page_t bogus_page;
 
+#ifdef PMAP_HAS_PAGE_ARRAY
+vm_page_t vm_page_array = (vm_page_t)PA_MIN_ADDRESS;
+#else
 vm_page_t vm_page_array;
+#endif
 long vm_page_array_size;
 long first_page;
 
@@ -522,6 +526,31 @@ vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segi
        pmap_page_init(m);
 }
 
+#ifndef PMAP_HAS_PAGE_ARRAY
+static vm_paddr_t
+vm_page_array_alloc(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t page_range)
+{
+       vm_paddr_t new_end;
+
+       /*
+        * Reserve an unmapped guard page to trap access to vm_page_array[-1].
+        * However, because this page is allocated from KVM, out-of-bounds
+        * accesses using the direct map will not be trapped.
+        */
+       *vaddr += PAGE_SIZE;
+
+       /*
+        * Allocate physical memory for the page structures, and map it.
+        */
+       new_end = trunc_page(end - page_range * sizeof(struct vm_page));
+       vm_page_array = (vm_page_t)pmap_map(vaddr, new_end, end,
+           VM_PROT_READ | VM_PROT_WRITE);
+       vm_page_array_size = page_range;
+
+       return (new_end);
+}
+#endif
+
 /*
  *     vm_page_startup:
  *
@@ -693,6 +722,11 @@ vm_page_startup(vm_offset_t vaddr)
 #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined."
 #endif
 
+#ifdef PMAP_HAS_PAGE_ARRAY
+       pmap_page_array_startup(size / PAGE_SIZE);
+       biggestone = vm_phys_avail_largest();
+       end = new_end = phys_avail[biggestone + 1];
+#else
 #ifdef VM_PHYSSEG_DENSE
        /*
         * In the VM_PHYSSEG_DENSE case, the number of pages can account for
@@ -723,31 +757,15 @@ vm_page_startup(vm_offset_t vaddr)
                }
        }
        end = new_end;
+       new_end = vm_page_array_alloc(&vaddr, end, page_range);
+#endif
 
-       /*
-        * Reserve an unmapped guard page to trap access to vm_page_array[-1].
-        * However, because this page is allocated from KVM, out-of-bounds
-        * accesses using the direct map will not be trapped.
-        */
-       vaddr += PAGE_SIZE;
-
-       /*
-        * Allocate physical memory for the page structures, and map it.
-        */
-       new_end = trunc_page(end - page_range * sizeof(struct vm_page));
-       mapped = pmap_map(&vaddr, new_end, end,
-           VM_PROT_READ | VM_PROT_WRITE);
-       vm_page_array = (vm_page_t)mapped;
-       vm_page_array_size = page_range;
-
 #if VM_NRESERVLEVEL > 0
        /*
         * Allocate physical memory for the reservation management system's
         * data structures, and map it.
         */
-       if (high_avail == end)
-               high_avail = new_end;
-       new_end = vm_reserv_startup(&vaddr, new_end, high_avail);
+       new_end = vm_reserv_startup(&vaddr, new_end);
 #endif
 #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \
     defined(__riscv)

Modified: head/sys/vm/vm_reserv.c
==============================================================================
--- head/sys/vm/vm_reserv.c     Sun Aug 18 22:20:28 2019        (r351197)
+++ head/sys/vm/vm_reserv.c     Sun Aug 18 23:07:56 2019        (r351198)
@@ -1360,10 +1360,23 @@ vm_reserv_size(int level)
  * management system's data structures, in particular, the reservation array.
  */
 vm_paddr_t
-vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water)
+vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end)
 {
-       vm_paddr_t new_end;
+       vm_paddr_t new_end, high_water;
        size_t size;
+       int i;
+
+       high_water = phys_avail[1];
+       for (i = 0; i < vm_phys_nsegs; i++) {
+               if (vm_phys_segs[i].end > high_water)
+                       high_water = vm_phys_segs[i].end;
+       }
+
+       /* Skip the first chunk.  It is already accounted for. */
+       for (i = 2; phys_avail[i + 1] != 0; i += 2) {
+               if (phys_avail[i + 1] > high_water)
+                       high_water = phys_avail[i + 1];
+       }
 
        /*
         * Calculate the size (in bytes) of the reservation array.  Round up

Modified: head/sys/vm/vm_reserv.h
==============================================================================
--- head/sys/vm/vm_reserv.h     Sun Aug 18 22:20:28 2019        (r351197)
+++ head/sys/vm/vm_reserv.h     Sun Aug 18 23:07:56 2019        (r351198)
@@ -66,8 +66,7 @@ boolean_t     vm_reserv_reclaim_inactive(int domain);
 void           vm_reserv_rename(vm_page_t m, vm_object_t new_object,
                    vm_object_t old_object, vm_pindex_t old_object_offset);
 int            vm_reserv_size(int level);
-vm_paddr_t     vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end,
-                   vm_paddr_t high_water);
+vm_paddr_t     vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end);
 vm_page_t      vm_reserv_to_superpage(vm_page_t m);
 
 #endif /* VM_NRESERVLEVEL > 0 */