Here is a final (I hope) version of the patch.  I dropped the
config option, but I added code to limit the "real" size of the
direct map PDEs.  The end result is that on small systems, this
ties up 14 more pages (15 from increasing NKPML4E, but one
regained because the new static variable ndmpdpphys is 1 instead
of 2).

(I fixed the comment errors I spotted earlier, too.)

Chris

 amd64/amd64/pmap.c      | 100 +++++++++++++++++++++++++++++-------------------
 amd64/include/pmap.h    |  36 +++++++++++++----
 amd64/include/vmparam.h |  13 ++++---
 3 files changed, 97 insertions(+), 52 deletions(-)

Author: Chris Torek <chris.to...@gmail.com>
Date:   Thu Jun 27 18:49:29 2013 -0600

    increase physical and virtual memory limits
    
    Increase kernel VM space: go from .5 TB of KVA and 1 TB of direct
    map, to 8 TB of KVA and 16 TB of direct map.  However, we allocate
    less direct map space for small physical-memory systems.  Also, if
    Maxmem is so large that there is not enough direct map space,
    reduce Maxmem to fit, so that the system can boot unassisted.

diff --git a/amd64/amd64/pmap.c b/amd64/amd64/pmap.c
index 8dcf232..7368c96 100644
--- a/amd64/amd64/pmap.c
+++ b/amd64/amd64/pmap.c
@@ -232,6 +232,7 @@ u_int64_t           KPML4phys;      /* phys addr of kernel 
level 4 */
 
 static u_int64_t       DMPDphys;       /* phys addr of direct mapped level 2 */
 static u_int64_t       DMPDPphys;      /* phys addr of direct mapped level 3 */
+static int             ndmpdpphys;     /* number of DMPDPphys pages */
 
 static struct rwlock_padalign pvh_global_lock;
 
@@ -531,12 +532,27 @@ static void
 create_pagetables(vm_paddr_t *firstaddr)
 {
        int i, j, ndm1g, nkpdpe;
+       pt_entry_t *pt_p;
+       pd_entry_t *pd_p;
+       pdp_entry_t *pdp_p;
+       pml4_entry_t *p4_p;
 
        /* Allocate page table pages for the direct map */
        ndmpdp = (ptoa(Maxmem) + NBPDP - 1) >> PDPSHIFT;
        if (ndmpdp < 4)         /* Minimum 4GB of dirmap */
                ndmpdp = 4;
-       DMPDPphys = allocpages(firstaddr, NDMPML4E);
+       ndmpdpphys = howmany(ndmpdp, NPML4EPG);
+       if (ndmpdpphys > NDMPML4E) {
+               /*
+                * Each NDMPML4E allows 512 GB, so limit to that,
+                * and then readjust ndmpdp and ndmpdpphys.
+                */
+               printf("NDMPML4E limits system to %d GB\n", NDMPML4E * 512);
+               Maxmem = atop(NDMPML4E * NBPML4);
+               ndmpdpphys = NDMPML4E;
+               ndmpdp = NDMPML4E * NPDEPG;
+       }
+       DMPDPphys = allocpages(firstaddr, ndmpdpphys);
        ndm1g = 0;
        if ((amd_feature & AMDID_PAGE1GB) != 0)
                ndm1g = ptoa(Maxmem) >> PDPSHIFT;
@@ -553,6 +569,10 @@ create_pagetables(vm_paddr_t *firstaddr)
         * bootstrap.  We defer this until after all memory-size dependent
         * allocations are done (e.g. direct map), so that we don't have to
         * build in too much slop in our estimate.
+        *
+        * Note that when NKPML4E > 1, we have an empty page underneath
+        * all but the KPML4I'th one, so we need NKPML4E-1 extra (zeroed)
+        * pages.  (pmap_enter requires a PD page to exist for each KPML4E.)
         */
        nkpt_init(*firstaddr);
        nkpdpe = NKPDPE(nkpt);
@@ -561,32 +581,26 @@ create_pagetables(vm_paddr_t *firstaddr)
        KPDphys = allocpages(firstaddr, nkpdpe);
 
        /* Fill in the underlying page table pages */
-       /* Read-only from zero to physfree */
+       /* Nominally read-only (but really R/W) from zero to physfree */
        /* XXX not fully used, underneath 2M pages */
-       for (i = 0; (i << PAGE_SHIFT) < *firstaddr; i++) {
-               ((pt_entry_t *)KPTphys)[i] = i << PAGE_SHIFT;
-               ((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V | PG_G;
-       }
+       pt_p = (pt_entry_t *)KPTphys;
+       for (i = 0; ptoa(i) < *firstaddr; i++)
+               pt_p[i] = ptoa(i) | PG_RW | PG_V | PG_G;
 
        /* Now map the page tables at their location within PTmap */
-       for (i = 0; i < nkpt; i++) {
-               ((pd_entry_t *)KPDphys)[i] = KPTphys + (i << PAGE_SHIFT);
-               ((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V;
-       }
+       pd_p = (pd_entry_t *)KPDphys;
+       for (i = 0; i < nkpt; i++)
+               pd_p[i] = (KPTphys + ptoa(i)) | PG_RW | PG_V;
 
        /* Map from zero to end of allocations under 2M pages */
        /* This replaces some of the KPTphys entries above */
-       for (i = 0; (i << PDRSHIFT) < *firstaddr; i++) {
-               ((pd_entry_t *)KPDphys)[i] = i << PDRSHIFT;
-               ((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V | PG_PS | PG_G;
-       }
+       for (i = 0; (i << PDRSHIFT) < *firstaddr; i++)
+               pd_p[i] = (i << PDRSHIFT) | PG_RW | PG_V | PG_PS | PG_G;
 
-       /* And connect up the PD to the PDP */
-       for (i = 0; i < nkpdpe; i++) {
-               ((pdp_entry_t *)KPDPphys)[i + KPDPI] = KPDphys +
-                   (i << PAGE_SHIFT);
-               ((pdp_entry_t *)KPDPphys)[i + KPDPI] |= PG_RW | PG_V | PG_U;
-       }
+       /* And connect up the PD to the PDP (leaving room for L4 pages) */
+       pdp_p = (pdp_entry_t *)(KPDPphys + ptoa(KPML4I - KPML4BASE));
+       for (i = 0; i < nkpdpe; i++)
+               pdp_p[i + KPDPI] = (KPDphys + ptoa(i)) | PG_RW | PG_V | PG_U;
 
        /*
         * Now, set up the direct map region using 2MB and/or 1GB pages.  If
@@ -596,37 +610,41 @@ create_pagetables(vm_paddr_t *firstaddr)
         * memory, pmap_change_attr() will demote any 2MB or 1GB page mappings
         * that are partially used. 
         */
+       pd_p = (pd_entry_t *)DMPDphys;
        for (i = NPDEPG * ndm1g, j = 0; i < NPDEPG * ndmpdp; i++, j++) {
-               ((pd_entry_t *)DMPDphys)[j] = (vm_paddr_t)i << PDRSHIFT;
+               pd_p[j] = (vm_paddr_t)i << PDRSHIFT;
                /* Preset PG_M and PG_A because demotion expects it. */
-               ((pd_entry_t *)DMPDphys)[j] |= PG_RW | PG_V | PG_PS | PG_G |
+               pd_p[j] |= PG_RW | PG_V | PG_PS | PG_G |
                    PG_M | PG_A;
        }
+       pdp_p = (pdp_entry_t *)DMPDPphys;
        for (i = 0; i < ndm1g; i++) {
-               ((pdp_entry_t *)DMPDPphys)[i] = (vm_paddr_t)i << PDPSHIFT;
+               pdp_p[i] = (vm_paddr_t)i << PDPSHIFT;
                /* Preset PG_M and PG_A because demotion expects it. */
-               ((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_PS | PG_G |
+               pdp_p[i] |= PG_RW | PG_V | PG_PS | PG_G |
                    PG_M | PG_A;
        }
        for (j = 0; i < ndmpdp; i++, j++) {
-               ((pdp_entry_t *)DMPDPphys)[i] = DMPDphys + (j << PAGE_SHIFT);
-               ((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_U;
+               pdp_p[i] = DMPDphys + ptoa(j);
+               pdp_p[i] |= PG_RW | PG_V | PG_U;
        }
 
        /* And recursively map PML4 to itself in order to get PTmap */
-       ((pdp_entry_t *)KPML4phys)[PML4PML4I] = KPML4phys;
-       ((pdp_entry_t *)KPML4phys)[PML4PML4I] |= PG_RW | PG_V | PG_U;
+       p4_p = (pml4_entry_t *)KPML4phys;
+       p4_p[PML4PML4I] = KPML4phys;
+       p4_p[PML4PML4I] |= PG_RW | PG_V | PG_U;
 
        /* Connect the Direct Map slot(s) up to the PML4. */
-       for (i = 0; i < NDMPML4E; i++) {
-               ((pdp_entry_t *)KPML4phys)[DMPML4I + i] = DMPDPphys +
-                   (i << PAGE_SHIFT);
-               ((pdp_entry_t *)KPML4phys)[DMPML4I + i] |= PG_RW | PG_V | PG_U;
+       for (i = 0; i < ndmpdpphys; i++) {
+               p4_p[DMPML4I + i] = DMPDPphys + ptoa(i);
+               p4_p[DMPML4I + i] |= PG_RW | PG_V | PG_U;
        }
 
-       /* Connect the KVA slot up to the PML4 */
-       ((pdp_entry_t *)KPML4phys)[KPML4I] = KPDPphys;
-       ((pdp_entry_t *)KPML4phys)[KPML4I] |= PG_RW | PG_V | PG_U;
+       /* Connect the KVA slots up to the PML4 */
+       for (i = 0; i < NKPML4E; i++) {
+               p4_p[KPML4BASE + i] = KPDPphys + ptoa(i);
+               p4_p[KPML4BASE + i] |= PG_RW | PG_V | PG_U;
+       }
 }
 
 /*
@@ -1685,8 +1703,11 @@ pmap_pinit(pmap_t pmap)
                pagezero(pmap->pm_pml4);
 
        /* Wire in kernel global address entries. */
-       pmap->pm_pml4[KPML4I] = KPDPphys | PG_RW | PG_V | PG_U;
-       for (i = 0; i < NDMPML4E; i++) {
+       for (i = 0; i < NKPML4E; i++) {
+               pmap->pm_pml4[KPML4BASE + i] = (KPDPphys + (i << PAGE_SHIFT)) |
+                   PG_RW | PG_V | PG_U;
+       }
+       for (i = 0; i < ndmpdpphys; i++) {
                pmap->pm_pml4[DMPML4I + i] = (DMPDPphys + (i << PAGE_SHIFT)) |
                    PG_RW | PG_V | PG_U;
        }
@@ -1941,8 +1962,9 @@ pmap_release(pmap_t pmap)
 
        m = PHYS_TO_VM_PAGE(pmap->pm_pml4[PML4PML4I] & PG_FRAME);
 
-       pmap->pm_pml4[KPML4I] = 0;      /* KVA */
-       for (i = 0; i < NDMPML4E; i++)  /* Direct Map */
+       for (i = 0; i < NKPML4E; i++)   /* KVA */
+               pmap->pm_pml4[KPML4BASE + i] = 0;
+       for (i = 0; i < ndmpdpphys; i++)/* Direct Map */
                pmap->pm_pml4[DMPML4I + i] = 0;
        pmap->pm_pml4[PML4PML4I] = 0;   /* Recursive Mapping */
 
diff --git a/amd64/include/pmap.h b/amd64/include/pmap.h
index dc02e49..da80241 100644
--- a/amd64/include/pmap.h
+++ b/amd64/include/pmap.h
@@ -113,28 +113,50 @@
        ((unsigned long)(l2) << PDRSHIFT) | \
        ((unsigned long)(l1) << PAGE_SHIFT))
 
-#define NKPML4E                1               /* number of kernel PML4 slots 
*/
+/*
+ * Number of kernel PML4 slots.  Can be anywhere from 1 to 64 or so,
+ * but setting it larger than NDMPML4E makes no sense.
+ *
+ * Each slot provides .5 TB of kernel virtual space.
+ */
+#define NKPML4E                16
 
 #define        NUPML4E         (NPML4EPG/2)    /* number of userland PML4 
pages */
 #define        NUPDPE          (NUPML4E*NPDPEPG)/* number of userland PDP 
pages */
 #define        NUPDE           (NUPDPE*NPDEPG) /* number of userland PD 
entries */
 
 /*
- * NDMPML4E is the number of PML4 entries that are used to implement the
- * direct map.  It must be a power of two.
+ * NDMPML4E is the maximum number of PML4 entries that will be
+ * used to implement the direct map.  It must be a power of two,
+ * and should generally exceed NKPML4E.  The maximum possible
+ * value is 64; using 128 will make the direct map intrude into
+ * the recursive page table map.
  */
-#define        NDMPML4E        2
+#define        NDMPML4E        32
 
 /*
- * The *PDI values control the layout of virtual memory.  The starting address
+ * These values control the layout of virtual memory.  The starting address
  * of the direct map, which is controlled by DMPML4I, must be a multiple of
  * its size.  (See the PHYS_TO_DMAP() and DMAP_TO_PHYS() macros.)
+ *
+ * Note: KPML4I is the index of the (single) level 4 page that maps
+ * the KVA that holds KERNBASE, while KPML4BASE is the index of the
+ * first level 4 page that maps VM_MIN_KERNEL_ADDRESS.  If NKPML4E
+ * is 1, these are the same, otherwise KPML4BASE < KPML4I and extra
+ * level 4 PDEs are needed to map from VM_MIN_KERNEL_ADDRESS up to
+ * KERNBASE.  Similarly, if KMPL4I < (base+N), extra level 4 PDEs are
+ * needed to map from somewhere-above-KERNBASE to VM_MAX_KERNEL_ADDRESS.
+ *
+ * (KPML4I combines with KPDPI to choose where KERNBASE starts.
+ * Or, in other words, KPML4I provides bits 39..46 of KERNBASE,
+ * and KPDPI provides bits 30..38.)
  */
 #define        PML4PML4I       (NPML4EPG/2)    /* Index of recursive pml4 
mapping */
 
-#define        KPML4I          (NPML4EPG-1)    /* Top 512GB for KVM */
-#define        DMPML4I         rounddown(KPML4I - NDMPML4E, NDMPML4E) /* Below 
KVM */
+#define        KPML4BASE       (NPML4EPG-NKPML4E) /* KVM at highest addresses 
*/
+#define        DMPML4I         rounddown(KPML4BASE-NDMPML4E, NDMPML4E) /* 
Below KVM */
 
+#define        KPML4I          (NPML4EPG-1)
 #define        KPDPI           (NPDPEPG-2)     /* kernbase at -2GB */
 
 /*
diff --git a/amd64/include/vmparam.h b/amd64/include/vmparam.h
index 33f62bd..cff2558 100644
--- a/amd64/include/vmparam.h
+++ b/amd64/include/vmparam.h
@@ -145,18 +145,19 @@
  * 0x0000000000000000 - 0x00007fffffffffff   user map
  * 0x0000800000000000 - 0xffff7fffffffffff   does not exist (hole)
  * 0xffff800000000000 - 0xffff804020100fff   recursive page table (512GB slot)
- * 0xffff804020101000 - 0xfffffdffffffffff   unused
- * 0xfffffe0000000000 - 0xfffffeffffffffff   1TB direct map
- * 0xffffff0000000000 - 0xffffff7fffffffff   unused
- * 0xffffff8000000000 - 0xffffffffffffffff   512GB kernel map
+ * 0xffff804020101000 - 0xffffdfffffffffff   unused
+ * 0xffffe00000000000 - 0xffffefffffffffff   16TB direct map
+ * 0xfffff00000000000 - 0xfffff7ffffffffff   unused
+ * 0xfffff80000000000 - 0xffffffffffffffff   8TB kernel map
  *
  * Within the kernel map:
  *
  * 0xffffffff80000000                        KERNBASE
  */
 
-#define        VM_MAX_KERNEL_ADDRESS   KVADDR(KPML4I, NPDPEPG-1, NPDEPG-1, 
NPTEPG-1)
-#define        VM_MIN_KERNEL_ADDRESS   KVADDR(KPML4I, NPDPEPG-512, 0, 0)
+#define        VM_MIN_KERNEL_ADDRESS   KVADDR(KPML4BASE, 0, 0, 0)
+#define        VM_MAX_KERNEL_ADDRESS   KVADDR(KPML4BASE + NKPML4E - 1, \
+                                       NPDPEPG-1, NPDEPG-1, NPTEPG-1)
 
 #define        DMAP_MIN_ADDRESS        KVADDR(DMPML4I, 0, 0, 0)
 #define        DMAP_MAX_ADDRESS        KVADDR(DMPML4I + NDMPML4E, 0, 0, 0)
_______________________________________________
freebsd-hackers@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-hackers
To unsubscribe, send any mail to "freebsd-hackers-unsubscr...@freebsd.org"

Reply via email to