The branch stable/14 has been updated by kib:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=0f62297dd1f2084af942924265b7c46b60525699

commit 0f62297dd1f2084af942924265b7c46b60525699
Author:     Konstantin Belousov <k...@freebsd.org>
AuthorDate: 2025-06-23 23:20:56 +0000
Commit:     Konstantin Belousov <k...@freebsd.org>
CommitDate: 2025-07-04 04:57:02 +0000

    amd64: switch to la57 mode before creating kernel page tables
    
    (cherry picked from commit 4e1d69b9fbff280962e5ae5258624b60d5ab4618)
---
 sys/amd64/amd64/locore.S |   2 +
 sys/amd64/amd64/pmap.c   | 207 ++++++++++++++++-------------------------------
 2 files changed, 72 insertions(+), 137 deletions(-)

diff --git a/sys/amd64/amd64/locore.S b/sys/amd64/amd64/locore.S
index 29fbf38cea33..2be555b25160 100644
--- a/sys/amd64/amd64/locore.S
+++ b/sys/amd64/amd64/locore.S
@@ -119,6 +119,8 @@ ENTRY(la57_trampoline)
        leaq    la57_trampoline_end(%rip),%rsp /* priv stack */
 
        movq    %cr0,%rbp
+       leaq    la57_trampoline_gdt(%rip),%rax
+       movq    %rax,la57_trampoline_gdt_desc+2(%rip)
        lgdtq   la57_trampoline_gdt_desc(%rip)
 
        pushq   $(2<<3)
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 8e2c2109e7ad..97e59123df97 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -1686,12 +1686,43 @@ bootaddr_rwx(vm_paddr_t pa)
        return (pg_nx);
 }
 
+extern const char la57_trampoline[];
+
+static void
+pmap_bootstrap_la57(vm_paddr_t *firstaddr)
+{
+       void (*la57_tramp)(uint64_t pml5);
+       pml5_entry_t *pt;
+
+       if ((cpu_stdext_feature2 & CPUID_STDEXT2_LA57) == 0)
+               return;
+       TUNABLE_INT_FETCH("vm.pmap.la57", &la57);
+       if (!la57)
+               return;
+
+       KPML5phys = allocpages(firstaddr, 1);
+       KPML4phys = rcr3() & 0xfffff000; /* pml4 from loader must be < 4G */
+
+       pt = (pml5_entry_t *)KPML5phys;
+       pt[0] = KPML4phys | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M;
+       pt[NPML4EPG - 1] = KPML4phys | X86_PG_V | X86_PG_RW | X86_PG_A |
+           X86_PG_M;
+
+       la57_tramp = (void (*)(uint64_t))((uintptr_t)la57_trampoline -
+           KERNSTART + amd64_loadaddr());
+       printf("Calling la57 trampoline at %p, KPML5phys %#lx ...",
+           la57_tramp, KPML5phys);
+       la57_tramp(KPML5phys);
+       printf(" alive in la57 mode\n");
+}
+
 static void
 create_pagetables(vm_paddr_t *firstaddr)
 {
        pd_entry_t *pd_p;
        pdp_entry_t *pdp_p;
        pml4_entry_t *p4_p;
+       pml5_entry_t *p5_p;
        uint64_t DMPDkernphys;
        vm_paddr_t pax;
 #ifdef KASAN
@@ -1919,6 +1950,27 @@ create_pagetables(vm_paddr_t *firstaddr)
        }
 
        kernel_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys);
+
+       if (la57) {
+               /* XXXKIB bootstrap KPML5phys page is lost */
+               KPML5phys = allocpages(firstaddr, 1);
+               for (i = 0, p5_p = (pml5_entry_t *)KPML5phys; i < NPML5EPG;
+                   i++) {
+                       if (i == PML5PML5I) {
+                               /*
+                                * Recursively map PML5 to itself in
+                                * order to get PTmap and PDmap.
+                                */
+                               p5_p[i] = KPML5phys | X86_PG_RW | X86_PG_A |
+                                   X86_PG_M | X86_PG_V | pg_nx;
+                       } else if (i == pmap_pml5e_index(UPT_MAX_ADDRESS)) {
+                               p5_p[i] = KPML4phys | X86_PG_RW | X86_PG_A |
+                                   X86_PG_M | X86_PG_V;
+                       } else {
+                               p5_p[i] = 0;
+                       }
+               }
+       }
        TSEXIT();
 }
 
@@ -1952,6 +2004,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
        /*
         * Create an initial set of page tables to run the kernel in.
         */
+       pmap_bootstrap_la57(firstaddr);
        create_pagetables(firstaddr);
 
        pcpu0_phys = allocpages(firstaddr, 1);
@@ -1981,7 +2034,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
        cr4 = rcr4();
        cr4 |= CR4_PGE;
        load_cr4(cr4);
-       load_cr3(KPML4phys);
+       load_cr3(la57 ? KPML5phys : KPML4phys);
        if (cpu_stdext_feature & CPUID_STDEXT_SMEP)
                cr4 |= CR4_SMEP;
        if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
@@ -1994,8 +2047,20 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
         * later unmapped (using pmap_remove()) and freed.
         */
        PMAP_LOCK_INIT(kernel_pmap);
-       kernel_pmap->pm_pmltop = kernel_pml4;
-       kernel_pmap->pm_cr3 = KPML4phys;
+       if (la57) {
+               vtoptem = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT +
+                   NPML4EPGSHIFT + NPML5EPGSHIFT)) - 1) << 3;
+               PTmap = (vm_offset_t)P5Tmap;
+               vtopdem = ((1ul << (NPDEPGSHIFT + NPDPEPGSHIFT +
+                   NPML4EPGSHIFT + NPML5EPGSHIFT)) - 1) << 3;
+               PDmap = (vm_offset_t)P5Dmap;
+               kernel_pmap->pm_pmltop = (void *)PHYS_TO_DMAP(KPML5phys);
+               kernel_pmap->pm_cr3 = KPML5phys;
+               pmap_pt_page_count_adj(kernel_pmap, 1); /* top-level page */
+       } else {
+               kernel_pmap->pm_pmltop = kernel_pml4;
+               kernel_pmap->pm_cr3 = KPML4phys;
+       }
        kernel_pmap->pm_ucr3 = PMAP_NO_CR3;
        TAILQ_INIT(&kernel_pmap->pm_pvchunk);
        kernel_pmap->pm_stats.resident_count = res;
@@ -2050,6 +2115,8 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
        /*
         * Re-initialize PCPU area for BSP after switching.
         * Make hardware use gdt and common_tss from the new PCPU.
+        * Also clears the usage of temporary gdt during switch to
+        * LA57 paging.
         */
        STAILQ_INIT(&cpuhead);
        wrmsr(MSR_GSBASE, (uint64_t)&__pcpu[0]);
@@ -2179,140 +2246,6 @@ pmap_page_alloc_below_4g(bool zeroed)
            1, 0, (1ULL << 32), PAGE_SIZE, 0, VM_MEMATTR_DEFAULT));
 }
 
-extern const char la57_trampoline[], la57_trampoline_gdt_desc[],
-    la57_trampoline_gdt[], la57_trampoline_end[];
-
-static void
-pmap_bootstrap_la57(void *arg __unused)
-{
-       char *v_code;
-       pml5_entry_t *v_pml5;
-       pml4_entry_t *v_pml4;
-       pdp_entry_t *v_pdp;
-       pd_entry_t *v_pd;
-       pt_entry_t *v_pt;
-       vm_page_t m_code, m_pml4, m_pdp, m_pd, m_pt, m_pml5;
-       void (*la57_tramp)(uint64_t pml5);
-       struct region_descriptor r_gdt;
-
-       if ((cpu_stdext_feature2 & CPUID_STDEXT2_LA57) == 0)
-               return;
-       TUNABLE_INT_FETCH("vm.pmap.la57", &la57);
-       if (!la57)
-               return;
-
-       r_gdt.rd_limit = NGDT * sizeof(struct user_segment_descriptor) - 1;
-       r_gdt.rd_base = (long)__pcpu[0].pc_gdt;
-
-       m_code = pmap_page_alloc_below_4g(true);
-       v_code = (char *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m_code));
-       m_pml5 = pmap_page_alloc_below_4g(true);
-       KPML5phys = VM_PAGE_TO_PHYS(m_pml5);
-       v_pml5 = (pml5_entry_t *)PHYS_TO_DMAP(KPML5phys);
-       m_pml4 = pmap_page_alloc_below_4g(true);
-       v_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m_pml4));
-       m_pdp = pmap_page_alloc_below_4g(true);
-       v_pdp = (pdp_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m_pdp));
-       m_pd = pmap_page_alloc_below_4g(true);
-       v_pd = (pdp_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m_pd));
-       m_pt = pmap_page_alloc_below_4g(true);
-       v_pt = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m_pt));
-
-       /*
-        * Map m_code 1:1, it appears below 4G in KVA due to physical
-        * address being below 4G.  Since kernel KVA is in upper half,
-        * the pml4e should be zero and free for temporary use.
-        */
-       kernel_pmap->pm_pmltop[pmap_pml4e_index(VM_PAGE_TO_PHYS(m_code))] =
-           VM_PAGE_TO_PHYS(m_pdp) | X86_PG_V | X86_PG_RW | X86_PG_A |
-           X86_PG_M;
-       v_pdp[pmap_pdpe_index(VM_PAGE_TO_PHYS(m_code))] =
-           VM_PAGE_TO_PHYS(m_pd) | X86_PG_V | X86_PG_RW | X86_PG_A |
-           X86_PG_M;
-       v_pd[pmap_pde_index(VM_PAGE_TO_PHYS(m_code))] =
-           VM_PAGE_TO_PHYS(m_pt) | X86_PG_V | X86_PG_RW | X86_PG_A |
-           X86_PG_M;
-       v_pt[pmap_pte_index(VM_PAGE_TO_PHYS(m_code))] =
-           VM_PAGE_TO_PHYS(m_code) | X86_PG_V | X86_PG_RW | X86_PG_A |
-           X86_PG_M;
-
-       /*
-        * Add pml5 entry at top of KVA pointing to existing pml4 table,
-        * entering all existing kernel mappings into level 5 table.
-        */
-       v_pml5[pmap_pml5e_index(UPT_MAX_ADDRESS)] = KPML4phys | X86_PG_V |
-           X86_PG_RW | X86_PG_A | X86_PG_M;
-
-       /*
-        * Add pml5 entry for 1:1 trampoline mapping after LA57 is turned on.
-        */
-       v_pml5[pmap_pml5e_index(VM_PAGE_TO_PHYS(m_code))] =
-           VM_PAGE_TO_PHYS(m_pml4) | X86_PG_V | X86_PG_RW | X86_PG_A |
-           X86_PG_M;
-       v_pml4[pmap_pml4e_index(VM_PAGE_TO_PHYS(m_code))] =
-           VM_PAGE_TO_PHYS(m_pdp) | X86_PG_V | X86_PG_RW | X86_PG_A |
-           X86_PG_M;
-
-       /*
-        * Copy and call the 48->57 trampoline, hope we return there, alive.
-        */
-       bcopy(la57_trampoline, v_code, la57_trampoline_end - la57_trampoline);
-       *(u_long *)(v_code + 2 + (la57_trampoline_gdt_desc - la57_trampoline)) =
-           la57_trampoline_gdt - la57_trampoline + VM_PAGE_TO_PHYS(m_code);
-       la57_tramp = (void (*)(uint64_t))VM_PAGE_TO_PHYS(m_code);
-       pmap_invalidate_all(kernel_pmap);
-       if (bootverbose) {
-               printf("entering LA57 trampoline at %#lx\n",
-                   (vm_offset_t)la57_tramp);
-       }
-       la57_tramp(KPML5phys);
-
-       /*
-        * gdt was necessary reset, switch back to our gdt.
-        */
-       lgdt(&r_gdt);
-       wrmsr(MSR_GSBASE, (uint64_t)&__pcpu[0]);
-       load_ds(_udatasel);
-       load_es(_udatasel);
-       load_fs(_ufssel);
-       ssdtosyssd(&gdt_segs[GPROC0_SEL],
-           (struct system_segment_descriptor *)&__pcpu[0].pc_gdt[GPROC0_SEL]);
-       ltr(GSEL(GPROC0_SEL, SEL_KPL));
-       lidt(&r_idt);
-
-       if (bootverbose)
-               printf("LA57 trampoline returned, CR4 %#lx\n", rcr4());
-
-       /*
-        * Now unmap the trampoline, and free the pages.
-        * Clear pml5 entry used for 1:1 trampoline mapping.
-        */
-       pte_clear(&v_pml5[pmap_pml5e_index(VM_PAGE_TO_PHYS(m_code))]);
-       invlpg((vm_offset_t)v_code);
-       vm_page_free(m_code);
-       vm_page_free(m_pdp);
-       vm_page_free(m_pd);
-       vm_page_free(m_pt);
-
-       /* 
-        * Recursively map PML5 to itself in order to get PTmap and
-        * PDmap.
-        */
-       v_pml5[PML5PML5I] = KPML5phys | X86_PG_RW | X86_PG_V | pg_nx;
-
-       vtoptem = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT +
-           NPML4EPGSHIFT + NPML5EPGSHIFT)) - 1) << 3;
-       PTmap = (vm_offset_t)P5Tmap;
-       vtopdem = ((1ul << (NPDEPGSHIFT + NPDPEPGSHIFT +
-           NPML4EPGSHIFT + NPML5EPGSHIFT)) - 1) << 3;
-       PDmap = (vm_offset_t)P5Dmap;
-
-       kernel_pmap->pm_cr3 = KPML5phys;
-       kernel_pmap->pm_pmltop = v_pml5;
-       pmap_pt_page_count_adj(kernel_pmap, 1);
-}
-SYSINIT(la57, SI_SUB_KMEM, SI_ORDER_ANY, pmap_bootstrap_la57, NULL);
-
 /*
  *     Initialize a vm_page's machine-dependent fields.
  */

Reply via email to