Author: nwhitehorn
Date: Tue Mar 13 15:03:58 2018
New Revision: 330845
URL: https://svnweb.freebsd.org/changeset/base/330845

Log:
  Execute PowerPC64/AIM kernel from direct map region when possible.
  
  When the kernel can be in real mode in early boot, we can execute from
  high addresses aliased to the kernel's physical memory. If that high
  address has the first two bits set to 1 (0xc...), those addresses will
  automatically become part of the direct map. This reduces page table
  pressure from the kernel and prepares it for use with radix translation,
  which requires the kernel to run from these high addresses.
  
  This is accomplished by exploiting the fact that all PowerPC kernels are
  built as position-independent executables and relocate themselves
  on start. Before this patch, the kernel runs with a 1:1 VA:PA mapping
  whose base is arbitrary and chosen by the bootloader. Very early, it
  processes its ELF relocations so that it can operate wherever it happens
  to find itself.
  This patch uses that mechanism to re-enter and re-relocate the kernel
  a second time with a new base address set up in the early parts of
  powerpc_init().
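
  The 0xc... aliasing reduces to simple bit arithmetic. As a minimal,
  self-contained C sketch (constant and helper names here are illustrative;
  the kernel's own interface is DMAP_BASE_ADDRESS and its direct-map
  conversion macros):

	#include <stdint.h>

	#define DMAP_BASE	0xc000000000000000UL	/* top two bits set */

	/* Alias a physical address into the direct map, and back. */
	static inline uint64_t phys_to_dmap(uint64_t pa) { return (pa | DMAP_BASE); }
	static inline uint64_t dmap_to_phys(uint64_t va) { return (va & ~DMAP_BASE); }

  This is the same masking moea64_early_bootstrap() uses below to recover
  the kernel's physical bounds from its direct-mapped ones.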
  
  Reviewed by:  jhibbits
  Differential Revision:        https://reviews.freebsd.org/D14647

Modified:
  head/sys/powerpc/aim/aim_machdep.c
  head/sys/powerpc/aim/locore64.S
  head/sys/powerpc/aim/mmu_oea64.c
  head/sys/powerpc/ofw/ofwcall64.S
  head/sys/powerpc/powerpc/machdep.c

Modified: head/sys/powerpc/aim/aim_machdep.c
==============================================================================
--- head/sys/powerpc/aim/aim_machdep.c  Tue Mar 13 15:02:46 2018        (r330844)
+++ head/sys/powerpc/aim/aim_machdep.c  Tue Mar 13 15:03:58 2018        (r330845)
@@ -160,15 +160,72 @@ extern void       *dlmisstrap, *dlmisssize;
 extern void    *dsmisstrap, *dsmisssize;
 
 extern void *ap_pcpu;
+extern void __restartkernel(vm_offset_t, vm_offset_t, vm_offset_t, void *, uint32_t, register_t offset, register_t msr);
 
+void aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry,
+    void *mdp, uint32_t mdp_cookie);
 void aim_cpu_init(vm_offset_t toc);
 
 void
+aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp,
+    uint32_t mdp_cookie)
+{
+       register_t      scratch;
+
+       /*
+        * If running from an FDT, make sure we are in real mode to avoid
+        * tromping on firmware page tables. Everything in the kernel assumes
+        * 1:1 mappings out of firmware, so this won't break anything not
+        * already broken. This doesn't work if there is live OF, since OF
+        * may internally use non-1:1 mappings.
+        */
+       if (ofentry == 0)
+               mtmsr(mfmsr() & ~(PSL_IR | PSL_DR));
+
+#ifdef __powerpc64__
+       /*
+        * If in real mode, relocate to high memory so that the kernel
+        * can execute from the direct map.
+        */
+       if (!(mfmsr() & PSL_DR) &&
+           (vm_offset_t)&aim_early_init < DMAP_BASE_ADDRESS)
+               __restartkernel(fdt, 0, ofentry, mdp, mdp_cookie,
+                   DMAP_BASE_ADDRESS, mfmsr());
+#endif
+
+       /* Various very early CPU fix ups */
+       switch (mfpvr() >> 16) {
+               /*
+                * PowerPC 970 CPUs have a misfeature requested by Apple that
+                * makes them pretend they have a 32-byte cacheline. Turn this
+                * off before we measure the cacheline size.
+                */
+               case IBM970:
+               case IBM970FX:
+               case IBM970MP:
+               case IBM970GX:
+                       scratch = mfspr(SPR_HID5);
+                       scratch &= ~HID5_970_DCBZ_SIZE_HI;
+                       mtspr(SPR_HID5, scratch);
+                       break;
+       #ifdef __powerpc64__
+               case IBMPOWER7:
+               case IBMPOWER7PLUS:
+               case IBMPOWER8:
+               case IBMPOWER8E:
+                       /* XXX: get from ibm,slb-size in device tree */
+                       n_slbs = 32;
+                       break;
+       #endif
+       }
+}
+
+void
 aim_cpu_init(vm_offset_t toc)
 {
        size_t          trap_offset, trapsize;
        vm_offset_t     trap;
-       register_t      msr, scratch;
+       register_t      msr;
        uint8_t         *cache_check;
        int             cacheline_warn;
        #ifndef __powerpc64__
@@ -198,32 +255,6 @@ aim_cpu_init(vm_offset_t toc)
         * Bits 1-4, 10-15 (ppc32), 33-36, 42-47 (ppc64)
         */
        psl_userstatic &= ~0x783f0000UL;
-
-       /* Various very early CPU fix ups */
-       switch (mfpvr() >> 16) {
-               /*
-                * PowerPC 970 CPUs have a misfeature requested by Apple that
-                * makes them pretend they have a 32-byte cacheline. Turn this
-                * off before we measure the cacheline size.
-                */
-               case IBM970:
-               case IBM970FX:
-               case IBM970MP:
-               case IBM970GX:
-                       scratch = mfspr(SPR_HID5);
-                       scratch &= ~HID5_970_DCBZ_SIZE_HI;
-                       mtspr(SPR_HID5, scratch);
-                       break;
-       #ifdef __powerpc64__
-               case IBMPOWER7:
-               case IBMPOWER7PLUS:
-               case IBMPOWER8:
-               case IBMPOWER8E:
-                       /* XXX: get from ibm,slb-size in device tree */
-                       n_slbs = 32;
-                       break;
-       #endif
-       }
 
        /*
         * Initialize the interrupt tables and figure out our cache line

Modified: head/sys/powerpc/aim/locore64.S
==============================================================================
--- head/sys/powerpc/aim/locore64.S     Tue Mar 13 15:02:46 2018        (r330844)
+++ head/sys/powerpc/aim/locore64.S     Tue Mar 13 15:03:58 2018        (r330845)
@@ -105,7 +105,6 @@ ap_kexec_start:             /* At 0x60 past start, copied to 0x60
        mtsrr1  %r1
        ba      EXC_RST
 
-
 /*
  * Now start the real text section
  */
@@ -149,9 +148,12 @@ ASENTRY_NOPROF(__start)
        subf    %r31,%r31,%r2   /* Subtract from real TOC base to get base */
 
        /* Set up the stack pointer */
-       ld      %r1,TOC_REF(tmpstk)(%r2)
-       addi    %r1,%r1,TMPSTKSZ-96
-       add     %r1,%r1,%r31
+       bl      1f
+       .llong  tmpstk + TMPSTKSZ - 96 - .
+1:     mflr    %r30
+       ld      %r1,0(%r30)
+       add     %r1,%r1,%r30
+       nop
 
        /* Relocate kernel */
        std     %r3,48(%r1)
@@ -188,5 +190,21 @@ ASENTRY_NOPROF(__start)
 
        /* Unreachable */
        b       .
+
+ASENTRY_NOPROF(__restartkernel)
+       /*
+        * r3-r7: arguments to go to __start
+        * r8: offset from current kernel address to apply
+        * r9: MSR to set when (atomically) jumping to __start + r8
+        */
+       mtsrr1  %r9
+       bl      1f
+1:     mflr    %r25
+       add     %r25,%r8,%r25
+       addi    %r25,%r25,2f-1b
+       mtsrr0  %r25
+       rfid
+2:     bl      __start
+       nop
 
 #include <powerpc/aim/trap_subr64.S>
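
For reference, the new stack-pointer setup above derives tmpstk's runtime
address PC-relatively rather than from the TOC plus a computed displacement,
so it works at whatever base the kernel is currently executing, including
after __restartkernel() re-enters it at the direct-map alias. The bl/mflr
plus ".llong sym - ." idiom stores a link-time delta next to a runtime
anchor. A minimal C model of the computation (names hypothetical):

	#include <stdint.h>

	/*
	 * linked_delta is (symbol - anchor) as emitted at link time;
	 * anchor_runtime is the anchor's address observed at run time via
	 * bl 1f; mflr. Their sum is the symbol's address at the current
	 * load base, with no relocation processing needed.
	 */
	static inline uintptr_t
	pcrel_resolve(uintptr_t anchor_runtime, int64_t linked_delta)
	{
		return (anchor_runtime + (uintptr_t)linked_delta);
	}

__restartkernel() uses the same bl/mflr anchor to compute its target, then
mtsrr0/mtsrr1/rfid so that the jump to the new base and the MSR update
take effect atomically.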

Modified: head/sys/powerpc/aim/mmu_oea64.c
==============================================================================
--- head/sys/powerpc/aim/mmu_oea64.c    Tue Mar 13 15:02:46 2018        (r330844)
+++ head/sys/powerpc/aim/mmu_oea64.c    Tue Mar 13 15:03:58 2018        (r330845)
@@ -701,6 +701,7 @@ moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernels
 {
        int             i, j;
        vm_size_t       physsz, hwphyssz;
+       vm_paddr_t      kernelphysstart, kernelphysend;
 
 #ifndef __powerpc64__
        /* We don't have a direct map since there is no BAT */
@@ -727,6 +728,9 @@ moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernels
        __syncicache((void *)EXC_ISE, 0x80);
 #endif
 
+       kernelphysstart = kernelstart & ~DMAP_BASE_ADDRESS;
+       kernelphysend = kernelend & ~DMAP_BASE_ADDRESS;
+
        /* Get physical memory regions from firmware */
        mem_regions(&pregions, &pregions_sz, &regions, &regions_sz);
        CTR0(KTR_PMAP, "moea64_bootstrap: physical memory");
@@ -764,29 +768,30 @@ moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernels
                if (phys_avail[j] < EXC_LAST)
                        phys_avail[j] += EXC_LAST;
 
-               if (kernelstart >= phys_avail[j] &&
-                   kernelstart < phys_avail[j+1]) {
-                       if (kernelend < phys_avail[j+1]) {
+               if (kernelphysstart >= phys_avail[j] &&
+                   kernelphysstart < phys_avail[j+1]) {
+                       if (kernelphysend < phys_avail[j+1]) {
                                phys_avail[2*phys_avail_count] =
-                                   (kernelend & ~PAGE_MASK) + PAGE_SIZE;
+                                   (kernelphysend & ~PAGE_MASK) + PAGE_SIZE;
                                phys_avail[2*phys_avail_count + 1] =
                                    phys_avail[j+1];
                                phys_avail_count++;
                        }
 
-                       phys_avail[j+1] = kernelstart & ~PAGE_MASK;
+                       phys_avail[j+1] = kernelphysstart & ~PAGE_MASK;
                }
 
-               if (kernelend >= phys_avail[j] &&
-                   kernelend < phys_avail[j+1]) {
-                       if (kernelstart > phys_avail[j]) {
+               if (kernelphysend >= phys_avail[j] &&
+                   kernelphysend < phys_avail[j+1]) {
+                       if (kernelphysstart > phys_avail[j]) {
                                phys_avail[2*phys_avail_count] = phys_avail[j];
                                phys_avail[2*phys_avail_count + 1] =
-                                   kernelstart & ~PAGE_MASK;
+                                   kernelphysstart & ~PAGE_MASK;
                                phys_avail_count++;
                        }
 
-                       phys_avail[j] = (kernelend & ~PAGE_MASK) + PAGE_SIZE;
+                       phys_avail[j] = (kernelphysend & ~PAGE_MASK) +
+                           PAGE_SIZE;
                }
        }
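
The phys_avail surgery above is a carve-a-hole-out-of-a-range-list pattern:
the kernel's physical span is clipped out of whichever available region
contains it, and a new region is appended when the span splits one in two.
A simplified, self-contained sketch (hypothetical helper; it ignores the
page rounding and EXC_LAST adjustment done above, and regions that lie
wholly inside the reservation):

	#include <stdint.h>

	struct range { uint64_t start, end; };

	/*
	 * Remove [rstart, rend) from r[0..n-1]. May append one entry when
	 * the reservation splits a region; caller guarantees room for n + 1.
	 * Returns the new region count.
	 */
	static int
	carve(struct range *r, int n, uint64_t rstart, uint64_t rend)
	{
		int i, count = n;

		for (i = 0; i < n; i++) {
			if (rstart >= r[i].start && rstart < r[i].end) {
				if (rend < r[i].end) {
					/* Splits the region: keep the tail. */
					r[count].start = rend;
					r[count].end = r[i].end;
					count++;
				}
				r[i].end = rstart;	/* Keep the head. */
			} else if (rend > r[i].start && rend <= r[i].end) {
				r[i].start = rend;	/* Clip the front. */
			}
		}
		return (count);
	}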
 

Modified: head/sys/powerpc/ofw/ofwcall64.S
==============================================================================
--- head/sys/powerpc/ofw/ofwcall64.S    Tue Mar 13 15:02:46 2018        (r330844)
+++ head/sys/powerpc/ofw/ofwcall64.S    Tue Mar 13 15:03:58 2018        (r330845)
@@ -42,7 +42,7 @@
 ofwstk:
        .space  OFWSTKSZ
 rtas_regsave:
-       .space  24 /* 3 * sizeof(register_t) */
+       .space  32 /* 4 * sizeof(register_t) */
 GLOBAL(ofmsr)
        .llong  0, 0, 0, 0, 0           /* msr/sprg0-3 used in Open Firmware */
 GLOBAL(rtasmsr)
@@ -64,8 +64,8 @@ TOC_ENTRY(rtas_regsave)
  */
 
 ASENTRY_NOPROF(ofwcall)
-       mflr    %r0
-       std     %r0,16(%r1)
+       mflr    %r8
+       std     %r8,16(%r1)
        stdu    %r1,-208(%r1)
 
        /*
@@ -106,7 +106,7 @@ ASENTRY_NOPROF(ofwcall)
 
        /* Get OF stack pointer */
        ld      %r7,TOC_REF(ofwstk)(%r2)
-       addi    %r7,%r7,OFWSTKSZ-32
+       addi    %r7,%r7,OFWSTKSZ-40
 
        /*
         * Set the MSR to the OF value. This has the side effect of disabling
@@ -129,6 +129,8 @@ ASENTRY_NOPROF(ofwcall)
        std     %r5,8(%r1)      /* Save real stack pointer */
        std     %r2,16(%r1)     /* Save old TOC */
        std     %r6,24(%r1)     /* Save old MSR */
+       std     %r8,32(%r1)     /* Save high 32-bits of the kernel's PC */
+
        li      %r5,0
        stw     %r5,4(%r1)
        stw     %r5,0(%r1)
@@ -137,15 +139,23 @@ ASENTRY_NOPROF(ofwcall)
        mtctr   %r4
        bctrl
 
-       /* Reload stack pointer and MSR from the OFW stack */
+       /* Reload stack pointer, MSR, and reference PC from the OFW stack */
+       ld      %r7,32(%r1)
        ld      %r6,24(%r1)
        ld      %r2,16(%r1)
        ld      %r1,8(%r1)
 
-       /* Now set the real MSR */
-       mtmsrd  %r6
-       isync
+       /* Get back to the MSR/PC we want, using the cached high bits of PC */
+       mtsrr1  %r6
+       clrrdi  %r7,%r7,32
+       bl      1f
+1:     mflr    %r8
+       or      %r8,%r8,%r7
+       addi    %r8,%r8,2f-1b
+       mtsrr0  %r8
+       rfid                    /* Turn on MMU, exceptions, and 64-bit mode */
 
+2:
        /* Sign-extend the return value from OF */
        extsw   %r3,%r3
 
@@ -186,8 +196,8 @@ ASENTRY_NOPROF(ofwcall)
  */
 
 ASENTRY_NOPROF(rtascall)
-       mflr    %r0
-       std     %r0,16(%r1)
+       mflr    %r9
+       std     %r9,16(%r1)
        stdu    %r1,-208(%r1)
 
        /*
@@ -248,24 +258,41 @@ ASENTRY_NOPROF(rtascall)
        std     %r7,0(%r1)      /* Save 64-bit stack pointer */
        std     %r2,8(%r1)      /* Save TOC */
        std     %r6,16(%r1)     /* Save MSR */
+       std     %r9,24(%r1)     /* Save reference PC for high 32 bits */
 
        /* Finally, branch to RTAS */
        mtctr   %r5
        bctrl
 
        /* 
-        * Reload stack pointer and MSR from the reg save area in r1. We are
-        * running in 32-bit mode at this point, so it doesn't matter if r1
+        * Reload stack pointer, MSR, reg PC from the reg save area in r1. We
+        * are running in 32-bit mode at this point, so it doesn't matter if r1
         * has become sign-extended.
         */
+       ld      %r7,24(%r1)
        ld      %r6,16(%r1)
        ld      %r2,8(%r1)
        ld      %r1,0(%r1)
 
-       /* Now set the real MSR */
-       mtmsrd  %r6
-       isync
+       /*
+        * Get back to the right PC. We need to atomically re-enable
+        * exceptions, 64-bit mode, and the MMU. One thing that has likely
+        * happened is that, if we were running in the high-memory direct
+        * map, we no longer are as a result of LR truncation in RTAS.
+        * Fix this by copying the high-order bits of the LR at function
+        * entry onto the current PC and then jumping there while flipping
+        * all the MSR bits.
+        */
+       mtsrr1  %r6
+       clrrdi  %r7,%r7,32
+       bl      1f
+1:     mflr    %r8
+       or      %r8,%r8,%r7
+       addi    %r8,%r8,2f-1b
+       mtsrr0  %r8
+       rfid                    /* Turn on MMU, exceptions, and 64-bit mode */
 
+2:
        /* Sign-extend the return value from RTAS */
        extsw   %r3,%r3
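
For reference, Open Firmware and RTAS execute in 32-bit mode, so the link
register comes back truncated; the sequences above splice the high half of
the PC saved at entry onto the truncated runtime PC, then rfid sets PC and
MSR in one step. The bit manipulation, modeled in C (hypothetical helper):

	#include <stdint.h>

	/*
	 * saved_entry_pc: full 64-bit PC captured before calling firmware.
	 * truncated_pc:   current PC with only the low 32 bits valid.
	 * clrrdi ...,32 keeps the high half; or merges the two.
	 */
	static inline uint64_t
	splice_pc(uint64_t saved_entry_pc, uint64_t truncated_pc)
	{
		return ((saved_entry_pc & 0xffffffff00000000ULL) |
		    (truncated_pc & 0xffffffffULL));
	}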
 

Modified: head/sys/powerpc/powerpc/machdep.c
==============================================================================
--- head/sys/powerpc/powerpc/machdep.c  Tue Mar 13 15:02:46 2018        (r330844)
+++ head/sys/powerpc/powerpc/machdep.c  Tue Mar 13 15:03:58 2018        (r330845)
@@ -237,6 +237,8 @@ extern unsigned char        __sbss_start[];
 extern unsigned char   __sbss_end[];
 extern unsigned char   _end[];
 
+void aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry,
+    void *mdp, uint32_t mdp_cookie);
 void aim_cpu_init(vm_offset_t toc);
 void booke_cpu_init(void);
 
@@ -247,7 +249,6 @@ powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offs
        struct          pcpu *pc;
        struct cpuref   bsp;
        vm_offset_t     startkernel, endkernel;
-       void            *kmdp;
        char            *env;
         bool           ofw_bootargs = false;
 #ifdef DDB
@@ -255,8 +256,6 @@ powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offs
        vm_offset_t ksym_end;
 #endif
 
-       kmdp = NULL;
-
        /* First guess at start/end kernel positions */
        startkernel = __startkernel;
        endkernel = __endkernel;
@@ -278,15 +277,7 @@ powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offs
 #endif
 
 #ifdef AIM
-       /*
-        * If running from an FDT, make sure we are in real mode to avoid
-        * tromping on firmware page tables. Everything in the kernel assumes
-        * 1:1 mappings out of firmware, so this won't break anything not
-        * already broken. This doesn't work if there is live OF, since OF
-        * may internally use non-1:1 mappings.
-        */
-       if (ofentry == 0)
-               mtmsr(mfmsr() & ~(PSL_IR | PSL_DR));
+       aim_early_init(fdt, toc, ofentry, mdp, mdp_cookie);
 #endif
 
        /*
@@ -295,14 +286,33 @@ powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offs
         * boothowto.
         */
        if (mdp != NULL) {
+               void *kmdp = NULL;
+               char *envp = NULL;
+               uintptr_t md_offset = 0;
+               vm_paddr_t kernelendphys;
+
+#ifdef AIM
+               if ((uintptr_t)&powerpc_init > DMAP_BASE_ADDRESS)
+                       md_offset = DMAP_BASE_ADDRESS;
+#endif
+
                preload_metadata = mdp;
+               if (md_offset > 0) {
+                       preload_metadata += md_offset;
+                       preload_bootstrap_relocate(md_offset);
+               }
                kmdp = preload_search_by_type("elf kernel");
                if (kmdp != NULL) {
                        boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
-                       init_static_kenv(MD_FETCH(kmdp, MODINFOMD_ENVP, char *),
-                           0);
-                       endkernel = ulmax(endkernel, MD_FETCH(kmdp,
-                           MODINFOMD_KERNEND, vm_offset_t));
+                       envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
+                       if (envp != NULL)
+                               envp += md_offset;
+                       init_static_kenv(envp, 0);
+                       kernelendphys = MD_FETCH(kmdp, MODINFOMD_KERNEND,
+                           vm_offset_t);
+                       if (kernelendphys != 0)
+                               kernelendphys += md_offset;
+                       endkernel = ulmax(endkernel, kernelendphys);
 #ifdef DDB
                        ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
                        ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
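
When the kernel is executing above DMAP_BASE_ADDRESS, every pointer the
loader handed over (the metadata chain itself, the environment pointer,
the kernel-end address) is still a low/physical address and must be
rebased by the same offset before use; preload_bootstrap_relocate() is the
existing kernel routine that walks the metadata chain applying that
offset. The per-pointer fixup, as a sketch (hypothetical helper):

	#include <stddef.h>
	#include <stdint.h>

	/*
	 * Rebase a loader-provided pointer into the direct map; NULL stays
	 * NULL, mirroring the envp/kernelendphys checks above.
	 */
	static inline void *
	md_rebase(void *p, uintptr_t md_offset)
	{
		return (p == NULL ? NULL : (void *)((uintptr_t)p + md_offset));
	}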